### 1. Import Libraries & Data

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Read the training CSV once. `df0` keeps the frame exactly as loaded;
# all further work happens on the copy `df`, so the raw data stays available.
train_csv_path = "../../C_Datasets/titanic_data_train.csv"
df0 = pd.read_csv(train_csv_path)
df = df0.copy()
df.head()

Unnamed: 0,Sex,Pclass,SibSp,Parch,Alone,Embarked,Age,Ticket,Fare,Survived
0,1,3,1,0,1,2,22.0,21171,7.25,0
1,0,1,1,0,1,0,38.0,17599,71.2833,1
2,0,3,0,0,0,2,26.0,3101282,7.925,1
3,0,1,1,0,1,2,35.0,113803,53.1,1
4,1,3,0,0,0,2,35.0,373450,8.05,0


In [4]:
# Print each column's non-null count and dtype to check for missing values
# and non-numeric columns before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   Sex       891 non-null    int64  
 1   Pclass    891 non-null    int64  
 2   SibSp     891 non-null    int64  
 3   Parch     891 non-null    int64  
 4   Alone     891 non-null    int64  
 5   Embarked  891 non-null    int64  
 6   Age       891 non-null    float64
 7   Ticket    891 non-null    int64  
 8   Fare      891 non-null    float64
 9   Survived  891 non-null    int64  
dtypes: float64(2), int64(8)
memory usage: 69.7 KB


In [5]:
# Build the modelling frame: everything from `df` except the columns that are
# not used as classifier inputs. `df` itself is left unchanged.
unused_columns = ["SibSp", "Parch", "Embarked"]
df_clf = df.drop(unused_columns, axis="columns")
df_clf.head()

Unnamed: 0,Sex,Pclass,Alone,Age,Ticket,Fare,Survived
0,1,3,1,22.0,21171,7.25,0
1,0,1,1,38.0,17599,71.2833,1
2,0,3,0,26.0,3101282,7.925,1
3,0,1,1,35.0,113803,53.1,1
4,1,3,0,35.0,373450,8.05,0


### 2. Standardization

In [6]:
from sklearn.preprocessing import StandardScaler
# StandardScaler with default settings; it is fitted in the standardization cell.
scaler = StandardScaler()

In [7]:
# Standardize the continuous features to zero mean / unit variance.
#
# The scaler is fitted exactly once, on the raw values of all three columns.
# StandardScaler works column by column, so the scaled values are the same as
# before, but the fitted `scaler` now holds the statistics of the raw data and
# can be used to transform new samples.
# Columns are selected by name instead of by position, so adding or reordering
# columns in `df_clf` cannot silently scale the wrong features.
NUMERIC_COLS = ["Age", "Ticket", "Fare"]
scaled_values = scaler.fit_transform(df_clf[NUMERIC_COLS])

# `assign` replaces the columns outright; this avoids writing floats in place
# into the int64 "Ticket" column.
df_clf = df_clf.assign(
    **{col: scaled_values[:, pos] for pos, col in enumerate(NUMERIC_COLS)}
)

# NOTE(review): the scaler is fitted on the full data set, i.e. before the
# train/test split, so test-set statistics leak into the features. Consider
# fitting on the training rows only (e.g. scaler + SVC in a Pipeline).

In [8]:
df_clf.head()

Unnamed: 0,Sex,Pclass,Alone,Age,Ticket,Fare,Survived
0,1,3,1,-0.551366,-0.423103,-0.502445,0
1,0,1,1,0.65403,-0.428495,0.786845,1
2,0,3,0,-0.250017,4.226398,-0.488854,1
3,0,1,1,0.428018,-0.283273,0.42073,1
4,1,3,0,0.428018,0.10867,-0.486337,0


### 3. Split the DataFrame into Train & Test Sets

In [9]:
from sklearn.model_selection import train_test_split

# Separate the target column from the features and pass plain NumPy arrays on.
target_column = "Survived"
X = df_clf.drop(columns=[target_column]).to_numpy()
y = df_clf[target_column].to_numpy()

# Hold out 20% of the rows for testing. Stratifying on `y` keeps the class
# ratio the same in both parts; the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

### 4. Create Model

In [24]:
from sklearn.svm import SVC

# Baseline: an SVC with default hyper-parameters, trained on the training split.
svc_model = SVC()
svc_model.fit(X_train, y_train);  # trailing ';' keeps the cell output empty

In [25]:
# Predict labels for the held-out test split with the baseline SVC.
y_pred = svc_model.predict(X_test)

### 5. Evaluation

In [26]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score

# Score the test-set predictions with each metric, in the order the names are
# unpacked: accuracy, precision, recall, F1.
accuracy, precision, recall, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy_score, precision_score, recall_score, f1_score)
)

In [27]:
# One-row summary table of the baseline model's test-set scores.
baseline_scores = {
    "Model": "Support Vector Classifier",
    "Accuracy": accuracy,
    "Precision": precision,
    "Recall": recall,
    "F1-Score": f1,
}
pd.DataFrame(baseline_scores, index=[0])

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Support Vector Classifier,0.776536,0.737705,0.652174,0.692308


In [None]:
# Scratch cell (never executed): a default-configured SVC and the four kernel
# names that are tried in the grid search of the optimization section.
SVC()
('linear', 'poly', 'rbf', 'sigmoid')

### 6. Optimization

In [20]:
from sklearn.model_selection import GridSearchCV

# Search space: every combination of the C values and kernels listed here.
# NOTE(review): the best C reported by the search (50.0) is the smallest value
# in this grid, so smaller values may be worth adding.
parameters = dict(
    C=[50.0, 52.0, 54.0],
    kernel=['linear', 'poly', 'rbf', 'sigmoid'],
)

# Exhaustive search over `parameters` with GridSearchCV's default settings.
svc_model_grid = GridSearchCV(estimator=SVC(), param_grid=parameters)

In [21]:
# Run the grid search on the training split only; the test split is not
# touched here and stays reserved for the final evaluation.
svc_model_grid.fit(X_train, y_train)

In [22]:
# Show the parameter combination that the grid search selected as best.
svc_model_grid.best_params_

{'C': 50.0, 'kernel': 'poly'}

In [23]:
# Evaluate the tuned model on the held-out test split.
#
# GridSearchCV refits the winning parameter combination on the whole training
# split by default (refit=True), so the tuned model is taken directly from the
# search object. This replaces hand-copied hyper-parameters, which go stale
# whenever the grid or the data changes.
# `svc_model_grid` stays bound to the search object: rebinding it to a plain
# SVC made the `best_params_` cell fail when it was re-run afterwards.
svc_best = svc_model_grid.best_estimator_
y_pred = svc_best.predict(X_test)

# Same four test-set metrics as for the baseline, so the tables are comparable.
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# One-row summary table of the tuned model's test-set scores.
pd.DataFrame({"Model": "Support Vector Classifier",
              "Accuracy": accuracy,
              "Precision": precision,
              "Recall": recall,
              "F1-Score": f1},
            index = [0])

Unnamed: 0,Model,Accuracy,Precision,Recall,F1-Score
0,Support Vector Classifier,0.826816,0.895833,0.623188,0.735043
