In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report 
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from tabulate import tabulate

In [2]:
df = pd.read_csv("files/titanic.csv")
df.columns = df.columns.str.lower()

print(df.columns)
print(df.describe())

Index(['passengerid', 'survived', 'pclass', 'name', 'sex', 'age', 'sibsp',
       'parch', 'ticket', 'fare', 'cabin', 'embarked'],
      dtype='object')
       passengerid    survived      pclass         age       sibsp   
count   891.000000  891.000000  891.000000  714.000000  891.000000  \
mean    446.000000    0.383838    2.308642   29.699118    0.523008   
std     257.353842    0.486592    0.836071   14.526497    1.102743   
min       1.000000    0.000000    1.000000    0.420000    0.000000   
25%     223.500000    0.000000    2.000000   20.125000    0.000000   
50%     446.000000    0.000000    3.000000   28.000000    0.000000   
75%     668.500000    1.000000    3.000000   38.000000    1.000000   
max     891.000000    1.000000    3.000000   80.000000    8.000000   

            parch        fare  
count  891.000000  891.000000  
mean     0.381594   32.204208  
std      0.806057   49.693429  
min      0.000000    0.000000  
25%      0.000000    7.910400  
50%      0.000000   14.4

In [3]:
df.drop(columns=['passengerid','name','ticket','cabin'],inplace=True)

In [4]:
df.isnull().sum()

survived      0
pclass        0
sex           0
age         177
sibsp         0
parch         0
fare          0
embarked      2
dtype: int64

In [5]:
df['age'].fillna(df['age'].mean(), inplace=True)
df = df.dropna(subset=["embarked"])

In [6]:
df.columns

Index(['survived', 'pclass', 'sex', 'age', 'sibsp', 'parch', 'fare',
       'embarked'],
      dtype='object')

In [7]:
print(df['sex'].unique())
print(df['embarked'].unique())
print(df['pclass'].unique())
print(df['sibsp'].unique())
print(df['parch'].unique())

['male' 'female']
['S' 'C' 'Q']
[3 1 2]
[1 0 3 4 2 5 8]
[0 1 2 5 3 4 6]


# Data Encoding

In [8]:
columns = ["pclass","sex","sibsp","parch", "embarked"]

for column in columns:
    df = pd.get_dummies(df, columns=[column],dtype=int)

In [9]:
df.head()

Unnamed: 0,survived,age,fare,pclass_1,pclass_2,pclass_3,sex_female,sex_male,sibsp_0,sibsp_1,...,parch_0,parch_1,parch_2,parch_3,parch_4,parch_5,parch_6,embarked_C,embarked_Q,embarked_S
0,0,22.0,7.25,0,0,1,0,1,0,1,...,1,0,0,0,0,0,0,0,0,1
1,1,38.0,71.2833,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,1,0,0
2,1,26.0,7.925,0,0,1,1,0,1,0,...,1,0,0,0,0,0,0,0,0,1
3,1,35.0,53.1,1,0,0,1,0,0,1,...,1,0,0,0,0,0,0,0,0,1
4,0,35.0,8.05,0,0,1,0,1,1,0,...,1,0,0,0,0,0,0,0,0,1


# Data Split

In [10]:
X = df.drop('survived',axis=1)
y = df['survived']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


# Data Scaler

In [11]:
sc = StandardScaler()
X_train.iloc[:,:2] = sc.fit_transform(X_train.iloc[:,:2])
X_test.iloc[:,:2] = sc.transform(X_test.iloc[:,:2])

# SVC Mode Training

In [12]:
svc_classifier = SVC(kernel='rbf',random_state=42)
svc_classifier.fit(X_train, y_train)

In [13]:
y_svc_pred = svc_classifier.predict(X_test)
svc_classifier.score(X_test,y_test)

0.8258426966292135

# Logistic Regression


In [14]:
from sklearn.linear_model import LogisticRegression
lr_classifier = LogisticRegression(random_state=42)
lr_classifier.fit(X_train,y_train)
y_lr_pred = lr_classifier.predict(X_train)
lr_classifier.score(X_test,y_test)


0.8146067415730337

# Perceptron

In [15]:
from sklearn.linear_model import Perceptron
tron_classifier = Perceptron(random_state=42)
tron_classifier.fit(X_train, y_train)
y_tron_pred = tron_classifier.predict(X_train)
tron_classifier.score(X_test,y_test)

0.7752808988764045

In [16]:
def CV_model_trainer(model_list, param_grid, X_train, y_train,
                   X_test, y_test):

    model_return = dict()

    try:
        for model in model_list:
            grid = GridSearchCV(model(), param_grid[model], refit=True)
            grid.fit(X_train,y_train)

            best_model = grid.best_estimator_
            grid_predictions = best_model.predict(X_test)

            # print(classification_report(y_test, grid_predictions))
            model_return[model] = classification_report(y_test, grid_predictions, output_dict=True)

        return model_return
    except Exception as e:
        print("Error:",e)
        return -1

In [17]:
def model_trainer(model_list, X_train, y_train,
                   X_test, y_test):
    model_return = dict()

    try:
        for model in model_list:
            regressor = model()
            regressor.fit(X_train,y_train)
            y_pred = regressor.predict(X_test)

            # print(classification_report(y_test, y_pred))
            model_return[model] = classification_report(y_test, y_pred,output_dict=True)

        return model_return
    except Exception as e:
        print(e)
        return 0

In [18]:
def show_model_result(model_report, verbose=False):
    print_arr = [['Model','Category','Accuracy','Precision','Recall','f1-Score']]
    
    for model in model_report:
        for i in range(0,2):
            print_arr.append([str(model).split('.')[-1].split('\'')[0],"Survivant",model_report[model]['accuracy'],
                            model_report[model][str(i)]['precision'],model_report[model][str(i)]['recall'],
                            model_report[model][str(i)]['f1-score']])

    df = pd.DataFrame(print_arr[1:-1], columns=print_arr[0])

    if verbose :
        print(tabulate(print_arr, headers="firstrow", tablefmt="rounded_grid", floatfmt=".2f"))

    return df

In [19]:
model_list = [SVC, Perceptron, LogisticRegression]

model_report = model_trainer(model_list, X_train, y_train, X_test, y_test)

df = show_model_result(model_report,verbose=True)

╭────────────────────┬────────────┬────────────┬─────────────┬──────────┬────────────╮
│ Model              │ Category   │   Accuracy │   Precision │   Recall │   f1-Score │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ SVC                │ Survivant  │       0.83 │        0.85 │     0.87 │       0.86 │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ SVC                │ Survivant  │       0.83 │        0.79 │     0.75 │       0.77 │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ Perceptron         │ Survivant  │       0.70 │        0.67 │     0.98 │       0.80 │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ Perceptron         │ Survivant  │       0.70 │        0.89 │     0.25 │       0.39 │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ LogisticRegression │ Survivant  │       0

In [20]:
model_list = [SVC, Perceptron, LogisticRegression]
param_grid = {
              SVC:{'C': np.arange(0.1,10,0.1),'gamma': ['scale','auto'],'kernel': ['rbf', 'poly'], 'max_iter':(100000,)},
              Perceptron: {'max_iter':(100000,)},
              LogisticRegression: {'C': np.arange(0.1,10,0.1), 'max_iter':(100000,)}
              }

model_report = CV_model_trainer(model_list, param_grid, X_train, y_train, X_test, y_test)

show_model_result(model_report,verbose=True)

╭────────────────────┬────────────┬────────────┬─────────────┬──────────┬────────────╮
│ Model              │ Category   │   Accuracy │   Precision │   Recall │   f1-Score │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ SVC                │ Survivant  │       0.80 │        0.82 │     0.85 │       0.84 │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ SVC                │ Survivant  │       0.80 │        0.75 │     0.71 │       0.73 │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ Perceptron         │ Survivant  │       0.70 │        0.67 │     0.98 │       0.80 │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ Perceptron         │ Survivant  │       0.70 │        0.89 │     0.25 │       0.39 │
├────────────────────┼────────────┼────────────┼─────────────┼──────────┼────────────┤
│ LogisticRegression │ Survivant  │       0

Unnamed: 0,Model,Category,Accuracy,Precision,Recall,f1-Score
0,SVC,Survivant,0.797753,0.823009,0.853211,0.837838
1,SVC,Survivant,0.797753,0.753846,0.710145,0.731343
2,Perceptron,Survivant,0.696629,0.672956,0.981651,0.798507
3,Perceptron,Survivant,0.696629,0.894737,0.246377,0.386364
4,LogisticRegression,Survivant,0.814607,0.858491,0.834862,0.846512
