In [4]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

In [5]:
titanic_df = pd.read_csv('datasets/titanic_processed.csv')
X = titanic_df.drop('Survived', axis=1)
Y = titanic_df['Survived']

x_train, x_test, y_train, y_test = train_test_split(X, Y,  test_size=0.2)

In [6]:
def summarize_classification(y_test, y_pred):
    acc = accuracy_score(y_test, y_pred, normalize=True)
    num_acc = accuracy_score(y_test, y_pred, normalize=False)

    prec = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)

    return {'accuracy': acc, 'precision': prec, 'recall': recall, 'accuracy_count': num_acc}

In [8]:
from sklearn.model_selection import GridSearchCV

parameters = {'max_depth': [2, 4, 5, 7, 9, 10]}
grid_search = GridSearchCV(DecisionTreeClassifier(), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'max_depth': 4}

In [9]:
for i in range(6):
    print('Paramaters: ', grid_search.cv_results_['params'][i])
    print('Mean test score: ', grid_search.cv_results_['mean_test_score'][i])
    print('Rank:: ', grid_search.cv_results_['rank_test_score'][i])

Paramaters:  {'max_depth': 2}
Mean test score:  0.785602896129212
Rank::  2
Paramaters:  {'max_depth': 4}
Mean test score:  0.7873108697670101
Rank::  1
Paramaters:  {'max_depth': 5}
Mean test score:  0.7785203750116031
Rank::  5
Paramaters:  {'max_depth': 7}
Mean test score:  0.780293325907361
Rank::  4
Paramaters:  {'max_depth': 9}
Mean test score:  0.7820198644760047
Rank::  3
Paramaters:  {'max_depth': 10}
Mean test score:  0.7697298802561962
Rank::  6


In [15]:
parameters = {'penalty': ['l1', 'l2'], 'C': [0.1, .4, .8, 1, 2, 5]}
grid_search = GridSearchCV(LogisticRegression(solver='liblinear'), parameters, cv=3, return_train_score=True)
grid_search.fit(x_train, y_train)

grid_search.best_params_

{'C': 5, 'penalty': 'l1'}

In [16]:
for i in range(12):
    print('Paramaters: ', grid_search.cv_results_['params'][i])
    print('Mean test score: ', grid_search.cv_results_['mean_test_score'][i])
    print('Rank:: ', grid_search.cv_results_['rank_test_score'][i])

Paramaters:  {'C': 0.1, 'penalty': 'l1'}
Mean test score:  0.7803118908382066
Rank::  6
Paramaters:  {'C': 0.1, 'penalty': 'l2'}
Mean test score:  0.7767752715121136
Rank::  9
Paramaters:  {'C': 0.4, 'penalty': 'l1'}
Mean test score:  0.776803118908382
Rank::  8
Paramaters:  {'C': 0.4, 'penalty': 'l2'}
Mean test score:  0.7732850645131347
Rank::  12
Paramaters:  {'C': 0.8, 'penalty': 'l1'}
Mean test score:  0.7873479996287015
Rank::  2
Paramaters:  {'C': 0.8, 'penalty': 'l2'}
Mean test score:  0.7732943469785575
Rank::  11
Paramaters:  {'C': 1, 'penalty': 'l1'}
Mean test score:  0.7838392276988769
Rank::  5
Paramaters:  {'C': 1, 'penalty': 'l2'}
Mean test score:  0.7750580154088927
Rank::  10
Paramaters:  {'C': 2, 'penalty': 'l1'}
Mean test score:  0.785602896129212
Rank::  3
Paramaters:  {'C': 2, 'penalty': 'l2'}
Mean test score:  0.77857606980414
Rank::  7
Paramaters:  {'C': 5, 'penalty': 'l1'}
Mean test score:  0.7891209505244593
Rank::  1
Paramaters:  {'C': 5, 'penalty': 'l2'}
Mean