[Grid Search Kernal](https://www.kaggle.com/xezxey/my-heart-will-go-on-titanic-disaster-gridsearchcv)  

In [69]:
import numpy as np
import pandas as pd
import scipy as sp
import matplotlib.pyplot as plt
import seaborn as sns
import re
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC, LinearSVC
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier , GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline

In [70]:
df = pd.read_csv('./data/full_train_test.csv')

In [71]:
df_train = df[(df['train'] == 1)]
df_test = df[(df['test'] == 1)]

#### SVM

In [72]:
# SVM

feature_names = [
    'pclass', 'age_scaled', 
    'true_fare_scaled', 
    'family_scaled', 'group_scaled',
#     'family_size_1', 'family_size_2', 'family_size_3',
#     'group_size_1', 'group_size_2', 'group_size_3',
    'sex', 
    'sex_0', 'sex_1',
    'title_1', 'title_2', 'title_3', 'title_4', 
]

features = df_train[feature_names]
label = df_train[['survived']]

param_grid = {
    'C':[1,10,100,1000],
    'gamma':[1,0.1,0.001,0.0001],
}
model = GridSearchCV(
    SVC(),
    param_grid,
    verbose=1, 
    cv=5
)
model.fit(features, label)
print('Score: ', model.best_score_)
print('Params: ', model.best_params_)
print('Estimator: ', model.best_estimator_)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
Score:  0.8372615039281706
Params:  {'C': 1000, 'gamma': 0.1}
Estimator:  SVC(C=1000, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma=0.1, kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False)


[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    2.7s finished


#### MLP

In [80]:
# MPL

feature_names = [
    'pclass', 'age_scaled', 
    'true_fare_scaled', 
    'family_scaled', 'group_scaled',
#     'family_size_1', 'family_size_2', 'family_size_3',
#     'group_size_1', 'group_size_2', 'group_size_3',
#     'sex', 
    'sex_0', 'sex_1',
    'title_1', 'title_2', 'title_3', 'title_4', 
]
features = df_train[feature_names]
label = df_train[['survived']]

param_grid = {
    'solver': ['lbfgs','adam'], 
    'max_iter': [500,1000,1500], 
    'alpha': 10.0 ** -np.arange(1, 7), 
    'hidden_layer_sizes':np.arange(3, 8)
}

model = GridSearchCV(
    MLPClassifier(),
    param_grid,
    verbose=1, 
    cv=3
)

model.fit(features, label)
print('Score: ', model.best_score_)
print('Params: ', model.best_params_)
print('Estimator: ', model.best_estimator_)

Fitting 3 folds for each of 180 candidates, totalling 540 fits


[Parallel(n_jobs=1)]: Done 540 out of 540 | elapsed:  2.3min finished


Score:  0.8361391694725028
Params:  {'alpha': 0.0001, 'hidden_layer_sizes': 7, 'max_iter': 1500, 'solver': 'adam'}
Estimator:  MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=7, learning_rate='constant',
       learning_rate_init=0.001, max_iter=1500, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)


#### Linear SVM

In [79]:
feature_names = [
    'pclass', 'age_scaled', 
    'true_fare_scaled', 
    'family_scaled', 'group_scaled',
#     'family_size_1', 'family_size_2', 'family_size_3',
#     'group_size_1', 'group_size_2', 'group_size_3',
#     'sex', 
    'sex_0', 'sex_1',
    'title_1', 'title_2', 'title_3', 'title_4', 
]
features = df_train[feature_names]
label = df_train[['survived']]

param_grid = {
    'C':[1,10,100,1000],
    'class_weight': ['balanced', None]
        
}
model = GridSearchCV(
    LinearSVC(),
    param_grid,
    verbose=1, 
    cv=5
)

model.fit(features, label)
print('Score: ', model.best_score_)
print('Params: ', model.best_params_)
print('Estimator: ', model.best_estimator_)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
Score:  0.8294051627384961
Params:  {'C': 1, 'class_weight': None}
Estimator:  LinearSVC(C=1, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)


[Parallel(n_jobs=1)]: Done  40 out of  40 | elapsed:    1.0s finished


#### Logistic Regression

In [78]:
feature_names = [
    'pclass', 'age_scaled', 
    'true_fare_scaled', 
    'family_scaled', 'group_scaled',
#     'family_size_1', 'family_size_2', 'family_size_3',
#     'group_size_1', 'group_size_2', 'group_size_3',
#     'sex', 
    'sex_0', 'sex_1',
    'title_1', 'title_2', 'title_3', 'title_4', 
]
features = df_train[feature_names]
label = df_train[['survived']]

param_grid = {
    'C':[1,10,100,1000],
    'solver': ['newton-cg', 'lbfgs', 'sag', 'saga'],
    'multi_class': ['ovr', 'multinomial']        
}
model = GridSearchCV(
    LogisticRegression(),
    param_grid,
    verbose=1, 
    cv=5
)

model.fit(features, label)
print('Score: ', model.best_score_)
print('Params: ', model.best_params_)
print('Estimator: ', model.best_estimator_)

Fitting 5 folds for each of 32 candidates, totalling 160 fits
Score:  0.8260381593714927
Params:  {'C': 1, 'multi_class': 'multinomial', 'solver': 'lbfgs'}
Estimator:  LogisticRegression(C=1, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='multinomial',
          n_jobs=1, penalty='l2', random_state=None, solver='lbfgs',
          tol=0.0001, verbose=0, warm_start=False)


[Parallel(n_jobs=1)]: Done 160 out of 160 | elapsed:    3.8s finished
