In [1]:
# TODOs:
#     1. Make categorical and numerical datasets
#     2. Learn how to handle each type of dataset
#     3. Impute missing data
#     4. Make different pipelines for the categories

In [None]:
import sys

# Remember the notebook's stdout so it can be put back after a cell has
# pointed sys.stdout somewhere else.
#   * stdout is the ipykernel stream, or nothing has been saved yet:
#     capture the current stream in `saved_std`.
#   * otherwise (stdout is still redirected from an earlier cell):
#     restore the saved stream.
# The "not in globals()" guard covers a fresh interpreter whose stdout is not
# an ipykernel OutStream; without it the restore branch hit an undefined
# `saved_std`.
is_kernel_stream = str(type(sys.stdout)) == "<class 'ipykernel.iostream.OutStream'>"
if is_kernel_stream or "saved_std" not in globals():
    saved_std = sys.stdout
else:
    sys.stdout = saved_std

sys.stdout

In [None]:
import contextlib

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn import tree
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import (RandomForestClassifier, GradientBoostingClassifier,
                              AdaBoostClassifier,ExtraTreesClassifier)
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV,cross_val_score
from sklearn.pipeline import make_pipeline, Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, Binarizer

In [None]:
# Training set, indexed by passenger id.
data = pd.read_csv('train.csv', index_col='PassengerId')
# Keep the label as the target vector before it is removed from the features.
y = data['Survived']
# Feature frame: everything except the label and the three columns not used as features.
data = data.drop(columns=['Name', 'Ticket', 'Cabin', 'Survived'])
data

In [None]:
# Test set, indexed by passenger id, with the same unused columns removed as for training.
test = pd.read_csv('test.csv', index_col='PassengerId')
test = test.drop(columns=['Name', 'Cabin', 'Ticket'])

In [None]:
# Columns that get one-hot encoded vs. columns that stay numeric.
Categorical_cols = ['Pclass','Embarked','Sex','SibSp']
Numerical_cols = ['Fare','Age']

# Categorical branch: fill gaps with the most frequent value, then one-hot encode.
# handle_unknown='ignore' makes a category that was absent when the encoder was
# fitted (e.g. a rare value that only shows up in a validation fold or in the
# test set) encode as an all-zero row instead of raising during transform.
Ctgrcl_trns = make_pipeline(SimpleImputer(strategy='most_frequent'),
                            OneHotEncoder(handle_unknown='ignore'))

# One-hot encoding yields one row per sample and one column per category, e.g.
# Sex: ['male','female','male','other']      (columns: female, male, other)
# OHE: [[0,1,0],[1,0,0],[0,1,0],[0,0,1]]

# Numerical branch: mean imputation only (no scaling step).
Nmrcl_trns = make_pipeline(SimpleImputer(strategy='mean'))
Preprocess = ColumnTransformer([('Categorical_cols', Ctgrcl_trns,Categorical_cols),
                                ('Numrecial_cols',Nmrcl_trns,Numerical_cols)])

# Base pipelines. The step names ('Gradient', 'Forest') are the prefixes used by
# the parameter grids, e.g. 'Gradient__n_estimators'.
Gradient = GradientBoostingClassifier(warm_start=True)
Pipe_Grad = Pipeline([
    ('Preprocessing', Preprocess),
    ('Gradient', Gradient)
])

Forest = RandomForestClassifier(warm_start=True)
Pipe_Forest = Pipeline([
    ('Preprocessing', Preprocess),
    ('Forest', Forest)
])

In [None]:
# Search spaces for the first two grid searches; keys follow "<pipeline step>__<parameter>".
params_Frst = {
    'Forest__n_estimators': [50, 100, 200, 300, 500],
    'Forest__max_depth': [7, 8, 9, None],
    'Forest__random_state': [11],
}

params_Grad = {
    'Gradient__n_estimators': [100, 200, 300, 400, 500],
    'Gradient__max_depth': range(1, 4),
    'Gradient__random_state': [12],
    'Gradient__learning_rate': [0.1, 0.05, 0.01],
}


In [None]:
def print_best(Grid,Prints = True):
    """Report the best score and parameters of a fitted search object.

    Parameters
    ----------
    Grid : object exposing ``best_score_`` and ``best_params_``.
    Prints : bool, default True
        When truthy, print ``best_score_`` and ``best_params_`` and return
        ``None``. When falsy, print nothing and return them instead.

    Returns
    -------
    list or None
        ``[best_score_, best_params_]`` when ``Prints`` is falsy, else ``None``.
    """
    if not Prints:
        return [Grid.best_score_, Grid.best_params_]
    print(Grid.best_score_, Grid.best_params_)

In [None]:
# "NR" searches: 10-fold CV with train scores recorded.
# The forest is ranked by F1, gradient boosting by ROC AUC.
Forest_Grid_NR = GridSearchCV(
    Pipe_Forest,
    param_grid=params_Frst,
    scoring='f1',
    cv=10,
    return_train_score=True,
)
Grad_Grid_NR = GridSearchCV(
    Pipe_Grad,
    param_grid=params_Grad,
    scoring='roc_auc',
    cv=10,
    return_train_score=True,
)

# Fit and report gradient boosting first, then the forest.
Grad_Grid_NR.fit(data, y)
print_best(Grad_Grid_NR)

Forest_Grid_NR.fit(data, y)
print_best(Forest_Grid_NR)

In [None]:
# "OF" searches: same pipelines and grids, ranked by accuracy with 5-fold CV.
Forest_Grid_OF = GridSearchCV(
    Pipe_Forest,
    param_grid=params_Frst,
    scoring='accuracy',
    cv=5,
    return_train_score=True,
)
Grad_Grid_OF = GridSearchCV(
    Pipe_Grad,
    param_grid=params_Grad,
    scoring='accuracy',
    cv=5,
    return_train_score=True,
)

# Fit and report gradient boosting first, then the forest.
Grad_Grid_OF.fit(data, y)
print_best(Grad_Grid_OF)

Forest_Grid_OF.fit(data, y)
print_best(Forest_Grid_OF)

In [None]:
# Predictions for the test passengers from the two accuracy-tuned searches,
# labelled 'Survived' and indexed like the test frame.
Forest_pred_OF = pd.Series(
    Forest_Grid_OF.predict(test), index=test.index, name='Survived'
)
Grad_pred_OF = pd.Series(
    Grad_Grid_OF.predict(test), index=test.index, name='Survived'
)

# Confusion matrices computed on the training data the searches were fitted on.
# Layout printed by sklearn (rows = actual class, columns = predicted class):
#             | pred 0 | pred 1 |
#   actual 0  |   TN   |   FP   |
#   actual 1  |   FN   |   TP   |
print(metrics.confusion_matrix(y, Forest_Grid_OF.predict(data)))
print(metrics.confusion_matrix(y, Grad_Grid_OF.predict(data)))

# Write both prediction series to the Predictions/ folder.
Forest_pred_OF.to_csv('Predictions/Forest_Pred_OF_1.csv')
Grad_pred_OF.to_csv('Predictions/Grad_Pred_OF_1.csv')

In [None]:
# Predictions for the test passengers from the "NR" searches,
# labelled 'Survived' and indexed like the test frame.
Forest_pred_NR = pd.Series(
    Forest_Grid_NR.predict(test), index=test.index, name='Survived'
)
Grad_pred_NR = pd.Series(
    Grad_Grid_NR.predict(test), index=test.index, name='Survived'
)

# Confusion matrices on the training data (rows = actual, columns = predicted).
print(metrics.confusion_matrix(y, Forest_Grid_NR.predict(data)))
print(metrics.confusion_matrix(y, Grad_Grid_NR.predict(data)))

# Write both prediction series to the Predictions/ folder.
Forest_pred_NR.to_csv('Predictions/Forest_Pred_NR_1.csv')
Grad_pred_NR.to_csv('Predictions/Grad_Pred_NR_1.csv')

In [None]:
# Search spaces used by the F1-scored searches.
params_Grad_f1 = {
    'Gradient__n_estimators': [100, 400, 600],
    'Gradient__max_depth': [2, 5],
    'Gradient__random_state': [42],
    'Gradient__learning_rate': [0.2, 0.1, 0.05],
}

# Single-combination variant of the grid above, kept for quick re-runs:
# params_Grad_f1 = {
#     'Gradient__n_estimators': [400],
#     'Gradient__max_depth': [2],
#     'Gradient__random_state': [42],
#     'Gradient__learning_rate': [0.1],
# }

params_Frst_f1 = {
    'Forest__n_estimators': [200, 500, 700],
    'Forest__max_depth': [None, 6, 9, 12],
    'Forest__random_state': [42],
}


In [61]:

# F1-scored searches over the f1 grids: 5-fold CV, verbose progress, run serially.
Grad_Grid_f1 = GridSearchCV(Pipe_Grad,param_grid=params_Grad_f1,scoring='f1',cv=5,verbose=3,n_jobs=1)
Forest_Grid_f1 = GridSearchCV(Pipe_Forest,param_grid=params_Frst_f1,scoring='f1',cv=5,verbose=3,n_jobs=1)

# While fitting, send everything printed to file descriptor 1 (the process-level
# standard output) instead of the notebook. closefd=False leaves descriptor 1
# open when the wrapper is closed, and redirect_stdout puts the previous
# sys.stdout back even if fit() raises.
with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
    Grad_Grid_f1.fit(data,y)
    print('Done with grid :)')

print_best(Grad_Grid_f1)

with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
    Forest_Grid_f1.fit(data,y)
    print('Done with grid :)')

print_best(Forest_Grid_f1)

0.7591260150442718 {'Gradient__learning_rate': 0.1, 'Gradient__max_depth': 2, 'Gradient__n_estimators': 400, 'Gradient__random_state': 42}
0.7551045173645473 {'Forest__max_depth': 12, 'Forest__n_estimators': 700, 'Forest__random_state': 42}


In [62]:
# Predictions for the test passengers from the F1-tuned searches,
# labelled 'Survived' and indexed like the test frame.
Grad_pred_f1 = pd.Series(
    Grad_Grid_f1.predict(test), index=test.index, name='Survived'
)
Forest_pred_f1 = pd.Series(
    Forest_Grid_f1.predict(test), index=test.index, name='Survived'
)

# Confusion matrices on the training data (rows = actual, columns = predicted).
print(metrics.confusion_matrix(y, Grad_Grid_f1.predict(data)))
print(metrics.confusion_matrix(y, Forest_Grid_f1.predict(data)))

# Write both prediction series to the Predictions/ folder.
Grad_pred_f1.to_csv('Predictions/Grad_Pred_f1.csv')
Forest_pred_f1.to_csv('Predictions/Forest_Pred_f1.csv')


[[517  32]
 [ 57 285]]
[[543   6]
 [ 27 315]]


In [None]:
# Single decision tree behind the shared preprocessing step.
Tree = tree.DecisionTreeClassifier(random_state=32)
Pipe_Tree = Pipeline([('Preprocessing', Preprocess), ('Tree', Tree)])

# Search space for the tree; keys follow "<pipeline step>__<parameter>".
params_Tree = {
    'Tree__max_depth': [None],
    'Tree__splitter': ['best', 'random'],
    'Tree__min_samples_split': range(2, 4),
    'Tree__min_samples_leaf': range(1, 4),
    'Tree__max_leaf_nodes': [None, 150, 200, 250],
}


In [64]:
# F1-scored search for the decision-tree pipeline, 10-fold CV, verbose progress.
Tree_Grid = GridSearchCV(Pipe_Tree,param_grid=params_Tree,scoring='f1',cv=10,verbose=3)

# While fitting, send everything printed to file descriptor 1 (the process-level
# standard output) instead of the notebook. closefd=False leaves descriptor 1
# open when the wrapper is closed, and redirect_stdout puts the previous
# sys.stdout back even if fit() raises.
with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
    Tree_Grid.fit(data,y)
    print('Done with grid :)')

print_best(Tree_Grid)
# Confusion matrix on the training data (rows = actual, columns = predicted).
print(metrics.confusion_matrix(y,Tree_Grid.predict(data)))

0.7534598724095104 {'Tree__max_depth': None, 'Tree__max_leaf_nodes': 200, 'Tree__min_samples_leaf': 2, 'Tree__min_samples_split': 2, 'Tree__splitter': 'random'}
[[521  28]
 [ 76 266]]


In [None]:
# Logistic regression behind the shared preprocessing step.
logreg = LogisticRegression(random_state=32)
Pipe_logreg = Pipeline([('Preprocessing', Preprocess), ('logreg', logreg)])

# Search space: both penalties with the liblinear solver, C stepped by 0.05
# from 1 up to (not including) 2, with and without balanced class weights.
params_logreg = {
    'logreg__penalty': ['l1', 'l2'],
    'logreg__solver': ['liblinear'],
    'logreg__max_iter': [100, 125, 75],
    'logreg__C': np.arange(1, 2, 0.05),
    'logreg__class_weight': ['balanced', None],
}

In [82]:
# F1-scored search for the logistic-regression pipeline, 10-fold CV, verbose progress.
logreg_Grid = GridSearchCV(Pipe_logreg,param_grid=params_logreg,scoring='f1',cv=10,verbose=3)

# While fitting, send everything printed to file descriptor 1 (the process-level
# standard output) instead of the notebook. closefd=False leaves descriptor 1
# open when the wrapper is closed, and redirect_stdout puts the previous
# sys.stdout back even if fit() raises.
with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
    logreg_Grid.fit(data,y)
    print('Done with grid :)')

print_best(logreg_Grid)
# Confusion matrix on the training data (rows = actual, columns = predicted).
print(metrics.confusion_matrix(y,logreg_Grid.predict(data)))

0.734546590750368 {'logreg__C': 1.1, 'logreg__class_weight': 'balanced', 'logreg__max_iter': 100, 'logreg__penalty': 'l1', 'logreg__solver': 'liblinear'}
[[437 112]
 [ 73 269]]


In [None]:
# Extra-trees ensemble behind the shared preprocessing step.
Extra_For = ExtraTreesClassifier(random_state=1, warm_start=True)
Pipe_Extra = Pipeline([('Preprocessing', Preprocess), ('Extra', Extra_For)])

In [None]:
# Search space for the extra-trees pipeline; only the ensemble size varies.
params_Extra = {
    'Extra__n_estimators': [50, 100, 200],
    'Extra__max_depth': [None],
    'Extra__class_weight': [None],
    'Extra__ccp_alpha': [0.0],
}

In [35]:
# F1-scored search for the extra-trees pipeline, 10-fold CV, verbose progress, run serially.
Extra_Grid_f1 = GridSearchCV(Pipe_Extra,param_grid=params_Extra,scoring='f1',cv=10,n_jobs=1,verbose=3)

# While fitting, send everything printed to file descriptor 1 (the process-level
# standard output) instead of the notebook. closefd=False leaves descriptor 1
# open when the wrapper is closed, and redirect_stdout puts the previous
# sys.stdout back even if fit() raises.
with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
    Extra_Grid_f1.fit(data,y)
    print('Done with grid :)')

print_best(Extra_Grid_f1)
# Confusion matrix on the training data (rows = actual, columns = predicted).
print(metrics.confusion_matrix(y,Extra_Grid_f1.predict(data)))
# Test-set predictions, labelled 'Survived', written to the Predictions/ folder.
pd.Series(Extra_Grid_f1.predict(test), index=test.index, name='Survived').to_csv('Predictions/Extra_Pred_f1.csv')

0.7321402121164768 {'Extra__ccp_alpha': 0.0, 'Extra__class_weight': None, 'Extra__max_depth': None, 'Extra__n_estimators': 100}


In [None]:
# AdaBoost over seeded decision trees, behind the shared preprocessing step.
# NOTE(review): newer scikit-learn releases rename `base_estimator` to
# `estimator` - confirm against the installed version.
Ada = AdaBoostClassifier(
    base_estimator=tree.DecisionTreeClassifier(random_state=1),
    random_state=1,
)
Pipe_Ada = Pipeline([('Preprocessing', Preprocess), ('Ada', Ada)])
# Other candidate base models for Ada:
#   - ExtraTrees

In [None]:
# Search space for the AdaBoost pipeline; only the ensemble size varies.
params_Ada = {'Ada__n_estimators': [50, 100, 200]}

In [88]:
# F1-scored search for the AdaBoost pipeline, 10-fold CV, verbose progress.
Ada_Grid_f1 = GridSearchCV(Pipe_Ada,param_grid=params_Ada,cv=10,verbose=3,scoring='f1')

# While fitting, send everything printed to file descriptor 1 (the process-level
# standard output) instead of the notebook. closefd=False leaves descriptor 1
# open when the wrapper is closed, and redirect_stdout puts the previous
# sys.stdout back even if fit() raises.
with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
    Ada_Grid_f1.fit(data,y)
    print('Done with grid :)')

print_best(Ada_Grid_f1)
# Confusion matrix on the training data (rows = actual, columns = predicted).
print(metrics.confusion_matrix(y,Ada_Grid_f1.predict(data)))
# Test-set predictions, labelled 'Survived', written to the Predictions/ folder.
pd.Series(Ada_Grid_f1.predict(test), index=test.index, name='Survived').to_csv('Predictions/Ada_Pred_f1.csv')

0.744563123023356 {'Ada__n_estimators': 50}
[[539  10]
 [  7 335]]


## Best models:
1. GradientBoosting (f1)
2. RandomForest (f1)

### Now to feature engineering
Our features should include these facts:
1. Kids and teens have a higher chance of surviving
2. People with family have a higher chance of surviving

In [None]:
# Replace missing ages with the column mean. Nmrcl_trns is the mean-imputer
# pipeline; this call also fits it on the training ages.
data['Age'] = Nmrcl_trns.fit_transform(data[['Age']])
# Binarizer maps values strictly above `threshold` to 1 and everything else to 0.
# Each Series is wrapped in a list so it is passed as a single 2-D row; the
# trailing [0] takes that row back out.
# Has_Someone: 1 when Parch + SibSp is above 0.9 (i.e. at least 1 for integer counts).
data['Has_Someone'] = Binarizer(threshold=0.9).transform([data['Parch'] + data['SibSp']])[0]
# Has_SibSp: 1 when SibSp alone is above 0.9.
data['Has_SibSp'] = Binarizer(threshold=0.9).transform([data['SibSp']])[0]
# Under_17: inverted flag, 1 when Age is 17 or below (17 itself is included).
data['Under_17'] = 1 - Binarizer(threshold=17).transform([(data['Age'])])[0]
data

In [None]:
# Fill missing test ages with the mean learned from the training ages.
# Nmrcl_trns was fitted on data[['Age']] in the cell above; re-fitting it here
# would impute the test set with its own mean, so train and test rows would be
# preprocessed differently. np.ravel flattens the single-column result to 1-D
# before it is assigned to the column.
test['Age'] = np.ravel(Nmrcl_trns.transform(test[['Age']]))
# Binarizer maps values strictly above `threshold` to 1 and everything else to 0.
# Each Series is wrapped in a list so it is passed as a single 2-D row; the
# trailing [0] takes that row back out.
# Has_Someone: 1 when Parch + SibSp is above 0.9 (i.e. at least 1 for integer counts).
test['Has_Someone'] = Binarizer(threshold=0.9).transform([test['Parch'] + test['SibSp']])[0]
# Has_SibSp: 1 when SibSp alone is above 0.9.
test['Has_SibSp'] = Binarizer(threshold=0.9).transform([test['SibSp']])[0]
# Under_17: inverted flag, 1 when Age is 17 or below (17 itself is included).
test['Under_17'] = 1 - Binarizer(threshold=17).transform([(test['Age'])])[0]
test

In [None]:
# Candidate feature sets for the sweep below.
# Each entry is (columns to one-hot encode, columns to keep numeric).
Feature_Pairs = [
    (['Sex', 'Pclass', 'Embarked'],
     ['SibSp', 'Age', 'Fare']),
    (['Sex', 'Pclass', 'Embarked', 'Under_17', 'Has_SibSp'],
     ['Fare']),
    (['Sex', 'Pclass', 'Embarked', 'Under_17', 'Has_SibSp'],
     []),
    (['Sex', 'Pclass', 'Embarked', 'Has_SibSp'],
     ['Age']),
    (['Sex', 'Pclass', 'Embarked', 'Under_17', 'Has_Someone'],
     []),
    (['Sex', 'Pclass', 'SibSp', 'Embarked'],
     ['Age', 'Fare']),
    (['Sex', 'Pclass', 'SibSp', 'Embarked', 'Under_17', 'Has_Someone'],
     ['Age', 'Fare']),
    (['Sex', 'Pclass', 'Embarked', 'Under_17'],
     ['SibSp', 'Age', 'Fare']),
]

# Collects the best score/params of every search run in the sweep.
scores = []

In [138]:
# Feature-set sweep, part 1: the first two entries of Feature_Pairs.
for pair in Feature_Pairs[0:2]:
    # pair = (columns to one-hot encode, columns to keep numeric)
    Preprocess2 = ColumnTransformer([('Categorical_cols', Ctgrcl_trns,pair[0]),
                                     ('Numrecial_cols',Nmrcl_trns,pair[1])])

    # Use the estimators created for this feature set. The pipelines previously
    # referenced the notebook-level `Gradient` / `Forest` objects, leaving
    # Grad_Fin / Forest_Fin unused. Step names are unchanged so the parameter
    # grids still apply.
    Grad_Fin = GradientBoostingClassifier(warm_start=True)
    Pipe_Grad_Fin = Pipeline([
        ('Preprocessing', Preprocess2),
        ('Gradient', Grad_Fin)
    ])

    Forest_Fin = RandomForestClassifier(warm_start=True)
    Pipe_Forest_Fin = Pipeline([
        ('Preprocessing', Preprocess2),
        ('Forest', Forest_Fin)
    ])

    Grad_Grid_Fin = GridSearchCV(Pipe_Grad_Fin,param_grid=params_Grad_f1,scoring='f1',cv=10,verbose=10,n_jobs=1)
    Forest_Grid_Fin = GridSearchCV(Pipe_Forest_Fin,param_grid=params_Frst_f1,scoring='f1',cv=10,verbose=10,n_jobs=1)

    # While fitting, send everything printed to file descriptor 1 (the
    # process-level standard output) instead of the notebook. closefd=False
    # leaves descriptor 1 open when the wrapper is closed, and redirect_stdout
    # puts the previous sys.stdout back even if fit() raises.
    with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
        Grad_Grid_Fin.fit(data,y)
        print('Done with grid :)')
    scores.append([print_best(Grad_Grid_Fin,Prints=False)])

    print_best(Grad_Grid_Fin)

    with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
        Forest_Grid_Fin.fit(data,y)
        print('Done with grid :)')
    scores.append([print_best(Forest_Grid_Fin,Prints=False)])

    print_best(Forest_Grid_Fin)
    print("\n",pair)

0.7702513487952218 {'Gradient__learning_rate': 0.05, 'Gradient__max_depth': 2, 'Gradient__n_estimators': 600, 'Gradient__random_state': 42}
0.7731677336618445 {'Forest__max_depth': 9, 'Forest__n_estimators': 200, 'Forest__random_state': 42}

 (['Sex', 'Pclass', 'Embarked'], ['SibSp', 'Age', 'Fare'])
0.7732873120586929 {'Gradient__learning_rate': 0.1, 'Gradient__max_depth': 5, 'Gradient__n_estimators': 100, 'Gradient__random_state': 42}
0.7556297083572934 {'Forest__max_depth': 9, 'Forest__n_estimators': 500, 'Forest__random_state': 42}

 (['Sex', 'Pclass', 'Embarked', 'Under_17', 'Has_SibSp'], ['Fare'])


In [139]:
# Feature-set sweep, part 2: entries 2 up to (not including) the last one.
for pair in Feature_Pairs[2:-1]:
    # pair = (columns to one-hot encode, columns to keep numeric)
    Preprocess2 = ColumnTransformer([('Categorical_cols', Ctgrcl_trns,pair[0]),
                                     ('Numrecial_cols',Nmrcl_trns,pair[1])])

    # Use the estimators created for this feature set. The pipelines previously
    # referenced the notebook-level `Gradient` / `Forest` objects, leaving
    # Grad_Fin / Forest_Fin unused. Step names are unchanged so the parameter
    # grids still apply.
    Grad_Fin = GradientBoostingClassifier(warm_start=True)
    Pipe_Grad_Fin = Pipeline([
        ('Preprocessing', Preprocess2),
        ('Gradient', Grad_Fin)
    ])

    Forest_Fin = RandomForestClassifier(warm_start=True)
    Pipe_Forest_Fin = Pipeline([
        ('Preprocessing', Preprocess2),
        ('Forest', Forest_Fin)
    ])

    Grad_Grid_Fin = GridSearchCV(Pipe_Grad_Fin,param_grid=params_Grad_f1,scoring='f1',cv=10,verbose=10,n_jobs=1)
    Forest_Grid_Fin = GridSearchCV(Pipe_Forest_Fin,param_grid=params_Frst_f1,scoring='f1',cv=10,verbose=10,n_jobs=1)

    # While fitting, send everything printed (including the first print_best)
    # to file descriptor 1 (the process-level standard output) instead of the
    # notebook. closefd=False leaves descriptor 1 open when the wrapper is
    # closed, and redirect_stdout puts the previous sys.stdout back even if
    # fit() raises.
    with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
        Grad_Grid_Fin.fit(data,y)
        print_best(Grad_Grid_Fin)
        print('Done with grid :)')
    scores.append([print_best(Grad_Grid_Fin,Prints=False)])

    print_best(Grad_Grid_Fin)

    with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
        Forest_Grid_Fin.fit(data,y)
        # Report the forest search that was just fitted (this used to print the
        # gradient-boosting result a second time).
        print_best(Forest_Grid_Fin)
        print('Done with grid :)')
    scores.append([print_best(Forest_Grid_Fin,Prints=False)])

    print_best(Forest_Grid_Fin)
    print("\n",pair)

0.7320755603515294 {'Gradient__learning_rate': 0.1, 'Gradient__max_depth': 2, 'Gradient__n_estimators': 400, 'Gradient__random_state': 42}
0.7216048015283008 {'Forest__max_depth': 6, 'Forest__n_estimators': 200, 'Forest__random_state': 42}

 (['Sex', 'Pclass', 'Embarked', 'Under_17', 'Has_SibSp'], [])
0.7549350622890174 {'Gradient__learning_rate': 0.2, 'Gradient__max_depth': 2, 'Gradient__n_estimators': 400, 'Gradient__random_state': 42}
0.7447045928248592 {'Forest__max_depth': 6, 'Forest__n_estimators': 200, 'Forest__random_state': 42}

 (['Sex', 'Pclass', 'Embarked', 'Has_SibSp'], ['Age'])
0.7266765437720577 {'Gradient__learning_rate': 0.05, 'Gradient__max_depth': 2, 'Gradient__n_estimators': 400, 'Gradient__random_state': 42}
0.7142252496161967 {'Forest__max_depth': None, 'Forest__n_estimators': 200, 'Forest__random_state': 42}

 (['Sex', 'Pclass', 'Embarked', 'Under_17', 'Has_Someone'], [])
0.7697356668170433 {'Gradient__learning_rate': 0.1, 'Gradient__max_depth': 2, 'Gradient__n_e

In [140]:
# Feature-set sweep, part 3: the last entry of Feature_Pairs.
for pair in [Feature_Pairs[-1]]:
    # pair = (columns to one-hot encode, columns to keep numeric)
    Preprocess2 = ColumnTransformer([('Categorical_cols', Ctgrcl_trns,pair[0]),
                                     ('Numrecial_cols',Nmrcl_trns,pair[1])])

    # Use the estimators created for this feature set. The pipelines previously
    # referenced the notebook-level `Gradient` / `Forest` objects, leaving
    # Grad_Fin / Forest_Fin unused. Step names are unchanged so the parameter
    # grids still apply.
    Grad_Fin = GradientBoostingClassifier(warm_start=True)
    Pipe_Grad_Fin = Pipeline([
        ('Preprocessing', Preprocess2),
        ('Gradient', Grad_Fin)
    ])

    Forest_Fin = RandomForestClassifier(warm_start=True)
    Pipe_Forest_Fin = Pipeline([
        ('Preprocessing', Preprocess2),
        ('Forest', Forest_Fin)
    ])

    Grad_Grid_Fin = GridSearchCV(Pipe_Grad_Fin,param_grid=params_Grad_f1,scoring='f1',cv=10,verbose=10,n_jobs=1)
    Forest_Grid_Fin = GridSearchCV(Pipe_Forest_Fin,param_grid=params_Frst_f1,scoring='f1',cv=10,verbose=10,n_jobs=1)

    # While fitting, send everything printed (including the first print_best)
    # to file descriptor 1 (the process-level standard output) instead of the
    # notebook. closefd=False leaves descriptor 1 open when the wrapper is
    # closed, and redirect_stdout puts the previous sys.stdout back even if
    # fit() raises.
    with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
        Grad_Grid_Fin.fit(data,y)
        print_best(Grad_Grid_Fin)
        print('Done with grid :)')
    # Wrapped in a list like the forest entry below and the entries appended by
    # the other two parts of this sweep, so `scores` has one entry shape.
    scores.append([print_best(Grad_Grid_Fin,Prints=False)])

    print_best(Grad_Grid_Fin)

    with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
        Forest_Grid_Fin.fit(data,y)
        # Report the forest search that was just fitted (this used to print the
        # gradient-boosting result a second time).
        print_best(Forest_Grid_Fin)
        print('Done with grid :)')
    scores.append([print_best(Forest_Grid_Fin,Prints=False)])

    print_best(Forest_Grid_Fin)
    print("\n",pair)

0.7702513487952218 {'Gradient__learning_rate': 0.05, 'Gradient__max_depth': 2, 'Gradient__n_estimators': 600, 'Gradient__random_state': 42}
0.7710090271179103 {'Forest__max_depth': 12, 'Forest__n_estimators': 500, 'Forest__random_state': 42}

 (['Sex', 'Pclass', 'Embarked', 'Under_17'], ['SibSp', 'Age', 'Fare'])


In [None]:
# The two feature sets carried into the final search.
# Each entry is (columns to one-hot encode, columns to keep numeric).
Best_Pairs = [
    (['Sex', 'Pclass', 'Embarked'],
     ['SibSp', 'Age', 'Fare']),
    (['Sex', 'Pclass', 'Embarked', 'Under_17', 'Has_SibSp'],
     ['Fare']),
]

In [None]:
# Gradient-boosting search spaces for the final search: one grid per entry of
# Best_Pairs, selected by position.
params_Grad_Fin = [
    {
        'Gradient__n_estimators': [400, 600, 700],
        'Gradient__max_depth': [2, 3],
        'Gradient__random_state': [42],
        'Gradient__learning_rate': [0.1, 0.05],
    },
    {
        'Gradient__n_estimators': [100, 200],
        'Gradient__max_depth': [8, 9, 10, 12],
        'Gradient__random_state': [42],
        'Gradient__learning_rate': [0.1, 0.05],
    },
]

# Forest search space shared by both feature sets.
params_Frst_Fin = {
    'Forest__n_estimators': [200, 350, 500],
    'Forest__max_depth': [None, 8, 9, 10],
    'Forest__random_state': [42],
}


In [192]:
# Final search over the two best feature sets. `index` selects the matching
# gradient-boosting grid and numbers the prediction files (1-based).
for index,pair in enumerate(Best_Pairs):
    # pair = (columns to one-hot encode, columns to keep numeric)
    Preprocess2 = ColumnTransformer([('Categorical_cols', Ctgrcl_trns,pair[0]),
                                     ('Numrecial_cols',Nmrcl_trns,pair[1])])

    # Use the estimators created for this feature set. The pipelines previously
    # referenced the notebook-level `Gradient` / `Forest` objects, leaving
    # Grad_Fin / Forest_Fin unused. Step names are unchanged so the parameter
    # grids still apply.
    Grad_Fin = GradientBoostingClassifier(warm_start=True)
    Pipe_Grad_Fin = Pipeline([
        ('Preprocessing', Preprocess2),
        ('Gradient', Grad_Fin)
    ])

    Forest_Fin = RandomForestClassifier(warm_start=True)
    Pipe_Forest_Fin = Pipeline([
        ('Preprocessing', Preprocess2),
        ('Forest', Forest_Fin)
    ])

    Grad_Grid_Fin = GridSearchCV(Pipe_Grad_Fin,param_grid=params_Grad_Fin[index],scoring='f1',cv=10,verbose=10,n_jobs=1)
    Forest_Grid_Fin = GridSearchCV(Pipe_Forest_Fin,param_grid=params_Frst_Fin,scoring='f1',cv=10,verbose=10,n_jobs=1)

    # While fitting, send everything printed (including the first print_best)
    # to file descriptor 1 (the process-level standard output) instead of the
    # notebook. closefd=False leaves descriptor 1 open when the wrapper is
    # closed, and redirect_stdout puts the previous sys.stdout back even if
    # fit() raises.
    with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
        Grad_Grid_Fin.fit(data,y)
        print_best(Grad_Grid_Fin)
        print('Done with grid :)')
    scores.append(print_best(Grad_Grid_Fin,Prints=False))

    print_best(Grad_Grid_Fin)
    # Confusion matrix on the training data (rows = actual, columns = predicted).
    print(metrics.confusion_matrix(y,Grad_Grid_Fin.predict(data)))
    pd.Series(Grad_Grid_Fin.predict(test), index=test.index, name='Survived'
             ).to_csv('Predictions/Best_Pred_Grad_{}.csv'.format(index + 1))

    with open(1, 'w', closefd=False) as terminal, contextlib.redirect_stdout(terminal):
        Forest_Grid_Fin.fit(data,y)
        # Report the forest search that was just fitted (this used to print the
        # gradient-boosting result a second time).
        print_best(Forest_Grid_Fin)
        print('Done with grid :)')
    scores.append(print_best(Forest_Grid_Fin,Prints=False))

    print_best(Forest_Grid_Fin)
    print(metrics.confusion_matrix(y,Forest_Grid_Fin.predict(data)))
    pd.Series(Forest_Grid_Fin.predict(test), index=test.index, name='Survived'
             ).to_csv('Predictions/Best_Pred_Forest_{}.csv'.format(index + 1))

    print("\n",pair)

0.7702513487952218 {'Gradient__learning_rate': 0.05, 'Gradient__max_depth': 2, 'Gradient__n_estimators': 600, 'Gradient__random_state': 42}
[[519  30]
 [ 66 276]]
0.7731677336618445 {'Forest__max_depth': 9, 'Forest__n_estimators': 200, 'Forest__random_state': 42}
[[538  11]
 [ 44 298]]

 (['Sex', 'Pclass', 'Embarked'], ['SibSp', 'Age', 'Fare'])
0.7603701395235238 {'Gradient__learning_rate': 0.05, 'Gradient__max_depth': 9, 'Gradient__n_estimators': 100, 'Gradient__random_state': 42}
[[530  19]
 [ 49 293]]
0.7608696728957778 {'Forest__max_depth': 8, 'Forest__n_estimators': 500, 'Forest__random_state': 42}
[[527  22]
 [ 71 271]]

 (['Sex', 'Pclass', 'Embarked', 'Under_17', 'Has_SibSp'], ['Fare'])
