In [None]:
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis

# Base estimators.
# Every estimator that accepts `random_state` is pinned to 42 so that a
# Restart & Run All reproduces the same fitted models and the same tuned
# hyper-parameters. KNN, LDA and QDA take no seed.
lr = LogisticRegression(random_state=42)
knn = KNeighborsClassifier()
dt = DecisionTreeClassifier(random_state=42)
rf = RandomForestClassifier(random_state=42)
ada = AdaBoostClassifier(random_state=42)
xgb = XGBClassifier(eval_metric='logloss', random_state=42)
sgd = SGDClassifier(loss='log_loss', random_state=42)
lda = LinearDiscriminantAnalysis()
qda = QuadraticDiscriminantAnalysis()

# Hyper-parameter searches. Each one wraps a base estimator in 5-fold
# cross-validation; nothing is fitted here.

# KNN: exhaustive search over k = 1..49.
para_knn={'n_neighbors':np.arange(1,50)}
grid_knn=GridSearchCV(knn,param_grid=para_knn,cv=5)

# Decision tree: split criterion x depth 1..49 x minimum leaf size.
para_dt={'criterion':['gini','entropy'],'max_depth':np.arange(1,50),
         'min_samples_leaf':[1,2,4,5,10,20,30,40,50,80,100]}
grid_dt=GridSearchCV(dt,param_grid=para_dt,cv=5)

# Random forest: number of trees x minimum leaf size.
params_rf={'n_estimators':[100,200,350,500],
           'min_samples_leaf':[2,10,30,50,80,100]}
grid_rf=GridSearchCV(rf,param_grid=params_rf,cv=5)

# AdaBoost: number of boosting rounds x learning rate.
# NOTE(review): 'algorithm' is pinned to 'SAMME', presumably to avoid the
# SAMME.R deprecation warning in recent scikit-learn -- confirm.
params_ada={
    'algorithm': ['SAMME'],
    'n_estimators':[50,100,250,400,500],
    'learning_rate':[0.1,0.001,0.2,0.5,0.8,1]}
grid_ada=GridSearchCV(ada,param_grid=params_ada,cv=5)

# XGBoost: randomized search. With n_iter left at its default, only a
# random subset of the 36 combinations is tried, so the sampler is seeded
# to make the chosen candidates (and the reported best parameters)
# repeatable between runs.
params_xgb={'n_estimators':[50,100,250,600,800,1000],
           'learning_rate':[0.1,0.001,0.2,0.5,0.8,1]}
rs_xgb=RandomizedSearchCV(xgb,param_distributions=params_xgb,cv=5,random_state=42)

In [None]:
# Hold out 30% of the labelled rows for validation, stratified on the
# target column so both partitions keep the same class balance.
x_train, x_val, y_train, y_val = train_test_split(
    train_df.drop(columns=["Survived"]),
    train_df["Survived"],
    test_size=0.3,
    random_state=42,
    stratify=train_df["Survived"],
)

# Fit the tuned searches first, then the estimators used with their
# constructor settings as-is.
for _estimator in (grid_dt, grid_rf, grid_ada, rs_xgb, sgd, lda, qda):
    _estimator.fit(x_train, y_train)

# Report the winning hyper-parameters of each search.
for _label, _search in (
    ("Decision Tree", grid_dt),
    ("Random Forest", grid_rf),
    ("AdaBoost", grid_ada),
    ("XGBoost", rs_xgb),
):
    print(f"Best parameters for {_label}:", _search.best_params_)

In [None]:
# Rebind the short names to the refitted winner of each search so later
# cells score the tuned models rather than the unfitted prototypes.
dt, rf, ada, xgb = (
    grid_dt.best_estimator_,
    grid_rf.best_estimator_,
    grid_ada.best_estimator_,
    rs_xgb.best_estimator_,
)

In [None]:
# (display name, fitted estimator) pairs, in the order they are reported.
classifiers = list({
    'Decision Tree': dt,
    'Random Forest': rf,
    'AdaBoost': ada,
    'XGBoost': xgb,
    'SGD': sgd,
    'LDA': lda,
    'QDA': qda,
}.items())

In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Print per-class metrics and draw a confusion-matrix heatmap for every
# entry in `classifiers`, scored on the held-out validation split.
for classifier_name, classifier in classifiers:
    # No fitting happens here: the estimators are used as they arrive.
    y_pred = classifier.predict(x_val)

    print(f'{classifier_name} classification report')
    print(classification_report(y_val, y_pred))

    cm = confusion_matrix(y_val, y_pred)
    # One explicit figure per classifier instead of the pyplot state machine.
    fig, ax = plt.subplots()
    # fmt='d' prints the integer counts verbatim; seaborn's default '.2g'
    # would render any cell >= 100 in scientific notation (e.g. 1.1e+02).
    sns.heatmap(cm, annot=True, fmt='d', ax=ax, cmap='Blues')
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
    ax.set_title(classifier_name)
    plt.show()
    # Release the figure so a long classifier list does not accumulate them.
    plt.close(fig)