## 1. Import libraries and models

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.linear_model import LogisticRegression
# from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

### A. Loading and Preprocessing Data for ML

In [None]:
import pandas as pd
import seaborn as sns

# Load the Titanic dataset through seaborn's dataset loader.
df = sns.load_dataset('titanic')

# Feature matrix (X) and binary target (y).
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']

# One-hot encode the categorical 'sex' column.
X = pd.get_dummies(X, columns=['sex'])

# Fill missing ages with the mean age.
# The filled column is assigned back instead of calling
# `X.age.fillna(..., inplace=True)`: that chained in-place call operates on a
# temporary Series, emits a FutureWarning in pandas 2.x, and leaves `X`
# unchanged once Copy-on-Write is enabled.
# NOTE(review): the mean is computed over all rows before the split, so
# test-set ages influence the imputed value; consider imputing from the
# training rows only if strict leakage control matters.
X['age'] = X['age'].fillna(X['age'].mean())

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
# create model

## 2. Define the models to evaluate

In [None]:
# Candidate classifiers to compare.
# - LogisticRegression: raise the iteration cap so the solver can converge on
#   these unscaled features instead of stopping early with a
#   ConvergenceWarning.
# - RandomForestClassifier: fix the seed so scores are reproducible between
#   runs.
models = [
    LogisticRegression(max_iter=1000),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier(),
    SVC(),
]

# Display names, in the same order as `models` (the two lists are zipped
# together wherever the models are evaluated).
names = ['LogisticRegression', 'RandomForest', 'KNN', 'SVC']

## 3. Perform k-fold cross-validation for each model

In [None]:
# Number of cross-validation folds.
k = 10

# Score every candidate on the training split and report the mean and
# standard deviation of the per-fold scores.
for name, model in zip(names, models):
    cv_scores = cross_val_score(model, X_train, y_train, cv=k)
    mean_score = np.mean(cv_scores)
    score_spread = np.std(cv_scores)
    print(f'{name} CV Accuracy : {mean_score:.3f}+/-{score_spread:.3f}')

# 4. Apply Hyperparameter Tuning on SVC
### Hyperparameter tuning is the process of finding the optimal hyperparameters for a machine learning model to maximize performance.

### Here it is done with Grid Search CV, which automatically evaluates each parameter combination with cross-validation and reports the best one.


In [None]:
from sklearn.model_selection import GridSearchCV

# Define the parameter grid as one sub-grid per kernel.
# `gamma` is ignored by the linear kernel, so a single flat grid would refit
# every linear candidate once per gamma value for no benefit. Splitting the
# grid searches the same distinct models with fewer fits.
# Note: when the linear kernel wins, `best_params_` contains no 'gamma' key.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.1, 1, 10]},
]

# Exhaustive search over the grid with 5-fold cross-validation.
grid_search = GridSearchCV(SVC(), param_grid, cv=5)  # object creation
grid_search.fit(X_train, y_train)  # train the model

In [None]:
# Report the winning parameter combination and its cross-validated score.
best_params = grid_search.best_params_
best_score = grid_search.best_score_
print(f'Best Parameters : {best_params}')
print(f'Best CV Score : {best_score:.2f}')


# Accuracy Score of all models

In [None]:
# Fit each candidate on the training split and record its accuracy on the
# held-out test split as a [name, accuracy] pair.
model_score = []
for name, model in zip(names, models):
    # fit() returns the fitted estimator, so predict() can be chained onto it.
    ypred = model.fit(X_train, y_train).predict(X_test)
    accuracy = accuracy_score(y_test, ypred)
    model_score.append([name, accuracy])

### Sort the models by score (highest first)

In [None]:
# Rank the [name, accuracy] pairs from highest to lowest accuracy.
# Sort a copy so `model_score` keeps its original order.
sorted_models = list(model_score)
sorted_models.sort(key=lambda entry: entry[1], reverse=True)

In [None]:
# Print the ranking, one "name : accuracy" line per model.
# Each entry is unpacked into its own names rather than reusing `model` as the
# loop variable, which would overwrite the notebook-level estimator variable
# with a [name, accuracy] list.
for model_name, score in sorted_models:
    print(f'{model_name} : {score:.2f}')