# Best Model Selection

In [2]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df = sns.load_dataset('titanic')
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']
X = pd.get_dummies(X, columns=['sex']) #(Label encoding)
X.age.fillna(value= X['age'].mean(), inplace=True)

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42) #what is random state and why it's value is taken as 42.
models = [LogisticRegression(), KNeighborsClassifier(), RandomForestClassifier(), DecisionTreeClassifier(), SVC()]
model_names = ['Logistic Regression', 'KNN', 'Random Forest','Decision tree', 'SVM']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    models_scores.append([model_name,accuracy])

sorted_models = sorted(models_scores, key = lambda x: x[1], reverse=True) # check out the lambda function
for model in sorted_models:
    print("Accuracy Score: ", f'{model[0]} : {model[1] :.2f}') # How it works


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X.age.fillna(value= X['age'].mean(), inplace=True)


Accuracy Score:  Logistic Regression : 0.81
Accuracy Score:  Random Forest : 0.80
Accuracy Score:  Decision tree : 0.78
Accuracy Score:  KNN : 0.69
Accuracy Score:  SVM : 0.66


In [7]:
models = [LogisticRegression(), KNeighborsClassifier(), RandomForestClassifier(), DecisionTreeClassifier(), SVC()]
model_names = ['Logistic Regression', 'KNN', 'Random Forest','Decision tree', 'SVM']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Precision = precision_score(y_test, y_pred)
    models_scores.append([model_name,Precision])

sorted_models = sorted(models_scores, key = lambda x: x[1], reverse=True) 
for model in sorted_models:
    print("Precision Score: ", f'{model[0]} : {model[1] :.2f}') 

Precision Score:  Logistic Regression : 0.80
Precision Score:  Random Forest : 0.78
Precision Score:  SVM : 0.76
Precision Score:  Decision tree : 0.72
Precision Score:  KNN : 0.66


In [9]:
models = [LogisticRegression(), KNeighborsClassifier(), RandomForestClassifier(), DecisionTreeClassifier(), SVC()]
model_names = ['Logistic Regression', 'KNN', 'Random Forest','Decision tree', 'SVM']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Recall = recall_score(y_test, y_pred)
    models_scores.append([model_name,Recall])
    

sorted_models = sorted(models_scores, key = lambda x: x[1], reverse=True) 
for model in sorted_models:
    print("Recall Score: ", f'{model[0]} : {model[1] :.2f}') 

Recall Score:  Logistic Regression : 0.72
Recall Score:  Random Forest : 0.72
Recall Score:  Decision tree : 0.72
Recall Score:  KNN : 0.54
Recall Score:  SVM : 0.26


In [10]:
models = [LogisticRegression(), KNeighborsClassifier(), RandomForestClassifier(), DecisionTreeClassifier(), SVC()]
model_names = ['Logistic Regression', 'KNN', 'Random Forest','Decision tree', 'SVM']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    F1 = f1_score(y_test, y_pred)
    models_scores.append([model_name,F1])

sorted_models = sorted(models_scores, key = lambda x: x[1], reverse=True) 
for model in sorted_models:
    print("F1 Score: ", f'{model[0]} : {model[1] :.2f}') 

F1 Score:  Random Forest : 0.76
F1 Score:  Logistic Regression : 0.76
F1 Score:  Decision tree : 0.71
F1 Score:  KNN : 0.59
F1 Score:  SVM : 0.38
