# Best Model Selection


In [None]:
# Import the necessary libraries
# import libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df = sns.load_dataset("titanic")
X = df[['pclass', 'sex', 'age', 'sibsp', 'parch', 'fare']]
y = df['survived']  # if you these code you have to change these three lines and you can use your own data of classification


X = pd.get_dummies(X, columns=['sex']) # this is called label and coding
X.age.fillna(value = X['age'].mean(), inplace=True)


from sklearn.linear_model import LogisticRegression #(classification)
from sklearn.svm import SVC#(support vector classification)
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    models_scores.append([model_name,accuracy])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True) # sort the models by their accuracy and lambda? minimal coding or low code .simplify the code
for model in sorted_models:
    print("Accuracy Score: ",f'{model[0]} : {model[1]:.2f}')


# Accuracy Score:  Random Forest : 0.81
# Accuracy Score:  Decision Tree : 0.79
# Accuracy Score:  KNN : 0.76
# Accuracy Score:  Logistic Regression : 0.75
# Accuracy Score:  SVM : 0.74

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X.age.fillna(value = X['age'].mean(), inplace=True)


Accuracy Score:  Logistic Regression : 0.81
Accuracy Score:  Random Forest : 0.80
Accuracy Score:  Decision Tree : 0.75
Accuracy Score:  KNN : 0.69
Accuracy Score:  SVM : 0.66


In [3]:
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']
models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Precision = precision_score(y_test, y_pred)
    models_scores.append([model_name,Precision])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Precision Score: ", f'{model[0]} : {model[1]:.2f}')

# Precision Score:  Random Forest : 0.80
# Precision Score:  Decision Tree : 0.78
# Precision Score:  KNN : 0.75
# Precision Score:  Logistic Regression : 0.74
# Precision Score:  SVM : 0.73

Precision Score:  Random Forest : 0.81
Precision Score:  Logistic Regression : 0.80
Precision Score:  SVM : 0.76
Precision Score:  Decision Tree : 0.72
Precision Score:  KNN : 0.66


In [4]:
models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']
models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Recall = recall_score(y_test, y_pred)
    models_scores.append([model_name,Recall])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("Recall Score: ",f'{model[0]} : {model[1]:.2f}')

# Recall Score:  Random Forest : 0.74
# Recall Score:  Decision Tree : 0.72
# Recall Score:  KNN : 0.68
# Recall Score:  Logistic Regression : 0.67
# Recall Score:  SVM : 0.65

Recall Score:  Logistic Regression : 0.72
Recall Score:  Random Forest : 0.72
Recall Score:  Decision Tree : 0.70
Recall Score:  KNN : 0.54
Recall Score:  SVM : 0.26


In [5]:

models = [LogisticRegression(), SVC(), DecisionTreeClassifier(), RandomForestClassifier(), KNeighborsClassifier()]
model_names = ['Logistic Regression', 'SVM', 'Decision Tree', 'Random Forest', 'KNN']
models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    F1 = f1_score(y_test, y_pred)
    models_scores.append([model_name,F1])

sorted_models = sorted(models_scores, key=lambda x: x[1], reverse=True)
for model in sorted_models:
    print("F1 Score: ",f'{model[0]} : {model[1]:.2f}')

# F1 Score:  Random Forest : 0.77
# F1 Score:  Decision Tree : 0.75
# F1 Score:  KNN : 0.71
# F1 Score:  Logistic Regression : 0.70
# F1 Score:  SVM : 0.68

F1 Score:  Logistic Regression : 0.76
F1 Score:  Random Forest : 0.75
F1 Score:  Decision Tree : 0.72
F1 Score:  KNN : 0.59
F1 Score:  SVM : 0.38
