## Best Model Selection

In [8]:
# ipmort libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

df = sns.load_dataset("titanic")
X = df[['pclass','sex','age','sibsp','parch','fare']]
y = df['survived']
X = pd.get_dummies(X, columns=['sex'])
X.age.fillna(value=X['age'].mean(), inplace=True)

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

models=[LogisticRegression(),DecisionTreeClassifier(),KNeighborsClassifier(),RandomForestClassifier(),SVC()]
model_names = ["Logistic Regression", "Decision Tree", "KNN", "Random Forest", "SVM"]

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    models_scores.append([model_name,accuracy])
    
sorted_models = sorted(models_scores, key=lambda x:x[1], reverse=True)
for model in sorted_models:
        print("Accuracy Score:", f'{model[0]}:{model[1]:.2f}')


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  X.age.fillna(value=X['age'].mean(), inplace=True)


Accuracy Score: Logistic Regression:0.81
Accuracy Score: Random Forest:0.80
Accuracy Score: Decision Tree:0.77
Accuracy Score: KNN:0.69
Accuracy Score: SVM:0.66


write the "minimum coding" in your CV.
 

In [15]:
models=[LogisticRegression(),DecisionTreeClassifier(),KNeighborsClassifier(),RandomForestClassifier(),SVC()]
model_names = ["Logistic Regression", "Decision Tree", "KNN", "Random Forest", "SVM"]

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Precision = precision_score(y_test, y_pred)
    models_scores.append([model_name,accuracy])
    
sorted_models = sorted(models_scores, key=lambda x:x[1], reverse=True)
for model in sorted_models:
        print("Precision score:", f'{model[0]}:{model[1]:.2f}')

Precision score: Logistic Regression:0.66
Precision score: Decision Tree:0.66
Precision score: KNN:0.66
Precision score: Random Forest:0.66
Precision score: SVM:0.66


In [14]:
models=[LogisticRegression(),DecisionTreeClassifier(),KNeighborsClassifier(),RandomForestClassifier(),SVC()]
model_names = ["Logistic Regression", "Decision Tree", "KNN", "Random Forest", "SVM"]

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    recall = recall_score(y_test, y_pred)
    models_scores.append([model_name,accuracy])
    
sorted_models = sorted(models_scores, key=lambda x:x[1], reverse=True)
for model in sorted_models:
        print("recall score:", f'{model[0]}:{model[1]:.2f}')

recall score: Logistic Regression:0.66
recall score: Decision Tree:0.66
recall score: KNN:0.66
recall score: Random Forest:0.66
recall score: SVM:0.66


In [16]:
models=[LogisticRegression(),DecisionTreeClassifier(),KNeighborsClassifier(),RandomForestClassifier(),SVC()]
model_names = ["Logistic Regression", "Decision Tree", "KNN", "Random Forest", "SVM"]

models_scores = []
for model, model_name in zip(models, model_names):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    models_scores.append([model_name,accuracy])
    
sorted_models = sorted(models_scores, key=lambda x:x[1], reverse=True)
for model in sorted_models:
        print("F1 Score:", f'{model[0]}:{model[1]:.2f}')

F1 Score: Logistic Regression:0.66
F1 Score: Decision Tree:0.66
F1 Score: KNN:0.66
F1 Score: Random Forest:0.66
F1 Score: SVM:0.66
