## Best Model Selection

In [5]:
# Ignore Warnings
import warnings
warnings.filterwarnings('ignore')

# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Titanic passenger table via seaborn and build the model inputs:
# six passenger attributes as features, `survived` as the target.
df = sns.load_dataset('titanic')
y = df['survived']

# One-hot encode `sex` so every feature column is numeric for the estimators.
X = pd.get_dummies(df[['pclass','sex','age','sibsp','parch','fare']], columns=['sex'])

# Replace missing ages with the mean age. The filled column is assigned back
# instead of calling fillna(inplace=True) on `X.age`: that form operates on a
# selection of the frame and, with pandas Copy-on-Write, leaves `X` unchanged,
# so the NaNs would reach the estimators.
# NOTE(review): the mean is computed over all rows, before the train/test
# split further down in this cell, so test rows influence the imputed value.
X['age'] = X['age'].fillna(X['age'].mean())


from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set.
# NOTE(review): the precision and recall cells of this notebook split with
# random_state=42, so their scores are measured on a different test set than
# the accuracy reported here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Candidate classifiers with default hyper-parameters. The tree-based models
# are seeded (same seed as the split) because they are stochastic; without a
# seed the ranking printed below can change from one run to the next.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=0),
    RandomForestClassifier(random_state=0),
    KNeighborsClassifier(),
]
models_name = ['Logistic regression','SVM','Decision Tree','Random Forest','KNN']

# Fit every candidate on the training split and record [name, test accuracy].
models_score = []
for model, model_name in zip(models, models_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)
    models_score.append([model_name, accuracy])

# Print the candidates best-first. The pair is unpacked into its own names so
# the loop no longer rebinds `model` (an estimator above) to a [name, score] list.
sorted_models = sorted(models_score, key=lambda x: x[1], reverse=True)
for ranked_name, ranked_score in sorted_models:
    print("Accuracy Score:", f'{ranked_name} : {ranked_score:.2f}')

Accuracy Score: Random Forest : 0.83
Accuracy Score: Logistic regression : 0.80
Accuracy Score: Decision Tree : 0.79
Accuracy Score: KNN : 0.75
Accuracy Score: SVM : 0.72


## Model selection with precision score

In [7]:
# Ignore Warnings
import warnings
warnings.filterwarnings('ignore')

# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Titanic passenger table via seaborn and build the model inputs:
# six passenger attributes as features, `survived` as the target.
df = sns.load_dataset('titanic')
y = df['survived']

# One-hot encode `sex` so every feature column is numeric for the estimators.
X = pd.get_dummies(df[['pclass','sex','age','sibsp','parch','fare']], columns=['sex'])

# Replace missing ages with the mean age. The filled column is assigned back
# instead of calling fillna(inplace=True) on `X.age`: that form operates on a
# selection of the frame and, with pandas Copy-on-Write, leaves `X` unchanged,
# so the NaNs would reach the estimators.
# NOTE(review): the mean is computed over all rows, before the train/test
# split further down in this cell, so test rows influence the imputed value.
X['age'] = X['age'].fillna(X['age'].mean())


from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set.
# NOTE(review): the accuracy and F1 cells of this notebook split with
# random_state=0, so their scores are measured on a different test set than
# the precision reported here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate classifiers with default hyper-parameters. The tree-based models
# are seeded (same seed as the split) because they are stochastic; without a
# seed the ranking printed below can change from one run to the next.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier(),
]
models_name = ['Logistic regression','SVM','Decision Tree','Random Forest','KNN']

# Fit every candidate on the training split and record [name, test precision]
# (precision of the positive class, i.e. survived == 1).
models_score = []
for model, model_name in zip(models, models_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Precision = precision_score(y_test, y_pred)
    models_score.append([model_name, Precision])

# Print the candidates best-first. The pair is unpacked into its own names so
# the loop no longer rebinds `model` (an estimator above) to a [name, score] list.
sorted_models = sorted(models_score, key=lambda x: x[1], reverse=True)
for ranked_name, ranked_score in sorted_models:
    print("Precision Score:", f'{ranked_name} : {ranked_score:.2f}')

Precision Score: Logistic regression : 0.80
Precision Score: Random Forest : 0.80
Precision Score: SVM : 0.76
Precision Score: Decision Tree : 0.74
Precision Score: KNN : 0.66


## Based on Recall Score

In [9]:
# Ignore Warnings
import warnings
warnings.filterwarnings('ignore')

# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Titanic passenger table via seaborn and build the model inputs:
# six passenger attributes as features, `survived` as the target.
df = sns.load_dataset('titanic')
y = df['survived']

# One-hot encode `sex` so every feature column is numeric for the estimators.
X = pd.get_dummies(df[['pclass','sex','age','sibsp','parch','fare']], columns=['sex'])

# Replace missing ages with the mean age. The filled column is assigned back
# instead of calling fillna(inplace=True) on `X.age`: that form operates on a
# selection of the frame and, with pandas Copy-on-Write, leaves `X` unchanged,
# so the NaNs would reach the estimators.
# NOTE(review): the mean is computed over all rows, before the train/test
# split further down in this cell, so test rows influence the imputed value.
X['age'] = X['age'].fillna(X['age'].mean())


from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set.
# NOTE(review): the accuracy and F1 cells of this notebook split with
# random_state=0, so their scores are measured on a different test set than
# the recall reported here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Candidate classifiers with default hyper-parameters. The tree-based models
# are seeded (same seed as the split) because they are stochastic; without a
# seed the ranking printed below can change from one run to the next.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=42),
    RandomForestClassifier(random_state=42),
    KNeighborsClassifier(),
]
models_name = ['Logistic regression','SVM','Decision Tree','Random Forest','KNN']

# Fit every candidate on the training split and record [name, test recall]
# (recall of the positive class, i.e. survived == 1).
models_score = []
for model, model_name in zip(models, models_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Recall = recall_score(y_test, y_pred)
    models_score.append([model_name, Recall])

# Print the candidates best-first. The pair is unpacked into its own names so
# the loop no longer rebinds `model` (an estimator above) to a [name, score] list.
sorted_models = sorted(models_score, key=lambda x: x[1], reverse=True)
for ranked_name, ranked_score in sorted_models:
    print("Recall Score:", f'{ranked_name} : {ranked_score:.2f}')

Recall Score: Random Forest : 0.73
Recall Score: Logistic regression : 0.72
Recall Score: Decision Tree : 0.69
Recall Score: KNN : 0.54
Recall Score: SVM : 0.26


## Based on F1 Score

In [10]:
# Ignore Warnings
import warnings
warnings.filterwarnings('ignore')

# import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Load the Titanic passenger table via seaborn and build the model inputs:
# six passenger attributes as features, `survived` as the target.
df = sns.load_dataset('titanic')
y = df['survived']

# One-hot encode `sex` so every feature column is numeric for the estimators.
X = pd.get_dummies(df[['pclass','sex','age','sibsp','parch','fare']], columns=['sex'])

# Replace missing ages with the mean age. The filled column is assigned back
# instead of calling fillna(inplace=True) on `X.age`: that form operates on a
# selection of the frame and, with pandas Copy-on-Write, leaves `X` unchanged,
# so the NaNs would reach the estimators.
# NOTE(review): the mean is computed over all rows, before the train/test
# split further down in this cell, so test rows influence the imputed value.
X['age'] = X['age'].fillna(X['age'].mean())


from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score, recall_score, precision_score
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows as the test set.
# NOTE(review): the precision and recall cells of this notebook split with
# random_state=42, so their scores are measured on a different test set than
# the F1 reported here.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

# Candidate classifiers with default hyper-parameters. The tree-based models
# are seeded (same seed as the split) because they are stochastic; without a
# seed the ranking printed below can change from one run to the next.
models = [
    LogisticRegression(),
    SVC(),
    DecisionTreeClassifier(random_state=0),
    RandomForestClassifier(random_state=0),
    KNeighborsClassifier(),
]
models_name = ['Logistic regression','SVM','Decision Tree','Random Forest','KNN']

# Fit every candidate on the training split and record [name, test F1]
# (F1 of the positive class, i.e. survived == 1).
models_score = []
for model, model_name in zip(models, models_name):
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    f1 = f1_score(y_test, y_pred)
    models_score.append([model_name, f1])

# Print the candidates best-first. The pair is unpacked into its own names so
# the loop no longer rebinds `model` (an estimator above) to a [name, score] list.
sorted_models = sorted(models_score, key=lambda x: x[1], reverse=True)
for ranked_name, ranked_score in sorted_models:
    print("f1_Score:", f'{ranked_name} : {ranked_score:.2f}')

f1_Score: Random Forest : 0.74
f1_Score: Logistic regression : 0.73
f1_Score: Decision Tree : 0.70
f1_Score: KNN : 0.67
f1_Score: SVM : 0.50
