In [135]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

In [136]:
titanic = pd.read_csv("titanic_cleaned.csv")

In [137]:
X = titanic.drop("Survived", axis=1)
y = titanic["Survived"]

In [138]:
X.head()

Unnamed: 0,Age,Fare,FamilySize,Embarked_C,Embarked_Q,Embarked_S,Sex_female,Sex_male,Pclass_1,Pclass_2,Pclass_3
0,-0.534891,-0.502445,1,0,0,1,0,1,0,0,1
1,0.668392,0.786845,1,1,0,0,1,0,1,0,0
2,-0.23407,-0.488854,0,0,0,1,1,0,0,0,1
3,0.442776,0.42073,1,0,0,1,1,0,1,0,0
4,0.442776,-0.486337,0,0,0,1,0,1,0,0,1


In [139]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [140]:
X_train.shape

(712, 11)

In [141]:
models = [
    ("DecisionTree", DecisionTreeClassifier()),
    ("RandomForest", RandomForestClassifier()),
    ("SVM", SVC(kernel="poly")),
]

In [142]:
def train(model, X_train, y_train):
    model.fit(X_train, y_train)
    return model

In [143]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    print(f"classification report {classification_report(y_test, y_pred)}")
    print(f"confusion matrix {confusion_matrix(y_test, y_pred)}")
    print(f"accuracy score {accuracy_score(y_test, y_pred)}")

In [144]:
def train_and_evuate_model(models):
    for name, model in models:
        print(f"Model: {name}")
        model = train(model, X_train, y_train)
        evaluate_model(model, X_test, y_test)
        print("=" * 50)

In [145]:
train_and_evuate_model(models=models)

Model: DecisionTree
classification report               precision    recall  f1-score   support

           0       0.83      0.75      0.79       105
           1       0.69      0.78      0.73        74

    accuracy                           0.77       179
   macro avg       0.76      0.77      0.76       179
weighted avg       0.77      0.77      0.77       179

confusion matrix [[79 26]
 [16 58]]
accuracy score 0.7653631284916201
Model: RandomForest
classification report               precision    recall  f1-score   support

           0       0.86      0.85      0.85       105
           1       0.79      0.80      0.79        74

    accuracy                           0.83       179
   macro avg       0.82      0.82      0.82       179
weighted avg       0.83      0.83      0.83       179

confusion matrix [[89 16]
 [15 59]]
accuracy score 0.8268156424581006
Model: SVM
classification report               precision    recall  f1-score   support

           0       0.81      0.88 