# Titanic Dead or Alive Prediction

In [3]:
# Imports
import pandas as pd
from sklearn.preprocessing import OneHotEncoder , StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split , GridSearchCV
from sklearn.metrics import f1_score

In [4]:
# Load Dataset
df1 = pd.read_csv(r"C:\Users\sailj\OneDrive\文档\GitHub\classification_model_1\Titanic-Dataset.csv")
num_cols = ['Pclass', 'Age', 'SibSp', 'Parch', 'Fare']
cat_cols = ['Sex', 'Embarked']

In [5]:
# Handling unkown
df1["Age"] = df1["Age"].fillna(df1["Age"].median())
df1["Embarked"] = df1["Embarked"].fillna(df1["Embarked"].mode()[0])

In [6]:
# Train Test Split
x = df1.drop(["PassengerId" , "Name" , "Survived" , "Cabin" , "Ticket"] , axis=1)
y = df1["Survived"]
xtrain , xtest , ytrain , ytest = train_test_split(x , y , test_size=0.2 , random_state=42)

In [7]:
# Pipeline
preprocess = ColumnTransformer([("num" , StandardScaler() , num_cols) , ("cat" , OneHotEncoder(drop="first" , handle_unknown="ignore") , cat_cols)])

pipeline = Pipeline([("preprocess" , preprocess) , ("logreg" , LogisticRegression(max_iter=10000))])

In [None]:
#Grid SearchCV
param_grid = {"logreg__C" : [0.001 , 0.01 , 0.1 , 1 ,10 ,100] , "logreg__penalty" : ["l2"] , "logreg__solver" : ["lbfgs"]}

grid = GridSearchCV(pipeline, param_grid , cv=5 , scoring="f1")

grid.fit(xtrain , ytrain)

model = grid.best_estimator_

In [11]:
# Model Prediction and Evaluation
ypred = model.predict(xtest)
print(ypred)
print()

f1Score = f1_score(ytest , ypred)
print("F1 Score : ",f1Score)

[0 0 0 1 1 1 1 0 1 1 0 0 0 0 0 1 0 1 0 0 0 0 1 0 0 0 0 0 0 1 0 1 1 1 0 0 0
 1 1 0 0 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 0 1 1 1 0 1 1 0 0 1 0 0 0 1 1 1 1 1
 0 0 1 1 1 0 0 1 1 0 0 0 1 1 0 0 0 0 0 0 0 0 0 0 1 0 0 0 1 0 0 0 1 0 0 0 1
 0 1 0 1 0 0 0 1 0 0 1 1 0 0 0 1 1 1 0 1 0 0 1 0 1 1 0 0 1 0 1 0 0 1 1 0 0
 1 0 0 0 0 1 0 0 0 1 1 1 0 0 0 1 0 0 0 1 0 0 1 1 0 1 0 0 0 1 1]

F1 Score :  0.7552447552447552


In [10]:
# Hence Model perform best with GridSearchCV and is able to predict Dead(0) Or Survived(1) in Titanic