In [16]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

#Loading Data

In [23]:
df = sns.load_dataset('titanic')

In [24]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,deck,embark_town,alive,alone
0,0,3,male,22.0,1,0,7.25,S,Third,man,True,,Southampton,no,False
1,1,1,female,38.0,1,0,71.2833,C,First,woman,False,C,Cherbourg,yes,False
2,1,3,female,26.0,0,0,7.925,S,Third,woman,False,,Southampton,yes,True
3,1,1,female,35.0,1,0,53.1,S,First,woman,False,C,Southampton,yes,False
4,0,3,male,35.0,0,0,8.05,S,Third,man,True,,Southampton,no,True


#Data Preprocessing

In [25]:
df['sex'] = df['sex'].map(
    {
        'male' : 1,
        'female' : 0
                          }
)

df['embarked'] = df['embarked'].map(
    {
        'S' : 0,
        'C' : 1,
        'Q' : 2
    }
)
df['who'] = df['who'].map(
    {
        'woman' : 0,
        'man' : 1,
        'child' : 2
                          }
)
df['alive'] = df['alive'].map(
    {
        'yes' : 1,
        'no' : 0
                          }
)
df['class'] = df['class'].map(
    {
        'First' : 0,
        'Second' : 1,
        'Third' : 2
                          }
)
df['embarked'] = df['embarked'].fillna(0)
df['age'] = df['age'].fillna(df['age'].median())
df['deck'] = df['deck'].cat.add_categories('U').fillna('U')
df = pd.get_dummies(df, columns=['deck'], prefix='deck', dtype=int)
df['embark_town'] = df['embark_town'].fillna('Unknown')
df = pd.get_dummies(df, columns=['embark_town'], prefix='embark_town', dtype=int)
df['adult_male'] = df['adult_male'].astype(int)
df['alone'] = df['alone'].astype(int)

In [27]:
df.head()

Unnamed: 0,survived,pclass,sex,age,sibsp,parch,fare,embarked,class,who,adult_male,alive,alone,deck_A,deck_B,deck_C,deck_D,deck_E,deck_F,deck_G,deck_U,embark_town_Cherbourg,embark_town_Queenstown,embark_town_Southampton,embark_town_Unknown
0,0,3,1,22.0,1,0,7.25,0.0,2,1,1,0,0,0,0,0,0,0,0,0,1,0,0,1,0
1,1,1,0,38.0,1,0,71.2833,1.0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0,0
2,1,3,0,26.0,0,0,7.925,0.0,2,0,0,1,1,0,0,0,0,0,0,0,1,0,0,1,0
3,1,1,0,35.0,1,0,53.1,0.0,0,0,0,1,0,0,0,1,0,0,0,0,0,0,0,1,0
4,0,3,1,35.0,0,0,8.05,0.0,2,1,1,0,1,0,0,0,0,0,0,0,1,0,0,1,0


Splitting into Training and Testing data sets and Scaling

In [59]:
df.isnull().sum() #Checking for NaNs

survived                   0
pclass                     0
sex                        0
age                        0
sibsp                      0
parch                      0
fare                       0
embarked                   0
class                      0
who                        0
adult_male                 0
alive                      0
alone                      0
deck_A                     0
deck_B                     0
deck_C                     0
deck_D                     0
deck_E                     0
deck_F                     0
deck_G                     0
deck_U                     0
embark_town_Cherbourg      0
embark_town_Queenstown     0
embark_town_Southampton    0
embark_town_Unknown        0
dtype: int64

In [34]:
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler

In [56]:
X = df.drop('survived', axis=1)
y = df['survived']

X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size= 0.2,
    random_state= 42,
    stratify= y
)
sc = StandardScaler()
X_train_scaled = sc.fit_transform(X_train)
X_test_scaled = sc.transform(X_test)

**KNN**

In [57]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train_scaled, y_train)
y_pred = knn.predict(X_test_scaled)
print(classification_report(y_test, y_pred))


              precision    recall  f1-score   support

           0       0.96      0.98      0.97       110
           1       0.97      0.94      0.96        69

    accuracy                           0.97       179
   macro avg       0.97      0.96      0.96       179
weighted avg       0.97      0.97      0.97       179



In [60]:
X_scaled = sc.fit_transform(X)
scores = cross_val_score(knn, X_scaled, y, cv=5)
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())

Cross-validation scores: [0.96089385 0.93820225 0.96067416 0.92134831 0.96629213]
Mean accuracy: 0.9494821417362376


**SVM**

In [58]:
from sklearn.svm import SVC
svm = SVC(kernel='rbf', gamma='scale')
svm.fit(X_train_scaled, y_train)
y_pred_svm = svm.predict(X_test_scaled)
print(classification_report(y_test, y_pred_svm))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       110
           1       1.00      1.00      1.00        69

    accuracy                           1.00       179
   macro avg       1.00      1.00      1.00       179
weighted avg       1.00      1.00      1.00       179



In [62]:
scores = cross_val_score(svm, X_scaled, y, cv=5)
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())

Cross-validation scores: [0.98324022 0.98876404 0.99438202 0.98314607 1.        ]
Mean accuracy: 0.9899064716590296


**Logistic Regression**

In [43]:
from sklearn.linear_model import LogisticRegression

lr = LogisticRegression(max_iter=1000)
lr.fit(X_train_scaled, y_train)
y_pred_lr = lr.predict(X_test_scaled)
print(classification_report(y_test, y_pred_lr))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00       110
           1       1.00      1.00      1.00        69

    accuracy                           1.00       179
   macro avg       1.00      1.00      1.00       179
weighted avg       1.00      1.00      1.00       179



In [44]:
scores = cross_val_score(lr, X_scaled, y, cv=5)
print("Cross-validation scores:", scores)
print("Mean accuracy:", scores.mean())

Cross-validation scores: [1. 1. 1. 1. 1.]
Mean accuracy: 1.0
