In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, KFold
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

In [3]:
titanic_data = pd.read_csv('/content/titanic_dataset.csv')

In [4]:
titanic_data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'], inplace=True)  # Drop unnecessary columns
titanic_data.dropna(inplace=True)

In [5]:
label_encoders = {}
for column in ['Sex', 'Embarked']:
    label_encoders[column] = LabelEncoder()
    titanic_data[column] = label_encoders[column].fit_transform(titanic_data[column])

In [6]:
X = titanic_data.drop(columns=['Survived'])
Y = titanic_data['Survived']

In [7]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [8]:
X_train, X_test, Y_train, Y_test = train_test_split(X_scaled, Y, test_size=0.2, random_state=42)

In [9]:
knn_model = KNeighborsClassifier()

In [10]:
knn_model

In [11]:
svm_model =SVC()

In [13]:
kf = KFold(n_splits=5, shuffle=True, random_state=42)
stratified_kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

In [15]:
knn_cv_scores = cross_val_score(knn_model, X_scaled, Y, cv=kf)
knn_avg_accuracy = knn_cv_scores.mean()

In [16]:
knn_cv_scores.mean()

0.792159952723333

In [18]:
svm_cv_scores = cross_val_score(svm_model, X_scaled, Y, cv=stratified_kf)
svm_avg_accuracy = svm_cv_scores.mean()

In [19]:
svm_cv_scores.mean()

0.8159755737220525

In [20]:
print("Average Accuracy (kNN):", knn_avg_accuracy)
print("Average Accuracy (SVM):", svm_avg_accuracy)

Average Accuracy (kNN): 0.792159952723333
Average Accuracy (SVM): 0.8159755737220525
