In [1]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold
from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

# Step 1: Load the Titanic dataset
url = "https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv"
titanic = pd.read_csv(url)

# Step 2: Pre-processing
# Drop irrelevant columns and handle missing values
titanic = titanic.drop(['Name', 'Ticket', 'Cabin', 'PassengerId'], axis=1)
titanic = titanic.dropna()

# Convert categorical features to numerical
titanic = pd.get_dummies(titanic, columns=['Sex', 'Embarked'], drop_first=True)

# Separate features and target variable
X = titanic.drop('Survived', axis=1)
y = titanic['Survived']

# Standardize the features
scaler = StandardScaler()
X = scaler.fit_transform(X)

# Step 3: Create kNN and SVM models
knn_model = KNeighborsClassifier()
svm_model = SVC()

# Step 4: Cross-validation
k_fold = 5
stratified_k_fold = StratifiedKFold(n_splits=k_fold)

# kNN model cross-validation
knn_scores = cross_val_score(knn_model, X, y, cv=k_fold)
avg_knn_accuracy = knn_scores.mean()

# SVM model cross-validation
svm_scores = cross_val_score(svm_model, X, y, cv=stratified_k_fold)
avg_svm_accuracy = svm_scores.mean()

# Display the results
print(f"Average kNN Accuracy (k-fold): {avg_knn_accuracy:.2f}")
print(f"Average SVM Accuracy (stratified k-fold): {avg_svm_accuracy:.2f}")


KeyError: ignored