# In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import LeaveOneOut
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, f1_score, roc_auc_score, confusion_matrix, roc_curve
import matplotlib.pyplot as plt
from sklearn.preprocessing import StandardScaler




# Load the training CSV and show its first rows as a quick sanity check.
df = pd.read_csv('C:\\Users\\ishig\\anaconda_projects\\train.csv')
print("Dataset Preview:", df.head(), sep="\n")

# 'Survived' is the prediction target; every other column is a candidate feature.
y = df['Survived']
x = df.drop(columns='Survived')


# Non-numeric columns: missing entries become the literal category 'Unknown'.
x_categorical = x.select_dtypes(exclude=np.number).fillna('Unknown')

# Numeric columns: missing entries are replaced by that column's mean.
x_numeric = x.select_dtypes(include=np.number)
x_numeric = x_numeric.fillna(x_numeric.mean())

# Re-join the two halves (numeric columns first), then one-hot encode the
# non-numeric columns, dropping the first level of each to avoid a redundant
# indicator column.
# NOTE(review): every non-numeric column is encoded here; if any of them is
# close to unique per row this produces a very wide matrix -- confirm intended.
x = pd.get_dummies(
    pd.concat([x_numeric, x_categorical], axis=1),
    drop_first=True,
)

# Standardize every feature column; the scaler is fitted on the full matrix
# and then applied to that same matrix.
scaler = StandardScaler().fit(x)
x_scaled = scaler.transform(x)

print("Shape of x_scaled:", x_scaled.shape)

# Leave-one-out cross-validation: each sample is held out once while the
# classifier is trained on all the remaining samples.
loo = LeaveOneOut()

# Results pooled across folds (one entry per held-out sample).
y_true, y_pred = [], []
# predict_proba output of each fold; stacked into `y_prob` after the loop so we
# do not re-copy a growing array on every iteration.
fold_probs = []

classifier_SVM = SVC(kernel='rbf', random_state=0, probability=True)

# Running sum of per-fold accuracies, averaged after the loop.
sumAcc = 0

for fold, (train_index, test_index) in enumerate(loo.split(x_scaled), start=1):
    print(f"Fold {fold}:")
    # Split into training and testing sets
    x_train, x_test = x_scaled[train_index], x_scaled[test_index]
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

    # Everything below must run once per fold, so it lives inside the loop.
    classifier_SVM.fit(x_train, y_train)

    yhat = classifier_SVM.predict(x_test)
    yprob = classifier_SVM.predict_proba(x_test)

    y_true.append(y_test.iloc[0])  # Actual value
    y_pred.append(yhat[0])  # Predicted value
    fold_probs.append(yprob)

    # Score only this fold's held-out sample, not the cumulative lists.
    acc = accuracy_score(y_test, yhat)
    print(f"Accuracy for Fold {fold}: {acc:.3f}")

    sumAcc += acc

# One row per fold, one column per class; empty (0, 2) array if no fold ran.
y_prob = np.vstack(fold_probs) if fold_probs else np.empty((0, 2))

# split() returns a generator (no len()); get_n_splits() gives the fold count.
overall_accuracy = sumAcc / loo.get_n_splits(x_scaled)
print(f'Overall Accuracy: {overall_accuracy:.3f}')


# Summary metrics over the pooled per-fold predictions.
svm_prc_loo = precision_score(y_true, y_pred)
svm_f1_loo = f1_score(y_true, y_pred)
# ROC AUC needs a continuous score, not hard labels: use the same
# `y_prob[:, 1]` scores that the ROC curve is drawn from so the reported AUC
# matches the plotted curve.
# NOTE(review): assumes column 1 of predict_proba corresponds to label 1
# (i.e. the classifier's classes are ordered [0, 1]) -- confirm.
svm_auc_loo = roc_auc_score(y_true, y_prob[:, 1])


cm = confusion_matrix(y_true, y_pred)
print("Confusion Matrix is:")
print(cm)


# ROC curve from the predicted scores in column 1 of `y_prob`.
fpr_svm, tpr_svm, thresh_svm = roc_curve(y_true, y_prob[:, 1], pos_label=1)


# The bundled 'seaborn' style was renamed to 'seaborn-v0_8' in Matplotlib 3.6
# and the old name was later removed; prefer the new name when it exists and
# fall back to the old one on older installations.
roc_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(roc_style)
plt.plot(fpr_svm, tpr_svm, linestyle='--', label='SVM', color='orange')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve')
plt.legend(loc='best')
plt.show()


 print(f"ROC AUC Score: {svm_auc_loo:.3f})
