## SVM with Grid Search and Cross Validation

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt

In [None]:
df_train = pd.read_csv("Cleaned_Train.csv")
df_test = pd.read_csv("Cleaned_Test.csv")

In [34]:
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

df_train['Age'].fillna(df_train['Age'].median(), inplace=True)
df_train['Embarked'].fillna(df_train['Embarked'].mode()[0], inplace=True)

# Encode categorical features ('Sex' and 'Embarked')
df_train['Sex'] = df_train['Sex'].map({'male': 0, 'female': 1})
df_train = pd.get_dummies(df_train, columns=['Embarked'], drop_first=True)

X = df_train[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked_Q', 'Embarked_S']]
y = df_train['Survived']

# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled = scaler.transform(X_val)

param_grid = {
    'C': [0.1, 1, 10, 100],
    'kernel': ['linear', 'rbf', 'poly'],
    'gamma': ['scale', 'auto', 0.1, 1]
}

grid_search = GridSearchCV(SVC(), param_grid, cv=5, scoring='accuracy', verbose=1)

grid_search.fit(X_train_scaled, y_train)

print("Best Parameters from Grid Search:", grid_search.best_params_)
print("Best Cross-validation Score from Grid Search:", grid_search.best_score_)

best_svm = grid_search.best_estimator_

y_pred = best_svm.predict(X_val_scaled)
from sklearn.metrics import classification_report

print("\nClassification Report:")
print(classification_report(y_val, y_pred))

Fitting 5 folds for each of 48 candidates, totalling 240 fits
Best Parameters from Grid Search: {'C': 1, 'gamma': 'scale', 'kernel': 'rbf'}
Best Cross-validation Score from Grid Search: 0.8314291342460358

Classification Report:
              precision    recall  f1-score   support

         0.0       0.81      0.90      0.86       105
         1.0       0.84      0.70      0.76        74

    accuracy                           0.82       179
   macro avg       0.83      0.80      0.81       179
weighted avg       0.82      0.82      0.82       179



## Pridection

In [38]:
df_new = pd.DataFrame({
    'Pclass': [1, 3, 2], 
    'Sex': ['female', 'male', 'female'],
    'Age': [25, 30, 22],
    'SibSp': [0, 1, 0],
    'Parch': [0, 0, 0],
    'Fare': [71.2833, 7.9250, 26.0],
    'Embarked': ['C', 'S', 'S']
})

# Preprocess the new data (handle missing values and encode categorical variables)
df_new['Sex'] = df_new['Sex'].map({'male': 0, 'female': 1})
df_new = pd.get_dummies(df_new, columns=['Embarked'], drop_first=True)

for col in ['Embarked_Q', 'Embarked_S']:
    if col not in df_new.columns:
        df_new[col] = 0  # If a column is missing, add it with default value 0


df_new_scaled = scaler.transform(df_new[['Pclass', 'Sex', 'Age', 'SibSp', 'Parch', 'Fare', 'Embarked_Q', 'Embarked_S']])

y_new_pred = best_svm.predict(df_new_scaled)

df_new['Predicted_Survived'] = y_new_pred
styled_df = df_new.style.format({
    'Age': '{:.1f}',  
    'Fare': '{:.2f}',  
    'Predicted Survival': lambda x: 'Survived' if x == 1 else 'Not Survived'  # Convert 1/0 to "Survived" or "Not Survived"
})

styled_df


Unnamed: 0,Pclass,Sex,Age,SibSp,Parch,Fare,Embarked_S,Embarked_Q,Predicted_Survived
0,1,1,25.0,0,0,71.28,False,0,1.0
1,3,0,30.0,1,0,7.92,True,0,0.0
2,2,1,22.0,0,0,26.0,True,0,1.0
