In [None]:
# ================================
# 1. Import Libraries
# ================================
import seaborn as sns
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report

# ================================
# 2. Load Dataset
# ================================
# Load the Titanic passengers, keep only the modelling columns, and
# discard every row that has a missing value in one of them
df = (
    sns.load_dataset("titanic")
    .loc[:, ["survived", "pclass", "sex", "age", "fare", "alone"]]
    .dropna()
)

# ================================
# 3. Transformations
# ================================

# sex      -> 0 for "male", 1 for "female"
# alone    -> cast to int
# fare_log -> log(1 + fare), which compresses the skewed fare values
df = df.assign(
    sex=df["sex"].map({"male": 0, "female": 1}),
    alone=df["alone"].astype(int),
    fare_log=np.log1p(df["fare"]),
)

# ================================
# 4. Split Data
# ================================
y = df["survived"]
X = df[["pclass", "sex", "age", "fare_log", "alone"]]

# 80/20 train/test split with a fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# ================================
# 5. Scaling
# ================================
# Learn the scaling statistics from the training split only, then apply
# that same fitted scaler to both splits
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)




## SVM Model (baseline model)

In [None]:
from sklearn.svm import SVC

# Baseline: an SVC with every hyperparameter left at its default (RBF kernel).
# fit() returns the estimator itself, so construction and training are chained.
model = SVC().fit(X_train_scaled, y_train)

# Class predictions for the held-out split
y_pred = model.predict(X_test_scaled)

# Evaluation: overall accuracy, then the per-class precision/recall/F1 table
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Accuracy: 0.7832167832167832

Classification Report:
              precision    recall  f1-score   support

           0       0.80      0.86      0.83        87
           1       0.76      0.66      0.70        56

    accuracy                           0.78       143
   macro avg       0.78      0.76      0.77       143
weighted avg       0.78      0.78      0.78       143



## **Hyperparameter Tuning**

## Key SVM Parameters

### **1. C (Regularization strength)**

* Controls how much the model tolerates misclassified points.
* **Small C** → more regularization → wider margin → model is simpler but may underfit.
* **Large C** → less regularization → tries to classify every point correctly → may overfit.

---

### **2. kernel**

Defines how the model draws the decision boundary.

* **linear** → straight line boundary; best when data is linearly separable.
* **rbf** → curved/complex boundaries; a strong general-purpose choice and the default kernel of `SVC`.
* **poly** → polynomial curved boundaries; degree controls complexity.
* **sigmoid** → rarely useful in practice; uses a tanh-shaped kernel, so the model behaves somewhat like a small neural network.

---

### **3. gamma (only for rbf, poly, sigmoid)**

Controls how far the influence of a single training point spreads.

* **Low gamma** → broader influence → smoother decision boundary.
* **High gamma** → small influence → boundary becomes very tight around points → risk of overfitting.

---

### **4. degree (only for poly kernel)**

* Power of the polynomial.
* **Higher degree → more complex curves** → more overfitting risk.

---

### **5. coef0 (poly & sigmoid kernels)**

* Controls how much influence higher-degree vs lower-degree polynomial terms have.
* Most beginners don’t need to tune this.

---

### **6. probability**

* Set `probability=True` if you want probability estimates (it enables `predict_proba`).
* Makes training slower, because the probabilities are calibrated with an extra internal cross-validation.

---


In [None]:
# Hand-tuned SVC -- adjust the parameters here to experiment.
# kernel="rbf" and gamma="scale" are already SVC's defaults, so C=3 is the
# only setting that differs from the baseline model.
model = SVC(C=3, kernel="rbf", gamma="scale")
model.fit(X_train_scaled, y_train)

# Class predictions for the held-out split
y_pred = model.predict(X_test_scaled)

# Evaluation on the test split
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")

Accuracy: 0.7762237762237763

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.82      0.82        87
           1       0.71      0.71      0.71        56

    accuracy                           0.78       143
   macro avg       0.77      0.77      0.77       143
weighted avg       0.78      0.78      0.78       143



### Random Search

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Search space, written as one dict per kernel so that each kernel is only
# combined with the hyperparameters it actually uses:
#   - linear: C only (gamma and degree are ignored by this kernel)
#   - rbf:    C and gamma
#   - poly:   C, gamma and degree
# A single flat dict would also generate candidates such as
# kernel='rbf' with degree=5, where degree has no effect; those waste search
# iterations and make best_params_ report a meaningless value.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.01, 0.1, 1, 2, 10],
    },
    {
        'kernel': ['rbf'],
        'C': [0.01, 0.1, 1, 2, 10],
        'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
    },
    {
        'kernel': ['poly'],
        'C': [0.01, 0.1, 1, 2, 10],
        'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
        'degree': [3, 4, 5],
    },
]

# Evaluate 30 sampled candidates with 3-fold cross-validation on accuracy.
# random_state on the *search* fixes which candidates are sampled, so the
# reported best parameters and score are reproducible across runs.
random_svmmodel = RandomizedSearchCV(
    SVC(random_state=42),
    param_grid,
    n_iter=30,
    cv=3,
    scoring='accuracy',
    random_state=42,
)
random_svmmodel.fit(X_train_scaled, y_train)

print("BEST PARAMETERS Random Search:", random_svmmodel.best_params_)
print("BEST SCORE Randome search:", random_svmmodel.best_score_)

BEST PARAMETERS Random Search: {'kernel': 'rbf', 'gamma': 0.1, 'degree': 5, 'C': 10}
BEST SCORE Randome search: 0.8248920731147239


In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Take the estimator that the random search refit with its best parameters
# and score it on the held-out test split
random_best_model = random_svmmodel.best_estimator_
y_pred = random_best_model.predict(X_test_scaled)

# Evaluate
print(f"Test Accuracy: {accuracy_score(y_test, y_pred)}")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Test Accuracy: 0.7622377622377622

Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.83      0.81        87
           1       0.71      0.66      0.69        56

    accuracy                           0.76       143
   macro avg       0.75      0.74      0.75       143
weighted avg       0.76      0.76      0.76       143



### Grid Search

In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC

# Parameter grid, written as one dict per kernel so that each kernel is only
# crossed with the hyperparameters it actually uses:
#   - linear: C only (gamma and degree are ignored by this kernel)
#   - rbf:    C and gamma
#   - poly:   C, gamma and degree
# This gives 5 + 30 + 90 = 125 distinct candidates. A single flat dict would
# produce 5 * 3 * 3 * 6 = 270 candidates, many of them identical models that
# differ only in a parameter the kernel ignores.
param_grid = [
    {
        'kernel': ['linear'],
        'C': [0.01, 0.1, 1, 2, 10],
    },
    {
        'kernel': ['rbf'],
        'C': [0.01, 0.1, 1, 2, 10],
        'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
    },
    {
        'kernel': ['poly'],
        'C': [0.01, 0.1, 1, 2, 10],
        'gamma': ['scale', 'auto', 0.001, 0.01, 0.1, 1],
        'degree': [3, 4, 5],
    },
]

# GridSearch model: exhaustive search scored by 3-fold CV accuracy,
# using all available CPU cores (n_jobs=-1)
grid_svm_model = GridSearchCV(
    estimator=SVC(random_state=42),
    param_grid=param_grid,
    scoring='accuracy',
    cv=3,
    n_jobs=-1
)

# Fit on training data
grid_svm_model.fit(X_train_scaled, y_train)

# Best params and CV accuracy
print("GridSearch BEST PARAMETERS :", grid_svm_model.best_params_)
print("GridSearch BEST SCORE:", grid_svm_model.best_score_)


GridSearch BEST PARAMETERS : {'C': 10, 'degree': 3, 'gamma': 'scale', 'kernel': 'poly'}
GridSearch BEST SCORE: 0.8283732892440526


In [None]:
# Take the estimator that the grid search refit with its best parameters
# and score it on the held-out test split
grid_best_model = grid_svm_model.best_estimator_
y_pred = grid_best_model.predict(X_test_scaled)

print(f"Test Accuracy: {accuracy_score(y_test, y_pred)}")
print("\nClassification Report:", classification_report(y_test, y_pred), sep="\n")


Test Accuracy: 0.7622377622377622

Classification Report:
              precision    recall  f1-score   support

           0       0.82      0.78      0.80        87
           1       0.68      0.73      0.71        56

    accuracy                           0.76       143
   macro avg       0.75      0.76      0.75       143
weighted avg       0.77      0.76      0.76       143

