In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load Titanic dataset
file_path = 'titanic.csv'  # Adjust this if needed
titanic_data = pd.read_csv(file_path)

In [23]:
# Step 1: Preprocessing
# Drop irrelevant features
titanic_data_cleaned = titanic_data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Handle missing values
titanic_data_cleaned['Age'].fillna(titanic_data_cleaned['Age'].mean(), inplace=True)  # Fill Age with mean
titanic_data_cleaned['Embarked'].fillna('missing', inplace=True)  # Fill Embarked with placeholder

# Encode categorical variables
titanic_data_encoded = pd.get_dummies(titanic_data_cleaned, drop_first=True)

# Separate features and target variable
X = titanic_data_encoded.drop(columns=['Survived'])
y = titanic_data_encoded['Survived']

In [37]:
# Step 2: Standardize the data
X_mean = X.mean()
X_std = X.std()
X_standardized = (X - X_mean) / X_std
print(X_standardized.shape)

(891, 9)


In [27]:
X=X_standardized

In [35]:
import numpy as np

# Hinge loss function
def hinge_loss(w, X, y, C=1):
    return 0.5 * np.dot(w, w) + C * np.sum(np.maximum(0, 1 - y * (np.dot(X, w))))

# Gradient of the hinge loss
def gradient(w, X, y, C=1):
    return w - C * np.dot(X.T, (y * (np.maximum(0, 1 - y * np.dot(X, w)))))

# Train the model using Gradient Descent
def train_svm(X, y, learning_rate=0.001, epochs=1000, C=1):
    # Initialize weights
    w = np.zeros(X.shape[1])
    y = 2 * y - 1  # Convert target to {-1, 1}
    
    for epoch in range(epochs):
        grad = gradient(w, X, y, C)
        w -= learning_rate * grad
        
        # Optionally, print the loss every 100 epochs
        # if epoch % 100 == 0:
        #     loss = hinge_loss(w, X, y, C)
        #     print(f"Epoch {epoch}: Loss = {loss}")
    
    return w

# Now let's train the model and get the weights (w)
w = train_svm(X, y)

# Predict with SVM
y_pred = np.sign(np.dot(X, w))  # SVM predictions in -1, 1

# Convert y_pred back to 0/1 for consistency
y_pred_binary = np.where(y_pred == -1, 0, 1)

# 2. Accuracy using confusion matrix
TP = np.sum((y_pred_binary == 1) & (y == 1))  # True Positives
TN = np.sum((y_pred_binary == 0) & (y == 0))  # True Negatives
FP = np.sum((y_pred_binary == 1) & (y == 0))  # False Positives
FN = np.sum((y_pred_binary == 0) & (y == 1))  # False Negatives

accuracy = (TP + TN) / (TP + TN + FP + FN)  # Consistent with 0, 1 labels
precision = TP / (TP + FP) if (TP + FP) != 0 else 0
recall = TP / (TP + FN) if (TP + FN) != 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

# Display the results
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1 Score: {f1_score * 100:.2f}%")


Accuracy: 79.57%
Precision: 73.95%
Recall: 72.22%
F1 Score: 73.08%
