In [16]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load Titanic dataset
file_path = 'titanic.csv'  # Adjust this if needed
titanic_data = pd.read_csv(file_path)

In [18]:
# Step 1: Preprocessing
# Drop irrelevant features
titanic_data_cleaned = titanic_data.drop(columns=['PassengerId', 'Name', 'Ticket', 'Cabin'])

# Handle missing values
titanic_data_cleaned['Age'].fillna(titanic_data_cleaned['Age'].mean(), inplace=True)  # Fill Age with mean
titanic_data_cleaned['Embarked'].fillna('missing', inplace=True)  # Fill Embarked with placeholder

# Encode categorical variables
titanic_data_encoded = pd.get_dummies(titanic_data_cleaned, drop_first=True)

# Separate features and target variable
X = titanic_data_encoded.drop(columns=['Survived'])
y = titanic_data_encoded['Survived']

In [20]:
# Step 2: Standardize the data
X_mean = X.mean()
X_std = X.std()
X_standardized = (X - X_mean) / X_std

In [22]:
# Step 3: Compute the covariance matrix
cov_matrix = np.cov(X_standardized.T)

In [24]:
# Step 4: Compute eigenvalues and eigenvectors
eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)

In [26]:
# Step 5: Sort eigenvalues and eigenvectors in descending order
sorted_indices = np.argsort(eigenvalues)[::-1]
eigenvalues = eigenvalues[sorted_indices]
eigenvectors = eigenvectors[:, sorted_indices]

In [34]:
# Step 6: Project data onto the top 4 principal components
k = 5  # Number of principal components
top_eigenvectors = eigenvectors[:, :k]
X_pca = np.dot(X_standardized, top_eigenvectors)
# Step 7: Explained variance ratio
explained_variance_ratio = eigenvalues / np.sum(eigenvalues)
# Print explained variance ratio
print("Explained Variance Ratio:", explained_variance_ratio[:k])

Explained Variance Ratio: [0.2052705  0.19121186 0.17205714 0.10949081 0.09226028]


In [36]:
X=X_pca

In [38]:
# Step 2: Implement Linear SVM (Primal form)
def hinge_loss(w, X, y, C=1):
    return 0.5 * np.dot(w, w) + C * np.sum(np.maximum(0, 1 - y * (np.dot(X, w))))

def gradient(w, X, y, C=1):
    return w - C * np.dot(X.T, (y * (np.maximum(0, 1 - y * np.dot(X, w)))))

# Step 3: Train the model using Gradient Descent
def train_svm(X, y, learning_rate=0.001, epochs=1000, C=1):
    w = np.zeros(X.shape[1])
    y = 2 * y - 1  # Convert target to {-1, 1}
    
    for epoch in range(epochs):
        grad = gradient(w, X, y, C)
        w -= learning_rate * grad
        
        # if epoch % 100 == 0:
        #     loss = hinge_loss(w, X, y, C)
        #     print(f"Epoch {epoch}: Loss = {loss}")
    
    return w

# Train the SVM model
w = train_svm(X, y, learning_rate=0.001, epochs=1000, C=1)

# Predict with SVM
y_pred = np.sign(np.dot(X, w))  # Predictions in -1, 1

# Convert y_pred back to 0/1 for metrics calculation
y_pred_binary = np.where(y_pred == -1, 0, 1)

# Calculate metrics
TP = np.sum((y_pred_binary == 1) & (y == 1))
TN = np.sum((y_pred_binary == 0) & (y == 0))
FP = np.sum((y_pred_binary == 1) & (y == 0))
FN = np.sum((y_pred_binary == 0) & (y == 1))

accuracy = (TP + TN) / (TP + TN + FP + FN)
precision = TP / (TP + FP) if (TP + FP) != 0 else 0
recall = TP / (TP + FN) if (TP + FN) != 0 else 0
f1_score = 2 * (precision * recall) / (precision + recall) if (precision + recall) != 0 else 0

# Display metrics
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1 Score: {f1_score * 100:.2f}%")

Accuracy: 77.67%
Precision: 70.61%
Recall: 71.64%
F1 Score: 71.12%
