In [24]:
import numpy as np
import pandas as pd

# Load dataset
data = pd.read_csv('C:\\Users\\HP\\Downloads\\titanic.csv')

# Handle missing values
data['Age'].fillna(data['Age'].median(), inplace=True)
data['Embarked'].fillna(data['Embarked'].mode()[0], inplace=True)

# Encode categorical variables (manually)
data['Sex'] = np.where(data['Sex'] == 'male', 1, 0)
embarked_mapping = {'C': 0, 'Q': 1, 'S': 2}
data['Embarked'] = data['Embarked'].map(embarked_mapping)

# Extract features and labels
X = data.drop(['Survived', 'Name', 'Ticket', 'Cabin', 'PassengerId'], axis=1).values
y = data['Survived'].values

# Feature standardization
X_mean = np.mean(X, axis=0)
X_std = np.std(X, axis=0)
X_standardized = (X - X_mean) / X_std

# Train-test split (manually)
def train_test_split(X, y, test_size=0.2, random_state=42):
    np.random.seed(random_state)
    indices = np.arange(len(y))
    np.random.shuffle(indices)
    split = int(len(y) * (1 - test_size))
    train_indices, test_indices = indices[:split], indices[split:]
    return X[train_indices], X[test_indices], y[train_indices], y[test_indices]

X_train, X_test, y_train, y_test = train_test_split(X_standardized, y, test_size=0.2)



The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Age'].fillna(data['Age'].median(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  data['Embarked'].fillna(data['Embarked'].mode()[0], inplace=True)


In [25]:
# PCA implementation
def pca(X, n_components):
    X_meaned = X - np.mean(X, axis=0)
    covariance_matrix = np.cov(X_meaned, rowvar=False)
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
    sorted_indices = np.argsort(eigenvalues)[::-1]
    eigenvectors_subset = eigenvectors[:, sorted_indices[:n_components]]
    X_reduced = np.dot(X_meaned, eigenvectors_subset)
    return X_reduced

X_train_pca = pca(X_train, n_components=2)
X_test_pca = pca(X_test, n_components=2)

In [26]:
# SVM Class
class SVM:
    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0
        y_ = np.where(y <= 0, -1, 1)

        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y_[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y_[idx]))
                    self.b -= self.lr * y_[idx]

    def predict(self, X):
        linear_output = np.dot(X, self.w) - self.b
        return np.sign(linear_output)


In [28]:
# Logistic Regression Class
class LogisticRegression:
    def __init__(self, learning_rate=0.01, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def sigmoid(self, z):
        return 1 / (1 + np.exp(-z))

    def fit(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0

        for _ in range(self.n_iters):
            linear_model = np.dot(X, self.w) + self.b
            y_predicted = self.sigmoid(linear_model)
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)
            self.w -= self.lr * dw
            self.b -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.w) + self.b
        y_predicted = self.sigmoid(linear_model)
        return [1 if i > 0.5 else 0 for i in y_predicted]

# Accuracy function
def accuracy(y_true, y_pred):
    return np.sum(y_true == y_pred) / len(y_true)

In [33]:
# Function to calculate confusion matrix
def confusion_matrix(y_true, y_pred):
    tp = np.sum((y_true == 1) & (y_pred == 1))  # True Positives
    tn = np.sum((y_true == 0) & (y_pred == 0))  # True Negatives
    fp = np.sum((y_true == 0) & (y_pred == 1))  # False Positives
    fn = np.sum((y_true == 1) & (y_pred == 0))  # False Negatives
    return np.array([[tn, fp], [fn, tp]])

# Function to calculate precision, recall, and F1 score
def precision_recall_f1(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    tn, fp, fn, tp = cm.ravel()
    
    precision = tp / (tp + fp) if (tp + fp) > 0 else 0
    recall = tp / (tp + fn) if (tp + fn) > 0 else 0
    f1 = 2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    
    return precision, recall, f1

In [36]:
# Train and evaluate models without PCA
svm_model = SVM()
svm_model.fit(X_train, y_train)
svm_predictions = svm_model.predict(X_test)
svm_accuracy_no_pca = accuracy(y_test, svm_predictions)
svm_precision_no_pca, svm_recall_no_pca, svm_f1_no_pca = precision_recall_f1(y_test, svm_predictions)

logistic_model = LogisticRegression()
logistic_model.fit(X_train, y_train)
logistic_predictions = logistic_model.predict(X_test)
logistic_accuracy_no_pca = accuracy(y_test, logistic_predictions)
logistic_precision_no_pca, logistic_recall_no_pca, logistic_f1_no_pca = precision_recall_f1(y_test, logistic_predictions)

In [37]:
# Train and evaluate models with PCA
svm_model_pca = SVM()
svm_model_pca.fit(X_train_pca, y_train)
svm_predictions_pca = svm_model_pca.predict(X_test_pca)
svm_accuracy_with_pca = accuracy(y_test, svm_predictions_pca)
svm_precision_with_pca, svm_recall_with_pca, svm_f1_with_pca = precision_recall_f1(y_test, svm_predictions_pca)

logistic_model_pca = LogisticRegression()
logistic_model_pca.fit(X_train_pca, y_train)
logistic_predictions_pca = logistic_model_pca.predict(X_test_pca)
logistic_accuracy_with_pca = accuracy(y_test, logistic_predictions_pca)
logistic_precision_with_pca, logistic_recall_with_pca, logistic_f1_with_pca = precision_recall_f1(y_test, logistic_predictions_pca)

In [38]:
# Summary of results
print("\n--- Model Performance Comparison ---")
print(f"SVM Accuracy without PCA: {svm_accuracy_no_pca:.4f}, Precision: {svm_precision_no_pca:.4f}, Recall: {svm_recall_no_pca:.4f}, F1 Score: {svm_f1_no_pca:.4f}")
print(f"Logistic Regression Accuracy without PCA: {logistic_accuracy_no_pca:.4f}, Precision: {logistic_precision_no_pca:.4f}, Recall: {logistic_recall_no_pca:.4f}, F1 Score: {logistic_f1_no_pca:.4f}")
print(f"SVM Accuracy with PCA: {svm_accuracy_with_pca:.4f}, Precision: {svm_precision_with_pca:.4f}, Recall: {svm_recall_with_pca:.4f}, F1 Score: {svm_f1_with_pca:.4f}")
print(f"Logistic Regression Accuracy with PCA: {logistic_accuracy_with_pca:.4f}, Precision: {logistic_precision_with_pca:.4f}, Recall: {logistic_recall_with_pca:.4f}, F1 Score: {logistic_f1_with_pca:.4f}")


--- Model Performance Comparison ---
SVM Accuracy without PCA: 0.2570, Precision: 0.7541, Recall: 1.0000, F1 Score: 0.8598
Logistic Regression Accuracy without PCA: 0.8045, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
SVM Accuracy with PCA: 0.0391, Precision: 0.1094, Recall: 1.0000, F1 Score: 0.1972
Logistic Regression Accuracy with PCA: 0.3128, Precision: 0.0000, Recall: 0.0000, F1 Score: 0.0000
