In [5]:
import numpy as np
import pandas as pd

# Column labels assigned to the CSV columns, in file order.
col_names = [
    'survived',
    'pclass',
    'sex',
    'age',
    'siblings_spouse',
    'parents_children',
    'fare',
]

# skiprows=1 drops the first line of the file (presumably its own header row)
# and header=None + names= applies the labels above instead.
data = pd.read_csv(
    "titanic_data.csv",
    names=col_names,
    header=None,
    skiprows=1,
)

# Preview the first five rows.
data.head(n=5)

Unnamed: 0,survived,pclass,sex,age,siblings_spouse,parents_children,fare
0,0,3,0,22.0,1,0,7.25
1,1,1,1,38.0,1,0,71.2833
2,1,3,1,26.0,0,0,7.925
3,1,1,1,35.0,1,0,53.1
4,0,3,0,35.0,0,0,8.05


## Problem 5.1

In [7]:
from collections import Counter

class KNN:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    Training data is converted to NumPy arrays in ``fit`` so that lists,
    arrays, and pandas objects (including ones with a non-default index) are
    all handled positionally.
    """

    def __init__(self, k=5):
        """Store the number of neighbours consulted for each prediction.

        Args:
            k: Number of nearest training samples that vote on a label.

        Raises:
            ValueError: If ``k`` is not a positive integer.
        """
        if not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k

    def fit(self, X_train, y_train):
        """Memorise the training samples and their labels.

        Args:
            X_train: Array-like of shape (n_samples, n_features). A 1-D input
                is treated as n_samples single-feature rows.
            y_train: Array-like of n_samples labels.

        Raises:
            ValueError: If the number of samples and labels differ.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)
        if X_train.ndim == 1:
            # Keep supporting single-feature data given as a flat sequence.
            X_train = X_train.reshape(-1, 1)
        if len(X_train) != len(y_train):
            raise ValueError(
                f"X_train has {len(X_train)} samples but y_train has {len(y_train)} labels"
            )
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Args:
            X_test: Array-like whose rows are samples laid out like the
                training rows.

        Returns:
            np.ndarray holding one predicted label per row of ``X_test``.
        """
        X_test = np.asarray(X_test, dtype=float)
        predictions = [self._predict(x) for x in X_test]
        return np.array(predictions)

    def _predict(self, x):
        """Return the majority label among the ``k`` training rows nearest to ``x``."""
        # One vectorised call computes the distance to every training row.
        distances = np.linalg.norm(self.X_train - np.asarray(x, dtype=float), axis=1)
        # A stable sort keeps equidistant neighbours in training order, so the
        # selection of the k nearest is deterministic. If k exceeds the number
        # of training rows, the slice simply returns all of them.
        k_indices = np.argsort(distances, kind="stable")[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        # On a tied vote, most_common keeps first-encountered order, so the
        # label of the nearest tied neighbour wins.
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]

In [16]:
# Work on a plain array: column 0 is the 'survived' label and the remaining
# columns are the features, in the order given by col_names.
data_np = data.to_numpy()
y = data_np[:, 0]
X = data_np[:, 1:]

# Build the classifier and hand it the full data set as training data.
# NOTE(review): features are used unscaled, so wide-range columns such as
# fare and age weigh most heavily in the Euclidean distance.
knn = KNN(k=5)
knn.fit(X, y)

# Hand-written passengers to classify; each row follows the feature order
# pclass, sex, age, siblings_spouse, parents_children, fare.
X_test = np.array(
    [
        [3, 0, 25.0, 0, 0, 7.05],
        [1, 1, 17.0, 2, 2, 30.0],
        [1, 1, 12.0, 0, 0, 125.0],
    ]
)

# Classify the samples and report one verdict per row.
predictions = knn.predict(X_test)
print("Predictions:")
for sample, pred in zip(X_test, predictions):
    print("Sample:", sample)
    if pred == 1:
        print("Result: Survived")
    else:
        print("Result: Died X(")

Predictions:
Sample: [ 3.    0.   25.    0.    0.    7.05]
Result: Died X(
Sample: [ 1.  1. 17.  2.  2. 30.]
Result: Survived
Sample: [  1.   1.  12.   0.   0. 125.]
Result: Survived


In [None]:
import matplotlib.pyplot as plt

# Plot the test samples (passenger class vs. age), styled by predicted outcome.
PCLASS_COL, AGE_COL = 0, 2  # positions of pclass and age within each X_test row

fig, ax = plt.subplots(figsize=(8, 6))

# Draw each predicted class with a single scatter call. Calling scatter once
# per point with a label would add one legend entry per point and therefore
# repeat "Survived"/"Died" in the legend.
survived_mask = predictions == 1
for mask, color, marker, label in (
    (survived_mask, 'green', 'o', 'Survived'),
    (~survived_mask, 'red', 'x', 'Died'),
):
    # Skip classes with no samples so the legend lists only what is drawn.
    if mask.any():
        ax.scatter(
            X_test[mask, PCLASS_COL],
            X_test[mask, AGE_COL],
            color=color,
            marker=marker,
            label=label,
        )

# Add labels and legend
ax.set_xlabel('Pclass')
ax.set_ylabel('Age')
ax.set_title('KNN Predictions')
ax.legend()

# Show plot
plt.show()
