In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Parameters
    ----------
    x1, x2 : array-like of numbers
        Coordinates of the two points (lists, tuples or NumPy arrays).
        They must be broadcastable against each other.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    # Work in float64 regardless of the input dtype: unsigned/small integer
    # arrays would otherwise wrap around when subtracted or squared, NumPy
    # bool arrays do not support ``-`` at all, and plain lists cannot be
    # subtracted element-wise.
    point_a = np.asarray(x1, dtype=float)
    point_b = np.asarray(x2, dtype=float)
    return np.sqrt(np.sum((point_a - point_b) ** 2))

In [None]:
class KNN:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int, default 3
        Number of closest training samples that vote on each prediction.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=3):
        # k == 0 leaves no voters and a negative k would silently slice
        # "all but the last |k|" neighbours, so reject both up front.
        if k < 1:
            raise ValueError("k must be a positive integer")
        self.k = k

    def fit(self, X, y):
        """Store the training samples; no model is actually trained.

        Parameters
        ----------
        X : array-like of numbers, one row per sample
        y : array-like of labels, one per row of ``X`` (any sortable dtype)

        Raises
        ------
        ValueError
            If ``X`` and ``y`` have different lengths or are empty.
        """
        features = np.asarray(X, dtype=float)  # also normalises object/bool input
        labels = np.asarray(y)
        if len(features) != len(labels):
            raise ValueError("X and y must contain the same number of samples")
        if len(features) == 0:
            raise ValueError("cannot fit on an empty training set")
        self.X_train = features
        self.y_train = labels

    def predict(self, X):
        """Predict a label for every sample (row) in ``X``.

        Returns
        -------
        numpy.ndarray
            One predicted label per sample, in input order.
        """
        samples = np.asarray(X, dtype=float)
        return np.array([self._predict(sample) for sample in samples])

    def _predict(self, x):
        """Return the most frequent label among the k training rows nearest to ``x``."""
        train_X = np.asarray(self.X_train, dtype=float)
        train_y = np.asarray(self.y_train)

        # Distances to all training rows at once instead of a Python loop.
        squared_diff = (train_X - np.asarray(x, dtype=float)) ** 2
        distances = np.sqrt(squared_diff.reshape(len(train_X), -1).sum(axis=1))

        # Stable sort: equally distant rows keep their training order.
        k_indices = np.argsort(distances, kind="stable")[: self.k]

        # np.unique works for any label dtype (np.bincount only accepts
        # non-negative integers). Labels come back sorted, so a tied vote
        # resolves to the smallest label, as bincount().argmax() did.
        candidates, votes = np.unique(train_y[k_indices], return_counts=True)
        return candidates[votes.argmax()]

Load data from CSV file

In [None]:
# Read the training CSV into a DataFrame.
# NOTE(review): this is an absolute, drive-specific path, so the notebook only
# runs on a machine that has the file at exactly this location; consider a
# configurable data directory.
train_csv_path = 'D:/Internships/titanic/train.csv'
data = pd.read_csv(train_csv_path)

Preprocess categorical features using one-hot encoding

In [None]:
# One-hot encode the categorical columns; each listed column is replaced by
# one indicator column per category.
# dtype=float is requested explicitly: with the default dtype, recent pandas
# versions emit bool indicator columns, and mixing those with the numeric
# columns makes the later `.values` feature matrix object-dtype instead of a
# plain float array.
# NOTE: this cell overwrites `data`, so re-running it without re-running the
# load cell fails because the original columns no longer exist.
categorical_features = ['Sex', 'Embarked']
data = pd.get_dummies(data, columns=categorical_features, dtype=float)

Drop irrelevant columns

In [None]:
# Remove the columns that are not used as model inputs.
unused_columns = ['PassengerId', 'Name', 'Ticket', 'Cabin']
data = data.drop(columns=unused_columns)

Handle missing values

In [None]:
# Replace every remaining missing value with 0.
# NOTE(review): a distance-based model treats the filled-in 0 as a real
# measurement — confirm that zero-imputation is intended for every column.
data = data.fillna(value=0)

Separate features and target variable

In [None]:
# Split the frame into the label vector (y) and the feature matrix (X),
# both as NumPy arrays.
target_column = 'Survived'
y = data[target_column].values
X = data.drop(columns=[target_column]).values

Split data into training and testing sets

In [None]:
# Ordered hold-out split: the first 80% of the rows are used for training and
# the remaining rows for testing (no shuffling is applied).
train_fraction = 0.8
split_idx = int(train_fraction * len(data))
X_train, y_train = X[:split_idx], y[:split_idx]
X_test, y_test = X[split_idx:], y[split_idx:]

Create and train KNN classifier

In [None]:
# Build the classifier with 3 voting neighbours and hand it the training data.
n_neighbors = 3
knn = KNN(k=n_neighbors)
knn.fit(X=X_train, y=y_train)

Predict on the test set

In [None]:
# One predicted label per row of the held-out feature matrix.
y_pred = knn.predict(X=X_test)

Print the predicted labels

In [None]:
# Show the raw predictions for the test rows.
predictions_heading = "Predicted labels:"
print(predictions_heading, y_pred)

Evaluate accuracy

In [None]:
# Accuracy = number of test rows whose prediction matches the true label,
# divided by the number of test rows.
n_correct = np.sum(y_pred == y_test)
n_total = len(y_test)
accuracy = n_correct / n_total
print("Accuracy:", accuracy)

Plot the predicted and actual labels

In [None]:
# Overlay predicted labels (default markers) and actual labels ('x' markers)
# on the first two columns of the test feature matrix; colour encodes the label.
first_feature = X_test[:, 0]
second_feature = X_test[:, 1]
shared_style = dict(cmap='Paired')
plt.scatter(first_feature, second_feature, c=y_pred, label='Predicted', **shared_style)
plt.scatter(first_feature, second_feature, c=y_test, marker='x', label='Actual', **shared_style)
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()