### Sklearn Module KNN

In [4]:
import numpy as np
from sklearn import neighbors
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
def my_weight_1(distances, sigma2 = .4):
  """Kernel weights ``exp(-distances**2 / sigma2)`` for neighbour distances.

  Parameters
  ----------
  distances : array-like or scalar
      Neighbour distances. Converted to a float NumPy array, so plain lists
      are accepted as well as arrays.
  sigma2 : float, default 0.4
      Kernel width (must be non-zero). For positive values, a smaller
      ``sigma2`` makes the weight fall off faster as the distance grows.

  Returns
  -------
  numpy.ndarray (NumPy scalar for scalar input)
      Weights with the same shape as ``distances``; a distance of 0 gets
      weight 1.
  """
  distances = np.asarray(distances, dtype = float)
  # Unary minus binds looser than **, so this is -(distances**2) / sigma2.
  return np.exp(-distances**2 / sigma2)

def my_weight_2(distances):
  """Inverse-distance weights ``1 / distances`` that stay finite at distance 0.

  A plain ``1 / distances`` produces ``inf`` for a neighbour that coincides
  with the query point. Here, any group of neighbours (last axis) that
  contains at least one zero distance gets weight 1 for the zero-distance
  neighbours and weight 0 for the others, so exact matches decide the vote.
  Groups without a zero distance get the ordinary ``1 / distances``.

  Parameters
  ----------
  distances : array-like or scalar
      Neighbour distances; the last axis is treated as "neighbours of one
      query point".

  Returns
  -------
  numpy.ndarray (NumPy scalar for scalar input)
      Finite weights with the same shape as ``distances`` (NaN distances
      still propagate as NaN).
  """
  dist = np.asarray(distances, dtype = float)
  is_zero = dist == 0
  if not is_zero.any():
    # Common case: no exact matches, so the plain reciprocal is safe.
    return 1 / dist
  if dist.ndim == 0:
    # A single zero distance: the only neighbour is an exact match.
    return np.float64(1.0)
  # Flag every neighbour group that contains an exact match.
  group_has_zero = is_zero.any(axis = -1, keepdims = True)
  with np.errstate(divide = "ignore"):
    inverse = 1 / dist
  # In flagged groups use the 0/1 exact-match indicator instead of inf.
  return np.where(group_has_zero, is_zero.astype(float), inverse)

In [6]:
def sklearn_KNN(X_train, y_train, distance, X_test = None, n_neighbors = 1):
  """Fit a scikit-learn ``KNeighborsClassifier`` and predict labels for ``X_test``.

  Parameters
  ----------
  X_train, y_train
      Training features and labels, passed unchanged to ``fit``.
  distance
      Value forwarded as the classifier's ``weights`` argument (a callable
      taking the neighbour distances, or one of sklearn's string options).
  X_test : optional
      Samples to predict. When omitted, the notebook-level variable
      ``X_test`` is used, which keeps existing three-argument calls working.
  n_neighbors : int, default 1
      Number of neighbours that vote.
      NOTE(review): with a single neighbour the weighting presumably cannot
      change the winning label — confirm whether a larger value was intended.

  Returns
  -------
  The array returned by ``KNeighborsClassifier.predict``.

  Raises
  ------
  NameError
      If ``X_test`` is omitted and no notebook-level ``X_test`` exists.
  """
  if X_test is None:
    # Legacy behaviour: fall back to the global X_test. The parameter shadows
    # that name inside this function, so it has to be looked up explicitly.
    try:
      X_test = globals()["X_test"]
    except KeyError:
      raise NameError("name 'X_test' is not defined; pass X_test explicitly") from None
  # p = 2 selects the Euclidean (Minkowski order 2) metric.
  sklearn_model = neighbors.KNeighborsClassifier(n_neighbors = n_neighbors, p = 2, weights = distance)
  sklearn_model.fit(X_train, y_train)
  return sklearn_model.predict(X_test)

### Custom KNN implementation

In [2]:
import numpy as np
from collections import Counter

In [7]:
def euclidean_distance(x1, x2):
    """Return the Euclidean distance between two points.

    Parameters
    ----------
    x1, x2 : array-like
        Coordinates of the two points; must be broadcastable against each
        other. Plain lists/tuples are accepted as well as NumPy arrays.

    Returns
    -------
    numpy scalar for 1-D inputs. For inputs with more dimensions the squared
    differences are summed along the first axis only, so an array is returned.
    """
    difference = np.asarray(x1) - np.asarray(x2)
    # axis=0 keeps the reduction on the leading axis for multi-dimensional
    # input while using NumPy's vectorised sum instead of Python's builtin.
    return np.sqrt(np.sum(difference ** 2, axis=0))

class KNN:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` stores the training set; ``predict`` labels each query row with
    the most frequent label among its ``k`` closest training rows.

    NOTE(review): NaN feature values produce NaN distances — confirm that the
    inputs are NaN-free (or imputed) before relying on the predictions.
    """

    def __init__(self, k = 3):
        """Create a classifier that votes among the ``k`` nearest neighbours.

        Raises
        ------
        ValueError
            If ``k`` is smaller than 1 (no neighbour could vote).
        """
        if k < 1:
            raise ValueError("k must be at least 1, got {!r}".format(k))
        self.k = k

    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of length n_samples

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its length differs from ``y``.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError("X must be 2-D (n_samples, n_features), got {} dimension(s)".format(X.ndim))
        if len(X) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(X) != len(y):
            raise ValueError("X and y have different lengths: {} != {}".format(len(X), len(y)))
        self.X_train = X
        self.y_train = y

    def predict(self, X):
        """Return a list with one predicted label per row of ``X``."""
        return [self._predict(x) for x in np.asarray(X)]

    def _predict(self, x):
        """Predict the label of a single sample ``x``."""
        # Distance from x to every training row in one vectorised step.
        offsets = self.X_train - np.asarray(x)
        distances = np.sqrt(np.sum(offsets ** 2, axis=1))
        # Indices of the k closest rows (fewer if the training set is smaller).
        k_indices = np.argsort(distances)[:self.k]
        k_nearest_labels = [self.y_train[i] for i in k_indices]
        # Majority vote; on equal counts the label met first (the one with
        # the nearer first occurrence) wins.
        return Counter(k_nearest_labels).most_common(1)[0][0]

### Import library
 

In [8]:
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import csv

### Define helper functions

In [9]:
def precessing_data(data):
    """Turn a labelled passenger DataFrame into a feature matrix and a target vector.

    The feature columns are, in order: ``Sex`` encoded as 1 for ``"male"`` and
    0 for anything else, ``Age``, ``SibSp`` and ``Parch``. Columns are read
    positionally, so the DataFrame index does not have to be 0..n-1.

    Missing values are passed through unchanged (no imputation is done here).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``Survived``, ``Sex``, ``Age``, ``SibSp``
        and ``Parch``.

    Returns
    -------
    pre_data : numpy.ndarray of shape (n_rows, 4)
    pre_target : numpy.ndarray of shape (n_rows,)
        The ``Survived`` column.
    """
    # Comparison yields booleans; cast so the column is numeric 1/0.
    is_male = (data["Sex"] == "male").to_numpy().astype(int)
    pre_data = np.column_stack([
        is_male,
        data["Age"].to_numpy(),
        data["SibSp"].to_numpy(),
        data["Parch"].to_numpy(),
    ])
    pre_target = data["Survived"].to_numpy()
    return pre_data, pre_target

In [10]:
def precessing_test(data):
    """Turn a passenger DataFrame (no target column needed) into a feature matrix.

    The feature columns are, in order: ``Sex`` encoded as 1 for ``"male"`` and
    0 for anything else, ``Age``, ``SibSp`` and ``Parch``. Columns are read
    positionally, so the DataFrame index does not have to be 0..n-1.

    Missing values are passed through unchanged (no imputation is done here).

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns ``Sex``, ``Age``, ``SibSp`` and ``Parch``.

    Returns
    -------
    numpy.ndarray of shape (n_rows, 4)
    """
    # Comparison yields booleans; cast so the column is numeric 1/0.
    is_male = (data["Sex"] == "male").to_numpy().astype(int)
    pre_data = np.column_stack([
        is_male,
        data["Age"].to_numpy(),
        data["SibSp"].to_numpy(),
        data["Parch"].to_numpy(),
    ])
    return pre_data

In [11]:
def write_submission_data(prediction, start_id = 892):
  """Pair each prediction with a consecutive passenger id.

  Parameters
  ----------
  prediction : sequence
      Predicted labels, in row order.
  start_id : int, default 892
      Id given to the first prediction; later rows count up from it.
      NOTE(review): 892 is presumably the first PassengerId of the test
      file — confirm against the data.

  Returns
  -------
  list of [id, label] lists, one per prediction.
  """
  # Uses the argument itself rather than a notebook-level `predictions`.
  return [[start_id + offset, label] for offset, label in enumerate(prediction)]

In [12]:
def write_submission(cd, submission_data):
    """Write submission rows to a CSV file with a ``PassengerId,Survived`` header.

    Parameters
    ----------
    cd : str or path-like
        Destination file; it is created or overwritten.
    submission_data : iterable of rows
        Each row is a sequence written as one CSV line after the header.
    """
    header = ["PassengerId", "Survived"]
    # `with` closes the file even if a write fails; newline='' lets the csv
    # module control line endings (avoids blank lines on some platforms).
    with open(cd, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(header)
        writer.writerows(submission_data)

### Code

In [13]:
# Load the training set into a DataFrame.
# NOTE(review): the absolute /content/drive/MyDrive/... paths look like a Google Drive
# mount in Colab — confirm the drive is mounted before running this cell.
titanic_data = pd.read_csv("/content/drive/MyDrive/Machine Learning Code/Titanic - Machine Learning from Disaster/Data/train_data.csv")
# Load the test set that predictions are generated for.
test_data = pd.read_csv("/content/drive/MyDrive/Machine Learning Code/Titanic - Machine Learning from Disaster/Data/test_data.csv")
# Build the training feature matrix X and the label vector y.
X, y = precessing_data(titanic_data)
# Custom KNN classifier voting among the 10 nearest training rows.
clf = KNN(k=10)
clf.fit(X, y)
# Build the test feature matrix. X, y and X_test stay in the notebook namespace
# and are reused by the sklearn cells further down.
X_test = precessing_test(test_data)
predictions = clf.predict(X_test)
# Turn the predictions into [id, label] rows and write them to the submission CSV.
write_submission("/content/drive/MyDrive/Machine Learning Code/Titanic - Machine Learning from Disaster/Submit/Submission_KNN.csv", write_submission_data(predictions))

### Predictions with the sklearn KNN model

In [None]:
# sklearn KNN with the exp(-d**2 / sigma2) weighting function my_weight_1.
# Depends on X and y from the training cell; no test matrix is passed, so
# sklearn_KNN predicts on the notebook-level X_test.
predictions = sklearn_KNN(X, y, my_weight_1)
# Write the result to its own submission file.
write_submission("/content/drive/MyDrive/Machine Learning Code/Titanic - Machine Learning from Disaster/Submit/Submission_sklearn_w1.csv", write_submission_data(predictions))

In [None]:
# sklearn KNN with the inverse-distance weighting function my_weight_2.
# Depends on X and y from the training cell; no test matrix is passed, so
# sklearn_KNN predicts on the notebook-level X_test.
predictions = sklearn_KNN(X, y, my_weight_2)
# Write the result to its own submission file.
write_submission("/content/drive/MyDrive/Machine Learning Code/Titanic - Machine Learning from Disaster/Submit/Submission_sklearn_w2.csv", write_submission_data(predictions))