In [None]:
import numpy as np
from collections import Counter

# K-Nearest Neighbours (KNN) classifier with chi-squared distance

In [None]:
def chi_squared(x, y):
    """Return the chi-squared distance between two numeric sequences.

    The distance is the sum over paired components of
    ``(xi - yi) ** 2 / (xi + yi)``.

    Args:
        x: First sequence of numbers.
        y: Second sequence of numbers. Components are paired with ``zip``,
            so any surplus elements of the longer sequence are ignored.

    Returns:
        The accumulated distance as a number; ``0.0`` when there are no
        pairs or every pair has a zero denominator.
    """
    # The accumulator must exist before the loop: without it the first
    # ``+=`` (or the final ``return`` for empty input) raises
    # UnboundLocalError.
    total = 0.0
    for xi, yi in zip(x, y):
        denom = xi + yi
        # A zero denominator would divide by zero; such pairs contribute
        # nothing to the distance.
        if denom == 0:
            continue
        total += ((xi - yi) ** 2) / denom
    return total

class KNN:
    """k-nearest-neighbours classifier that ranks rows by chi-squared distance.

    ``fit`` only stores the training data; all distance computation happens
    when a prediction is requested.
    """

    def __init__(self, k=5):
        # Number of closest training rows that vote on each prediction.
        self.k = k

    def fit(self, X_train, y_train):
        """Keep references to the training rows and their labels (no copy)."""
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Return a list holding one predicted label per row of ``X_test``."""
        return list(map(self._predict_single, X_test))

    def _predict_single(self, x_test_row):
        """Return the most frequent label among the ``k`` nearest training rows."""
        distances = [chi_squared(x_test_row, row) for row in self.X_train]
        # sorted() is stable, so rows at equal distance keep training order.
        ranked = sorted(range(len(distances)), key=distances.__getitem__)
        nearest = ranked[:self.k]
        votes = Counter([self.y_train[idx] for idx in nearest])
        return votes.most_common(1)[0][0]

# Decision Tree: entropy and information gain

In [None]:
def entropy(target_col):
    """Return the Shannon entropy, in bits, of the values in ``target_col``.

    Each distinct value's relative frequency ``p`` contributes
    ``-p * log2(p)``. An empty input yields ``0``.
    """
    _, counts = np.unique(target_col, return_counts=True)
    n_samples = np.sum(counts)
    result = 0
    for count in counts:
        p = count / n_samples
        result -= p * np.log2(p)
    return result

def info_gain(data, split_attribute, target_name):
    """Return the information gain from splitting ``data`` on one attribute.

    Information gain is the entropy of the target before the split minus
    the weighted average entropy of the target within each partition
    produced by the distinct values of ``split_attribute``.

    Args:
        data: Table indexed by column name that also supports boolean-mask
            row selection (presumably a pandas DataFrame; verify against
            callers).
        split_attribute: Name of the column to split on.
        target_name: Name of the column holding the class labels.

    Returns:
        The entropy reduction, in bits, achieved by the split.
    """
    total_entropy = entropy(data[target_name])

    vals, counts = np.unique(data[split_attribute], return_counts=True)
    n_rows = np.sum(counts)

    weighted_child_entropy = 0
    for val, count in zip(vals, counts):
        subset = data[data[split_attribute] == val]
        weighted_child_entropy += (count / n_rows) * entropy(subset[target_name])

    # The baseline is the parent's entropy, not the constant 1: starting
    # from 1 is only right when the parent entropy is exactly one bit.
    return total_entropy - weighted_child_entropy

# Decision Tree: Gini impurity and Gini index

In [None]:
def gini_impurity(target_col):
    """Return the Gini impurity of the values in ``target_col``.

    Starts from 1 and subtracts the squared relative frequency of every
    distinct value. An empty input yields ``1``.
    """
    _, counts = np.unique(target_col, return_counts=True)
    n_samples = np.sum(counts)
    impurity = 1
    for count in counts:
        share = count / n_samples
        impurity -= share ** 2
    return impurity

def gini_index(data, split_attribute, target_name):
    """Return the weighted Gini impurity of a split on ``split_attribute``.

    ``data`` is partitioned by each distinct value of ``split_attribute``;
    every partition's Gini impurity over ``target_name`` is weighted by the
    fraction of rows it holds and the weighted values are summed.

    ``data`` must support column lookup by name and boolean-mask row
    selection (presumably a pandas DataFrame; verify against callers).
    """
    split_values, sizes = np.unique(data[split_attribute], return_counts=True)
    n_rows = np.sum(sizes)
    result = 0
    for value, size in zip(split_values, sizes):
        partition = data[data[split_attribute] == value]
        weight = size / n_rows
        result += weight * gini_impurity(partition[target_name])
    return result