In [1]:
# Day 13: Exploring algorithms for classification

In [7]:
import numpy as np
from collections import Counter

class KNNClassifier:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    Parameters
    ----------
    k : int, default 3
        Number of nearest training samples that vote on each prediction.
        Must be at least 1.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1.
    """

    def __init__(self, k=3):
        # k <= 0 would otherwise surface later as an IndexError (k == 0) or
        # as a silently wrong neighbour slice (k < 0) inside predict().
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training samples and their labels.

        Parameters
        ----------
        X : array-like
            Training samples, one per entry along the first axis.
        y : array-like
            Labels, one per training sample.

        Raises
        ------
        ValueError
            If the training set is empty or ``X`` and ``y`` differ in length.
        """
        X_arr = np.array(X)
        y_arr = np.array(y)
        if len(X_arr) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(X_arr) != len(y_arr):
            raise ValueError(
                f"X and y must have the same length, got {len(X_arr)} and {len(y_arr)}"
            )
        self.X_train = X_arr
        self.y_train = y_arr

    def _euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two points."""
        return np.sqrt(np.sum((x1 - x2) ** 2))

    def predict(self, X):
        """Predict a label for every sample in ``X``.

        Parameters
        ----------
        X : iterable of array-like
            Query samples.

        Returns
        -------
        list
            One predicted label per query sample.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNNClassifier.predict() called before fit()")

        # Flatten every training sample to one row so a single broadcasted
        # subtraction yields all distances for a query point at once
        # (also covers 1-D training data, i.e. one feature per sample).
        train = self.X_train.reshape(len(self.X_train), -1)

        predictions = []
        for point in X:
            diffs = train - np.asarray(point).reshape(-1)
            distances = np.sqrt(np.sum(diffs ** 2, axis=1))
            # A stable sort keeps equal-distance neighbours in training order,
            # so the selected neighbours are deterministic.
            k_indices = np.argsort(distances, kind="stable")[:self.k]
            k_labels = [self.y_train[i] for i in k_indices]
            most_common = Counter(k_labels).most_common(1)[0][0]
            predictions.append(most_common)
        return predictions


In [8]:
import numpy as np
from collections import defaultdict

class NaiveBayesClassifier:
    """Gaussian naive Bayes classifier.

    For every class, ``fit`` stores the per-feature mean and variance and the
    class prior. ``predict`` picks the class with the highest log-posterior,
    treating features as independent Gaussians.
    """

    def fit(self, X, y):
        """Estimate per-class feature means, variances and priors.

        Parameters
        ----------
        X : array-like
            Numeric training samples, one per entry along the first axis.
        y : array-like
            Class label of each training sample.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)

        self.classes = np.unique(y)
        self.mean = {}
        self.var = {}
        self.priors = {}

        for c in self.classes:
            X_c = X[y == c]
            self.mean[c] = np.mean(X_c, axis=0)
            self.var[c] = np.var(X_c, axis=0)
            self.priors[c] = len(X_c) / len(X)

    def _log_gaussian_probability(self, x, mean, var):
        """Return the element-wise log of the Gaussian density at ``x``.

        A small constant is added to the variance so that zero-variance
        features do not cause a division by zero. Working in log-space avoids
        the underflow of ``exp(-large)`` to 0, which would turn into ``-inf``
        once the logarithm is taken.
        """
        eps = 1e-6
        smoothed_var = var + eps
        return -0.5 * np.log(2.0 * np.pi * smoothed_var) - (x - mean) ** 2 / (2.0 * smoothed_var)

    def _gaussian_probability(self, x, mean, var):
        """Return the element-wise Gaussian density at ``x``.

        May underflow to 0 far from the mean; ``predict`` therefore uses
        ``_log_gaussian_probability`` instead.
        """
        return np.exp(self._log_gaussian_probability(x, mean, var))

    def predict(self, X):
        """Predict the most probable class for every sample in ``X``.

        Parameters
        ----------
        X : iterable of array-like
            Query samples.

        Returns
        -------
        list
            One predicted class label per query sample.
        """
        predictions = []
        for x in X:
            x = np.asarray(x, dtype=float)
            posteriors = {}
            for c in self.classes:
                log_prior = np.log(self.priors[c])
                log_likelihood = np.sum(
                    self._log_gaussian_probability(x, self.mean[c], self.var[c])
                )
                posteriors[c] = log_prior + log_likelihood
            predictions.append(max(posteriors, key=posteriors.get))
        return predictions


In [9]:
import numpy as np

class LogisticRegressionScratch:
    """Binary logistic regression trained with batch gradient descent.

    Parameters
    ----------
    lr : float, default 0.01
        Step size of each gradient-descent update.
    epochs : int, default 1000
        Number of full-batch gradient steps performed by ``fit``.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        Only ``exp(-|z|)`` is ever evaluated, which lies in (0, 1], so large
        negative inputs no longer overflow in ``exp``. For ``z < 0`` the
        algebraically equal form ``exp(z) / (1 + exp(z))`` is used.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))
        result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # ``[()]`` turns a 0-d result back into a scalar and leaves arrays as is.
        return result[()]

    def fit(self, X, y):
        """Fit the parameter vector ``theta`` (intercept first) by gradient descent.

        Parameters
        ----------
        X : array-like
            Numeric training samples, one per entry along the first axis.
        y : array-like
            Target of each sample; flattened to 1-D so that column vectors
            do not broadcast against the predictions.

        Raises
        ------
        ValueError
            If the training set is empty or ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.shape[0] == 0:
            raise ValueError("cannot fit on an empty training set")
        if X.shape[0] != y.size:
            raise ValueError(
                f"X and y must have the same length, got {X.shape[0]} and {y.size}"
            )

        # Prepend a column of ones so theta[0] acts as the intercept.
        X = np.c_[np.ones(X.shape[0]), X]
        self.theta = np.zeros(X.shape[1])

        for _ in range(self.epochs):
            z = X @ self.theta
            h = self.sigmoid(z)
            gradient = (X.T @ (h - y)) / y.size
            self.theta -= self.lr * gradient

    def predict(self, X):
        """Return 1 where the predicted probability is >= 0.5, else 0.

        Parameters
        ----------
        X : array-like
            Numeric query samples with the same feature layout used in ``fit``.

        Returns
        -------
        numpy.ndarray
            Integer array of 0/1 predictions, one per sample.
        """
        X = np.asarray(X, dtype=float)
        X = np.c_[np.ones(X.shape[0]), X]
        return (self.sigmoid(X @ self.theta) >= 0.5).astype(int)


In [10]:
import numpy as np

class Perceptron:
    """Single-layer perceptron for binary (0/1) classification.

    Parameters
    ----------
    lr : float, default 0.01
        Learning rate applied to every weight update.
    epochs : int, default 1000
        Maximum number of passes over the training data.
    """

    def __init__(self, lr=0.01, epochs=1000):
        self.lr = lr
        self.epochs = epochs

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` with the perceptron update rule.

        Training stops as soon as a full pass produces no non-zero update.
        Every later pass would leave the parameters untouched, so the result
        is the same as running all ``epochs``.

        Parameters
        ----------
        X : array-like, two-dimensional
            Numeric training samples, one row per sample.
        y : array-like
            Target of each sample; flattened to 1-D. ``predict`` only ever
            emits 0 or 1, so targets are expected to use those values.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()

        self.weights = np.zeros(X.shape[1])
        self.bias = 0

        for _ in range(self.epochs):
            n_updates = 0
            for xi, target in zip(X, y):
                update = self.lr * (target - self.predict(xi))
                self.weights += update * xi
                self.bias += update
                if update != 0:
                    n_updates += 1
            if n_updates == 0:
                # Converged: no sample changed the parameters in this pass.
                break

    def predict(self, X):
        """Return 1 where ``X @ weights + bias`` is >= 0, else 0.

        Accepts a single sample or a batch of samples.
        """
        linear_output = np.dot(X, self.weights) + self.bias
        return np.where(linear_output >= 0, 1, 0)


In [11]:
class Node:
    """One vertex of a binary decision tree.

    Attributes are plain storage for whatever the constructor receives:

    * ``feature`` / ``threshold`` -- the split test stored on the node.
    * ``left`` / ``right`` -- the two child nodes.
    * ``value`` -- keyword-only payload; the tree code in this file treats a
      node whose ``value`` is not ``None`` as a leaf.
    """

    def __init__(self, feature=None, threshold=None, left=None, right=None, *, value=None):
        # Split description.
        self.feature, self.threshold = feature, threshold
        # Children.
        self.left, self.right = left, right
        # Leaf payload (stays None when not supplied).
        self.value = value

class DecisionTreeClassifier:
    """Binary decision tree classifier grown greedily with Gini impurity.

    Parameters
    ----------
    max_depth : int, default 3
        Depth at which growth stops and a leaf is emitted.
    """

    def __init__(self, max_depth=3):
        self.max_depth = max_depth

    def fit(self, X, y):
        """Grow the tree from the training data and store it in ``self.tree``.

        Parameters
        ----------
        X : array-like, two-dimensional
            Training samples, one row per sample and one column per feature.
        y : array-like
            Label of each sample; flattened to 1-D. Any label type that
            ``np.unique`` can sort is accepted.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional, the training set is empty, or
            ``X`` and ``y`` differ in length.
        """
        X = np.asarray(X)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be two-dimensional, got {X.ndim} dimension(s)")
        if len(X) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same length, got {len(X)} and {len(y)}"
            )
        self.tree = self._grow_tree(X, y)

    def _gini(self, y):
        """Return the Gini impurity ``1 - sum(p_c ** 2)`` of the labels ``y``."""
        classes, counts = np.unique(y, return_counts=True)
        prob = counts / counts.sum()
        return 1 - np.sum(prob ** 2)

    def _majority_label(self, y):
        """Return the most frequent label in ``y``.

        ``np.unique`` is used instead of ``np.bincount`` so that negative,
        floating-point and string labels work too. Ties resolve to the
        smallest label in sorted order.
        """
        labels, counts = np.unique(y, return_counts=True)
        return labels[np.argmax(counts)]

    def _split(self, X, y, feature, threshold):
        """Partition the samples on ``X[:, feature] < threshold``.

        Returns ``(X_left, y_left, X_right, y_right)``, where the left part
        holds the rows that satisfy the test.
        """
        left = X[:, feature] < threshold
        return X[left], y[left], X[~left], y[~left]

    def _best_split(self, X, y):
        """Search every feature and observed value for the lowest weighted Gini.

        Returns ``(feature, threshold)``, or ``(None, None)`` when no
        candidate separates the samples into two non-empty parts.
        """
        best_feature, best_thresh, best_gini = None, None, float("inf")
        for feature in range(X.shape[1]):
            column = X[:, feature]
            for t in np.unique(column):
                go_left = column < t
                y_left, y_right = y[go_left], y[~go_left]
                if len(y_left) == 0 or len(y_right) == 0:
                    continue
                gini = (len(y_left) * self._gini(y_left) + len(y_right) * self._gini(y_right)) / len(y)
                # Strict '<' keeps the first candidate among equally good splits.
                if gini < best_gini:
                    best_feature, best_thresh, best_gini = feature, t, gini
        return best_feature, best_thresh

    def _grow_tree(self, X, y, depth=0):
        """Recursively build the subtree for the given samples."""
        # Stop on a pure node or once the depth limit is reached.
        if np.unique(y).size == 1 or depth == self.max_depth:
            return Node(value=self._majority_label(y))

        feature, threshold = self._best_split(X, y)
        if feature is None:
            # No split yields two non-empty children: emit a leaf.
            return Node(value=self._majority_label(y))

        X_left, y_left, X_right, y_right = self._split(X, y, feature, threshold)
        left = self._grow_tree(X_left, y_left, depth + 1)
        right = self._grow_tree(X_right, y_right, depth + 1)
        return Node(feature, threshold, left, right)

    def _traverse(self, x, node):
        """Walk from ``node`` down to a leaf for sample ``x``; return its value."""
        # A node counts as a leaf when its value is not None.
        while node.value is None:
            if x[node.feature] < node.threshold:
                node = node.left
            else:
                node = node.right
        return node.value

    def predict(self, X):
        """Return a list with the predicted label of every sample in ``X``."""
        return [self._traverse(x, self.tree) for x in X]
