<a href="https://colab.research.google.com/github/TianB33/COMP551Miniprojects/blob/main/Miniproject1-dataset1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Below are the Colab programs for the Boston Housing dataset.

In [13]:
import pandas as pd
import numpy as np
import csv

# Read the Boston Housing table and discard the "B" column in one step.
# (Rows holding '?' placeholders can be inspected with
#  df[df.eq('?').any(axis=1)].)
df = pd.read_csv("boston.csv").drop(columns="B")
display(df)

# MEDV is the dependent (target) variable.

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,LSTAT,MEDV
0,0.00632,18.0,2.31,0,0.538,6.575,65.2,4.0900,1,296.0,15.3,4.98,24.0
1,0.02731,0.0,7.07,0,0.469,6.421,78.9,4.9671,2,242.0,17.8,9.14,21.6
2,0.02729,0.0,7.07,0,0.469,7.185,61.1,4.9671,2,242.0,17.8,4.03,34.7
3,0.03237,0.0,2.18,0,0.458,6.998,45.8,6.0622,3,222.0,18.7,2.94,33.4
4,0.06905,0.0,2.18,0,0.458,7.147,54.2,6.0622,3,222.0,18.7,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0,0.573,6.593,69.1,2.4786,1,273.0,21.0,9.67,22.4
502,0.04527,0.0,11.93,0,0.573,6.120,76.7,2.2875,1,273.0,21.0,9.08,20.6
503,0.06076,0.0,11.93,0,0.573,6.976,91.0,2.1675,1,273.0,21.0,5.64,23.9
504,0.10959,0.0,11.93,0,0.573,6.794,89.3,2.3889,1,273.0,21.0,6.48,22.0


In [24]:
class Analytic_linear_regression:
    """Linear regression fitted in closed form (ordinary least squares).

    A column of ones is appended to the features, so the last entry of
    ``self.w`` is the intercept and the preceding entries are the slopes.
    """

    def __init__(self):
        pass

    def preprocess(self, X):
        """Return ``X`` as a 2-D float array with a trailing column of ones.

        A 1-D input is treated as one feature with one sample per entry.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X[:, None]
        N = X.shape[0]
        # column_stack is a NumPy function (ndarrays have no such method)
        # and it takes a single tuple of arrays.
        return np.column_stack((X, np.ones(N)))

    def fit(self, X, y):
        """Compute the least-squares weights for ``(X, y)`` and return ``self``."""
        X = self.preprocess(X)
        y = np.asarray(y, dtype=float)

        # lstsq minimises ||Xw - y||^2 directly. It gives the same weights
        # as inv(X.T @ X) @ X.T @ y when X has full column rank, and still
        # returns a (minimum-norm) solution when features are collinear
        # instead of failing on a singular matrix.
        self.w = np.linalg.lstsq(X, y, rcond=None)[0]
        return self

    def predict(self, X):
        """Return the fitted linear prediction for each row of ``X``."""
        X = self.preprocess(X)
        return X @ self.w

    def loss(self, X, y, w):
        """Return the mean of the halved squared errors of weights ``w``."""
        X = self.preprocess(X)
        y_hat = X @ w
        diff_squared_half = ((y_hat - y) ** 2) / 2
        return np.mean(diff_squared_half)

    def loss_of_predict(self, X, y):
        """Return ``loss`` evaluated at the fitted weights ``self.w``."""
        return self.loss(X, y, self.w)

In [23]:
def logistic(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so
    large-magnitude inputs cannot overflow the exponential. Scalars come
    back as scalars and arrays as arrays.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook form; for x < 0 the numerator and
    # denominator are both multiplied by exp(x) to keep the exponent negative.
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]

class logistic_regression:
    """Binary logistic regression trained with full-batch gradient descent.

    A column of ones is appended to the features, so the last entry of
    ``self.w`` is the intercept. Relies on the module-level ``logistic``
    sigmoid helper.
    """

    def __init__(self, learning_rate=.01, epsilon=.0001, iteration=1e6):
        self.lr = learning_rate   # gradient-descent step size
        self.epsilon = epsilon    # stop once the gradient norm is <= this
        self.iters = iteration    # upper bound on the number of updates

    def preprocess(self, X):
        """Return ``X`` as a 2-D float array with a trailing column of ones.

        A 1-D input is treated as one feature with one sample per entry.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X[:, None]
        N = X.shape[0]
        # column_stack is a NumPy function (ndarrays have no such method)
        # and it takes a single tuple of arrays.
        return np.column_stack((X, np.ones(N)))

    def gradient(self, X, y):
        """Return the loss gradient at ``self.w`` for preprocessed ``X``."""
        N = X.shape[0]
        y_hat = logistic(X @ self.w)
        return (X.T @ (y_hat - y)) / N

    def fit(self, X, y):
        """Run gradient descent from zero weights and return ``self``.

        Stops when the gradient norm drops to ``epsilon`` or after
        ``iteration`` updates, whichever happens first.
        """
        X = self.preprocess(X)
        y = np.asarray(y, dtype=float)

        self.w = np.zeros(X.shape[1])
        grad = np.inf  # guarantees the first loop test passes
        t = 0

        while np.linalg.norm(grad) > self.epsilon and t < self.iters:
            grad = self.gradient(X, y)
            self.w = self.w - self.lr * grad
            t += 1
        return self

    def predict(self, X):
        """Return the sigmoid of the linear score for each row of ``X``."""
        X = self.preprocess(X)
        return logistic(X @ self.w)

    def loss(self, X, y, w):
        """Return the mean cross-entropy of weights ``w`` on ``(X, y)``."""
        X = self.preprocess(X)
        logit = X @ w
        # logaddexp(0, z) == log(1 + exp(z)) but stays finite for large |z|,
        # where log1p(exp(z)) overflows to inf and 0 * inf poisons the mean.
        return np.mean(y * np.logaddexp(0, -logit) + (1 - y) * np.logaddexp(0, logit))

    def loss_of_predict(self, X, y):
        """Return ``loss`` evaluated at the fitted weights ``self.w``."""
        return self.loss(X, y, self.w)

In [22]:
class minibatch_SGD_linear_regression:
    """Linear regression trained with mini-batch stochastic gradient descent.

    A column of ones is appended to the features, so the last entry of
    ``self.w`` is the intercept.
    """

    def __init__(self, batch_size=32, learning_rate=.01, epsilon=.0001, iteration=1e6):
        self.batch_size = batch_size  # samples per gradient step
        self.lr = learning_rate       # step size
        self.epsilon = epsilon        # stop once the last batch gradient norm is <= this
        self.iters = iteration        # upper bound on passes over the data

    def preprocess(self, X):
        """Return ``X`` as a 2-D float array with a trailing column of ones.

        A 1-D input is treated as one feature with one sample per entry.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X[:, None]
        N = X.shape[0]
        # column_stack is a NumPy function (ndarrays have no such method)
        # and it takes a single tuple of arrays.
        return np.column_stack((X, np.ones(N)))

    def gradient(self, X, y):
        """Return the squared-error gradient at ``self.w`` on one batch.

        ``X`` must already be preprocessed (bias column included).
        """
        N = X.shape[0]
        y_hat = X @ self.w
        return (X.T @ (y_hat - y)) / N

    def fit(self, X, y):
        """Train from zero weights with shuffled mini-batches; return ``self``.

        Each pass reshuffles the data and takes one step per batch. Training
        stops when the most recent batch gradient norm drops to ``epsilon``
        or after ``iteration`` passes.

        Raises:
            ValueError: if ``X`` contains no samples.
        """
        X = self.preprocess(X)
        # Positional indexing below needs a plain array, not a labelled Series.
        y = np.asarray(y, dtype=float)
        N, D = X.shape
        if N == 0:
            raise ValueError("cannot fit on an empty dataset")

        self.w = np.zeros(D)
        grad = np.inf  # guarantees the first loop test passes
        t = 0

        while np.linalg.norm(grad) > self.epsilon and t < self.iters:
            indices = np.random.permutation(N)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            # Stepping by batch_size visits every sample, including a final
            # batch that may be smaller; slicing clips the end index to N.
            for start in range(0, N, self.batch_size):
                end = start + self.batch_size
                X_batch = X_shuffled[start:end]
                y_batch = y_shuffled[start:end]

                grad = self.gradient(X_batch, y_batch)
                self.w = self.w - self.lr * grad

            t += 1
        return self

    def predict(self, X, y=None):
        """Return the linear prediction for each row of ``X``.

        ``y`` is ignored; it is accepted only so existing two-argument
        calls keep working.
        """
        X = self.preprocess(X)
        return X @ self.w

    def loss(self, X, y, w):
        """Return the mean of the halved squared errors of weights ``w``."""
        X = self.preprocess(X)
        y_hat = X @ w
        diff_squared_half = ((y_hat - y) ** 2) / 2
        return np.mean(diff_squared_half)

    def loss_of_predict(self, X, y):
        """Return ``loss`` evaluated at the fitted weights ``self.w``."""
        return self.loss(X, y, self.w)

In [21]:
def logistic(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` element-wise.

    The value is computed from ``exp(-|x|)``, which never exceeds 1, so
    large-magnitude inputs cannot overflow the exponential. Scalars come
    back as scalars and arrays as arrays.
    """
    x = np.asarray(x, dtype=float)
    decay = np.exp(-np.abs(x))
    # For x >= 0 this is the textbook form; for x < 0 the numerator and
    # denominator are both multiplied by exp(x) to keep the exponent negative.
    out = np.where(x >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves
    # higher-dimensional arrays unchanged.
    return out[()]

class minibatch_SGD_logistic_regression:
    """Binary logistic regression trained with mini-batch SGD.

    A column of ones is appended to the features, so the last entry of
    ``self.w`` is the intercept. Relies on the module-level ``logistic``
    sigmoid helper.
    """

    def __init__(self, batch_size=32, learning_rate=.01, epsilon=.0001, iteration=1e6):
        self.batch_size = batch_size  # samples per gradient step
        self.lr = learning_rate       # step size
        self.epsilon = epsilon        # stop once the last batch gradient norm is <= this
        self.iters = iteration        # upper bound on passes over the data

    def preprocess(self, X):
        """Return ``X`` as a 2-D float array with a trailing column of ones.

        A 1-D input is treated as one feature with one sample per entry.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim == 1:
            X = X[:, None]
        N = X.shape[0]
        # column_stack is a NumPy function (ndarrays have no such method)
        # and it takes a single tuple of arrays.
        return np.column_stack((X, np.ones(N)))

    def gradient(self, X, y):
        """Return the loss gradient at ``self.w`` on one batch.

        ``X`` must already be preprocessed (bias column included).
        """
        N = X.shape[0]
        y_hat = logistic(X @ self.w)
        return (X.T @ (y_hat - y)) / N

    def fit(self, X, y):
        """Train from zero weights with shuffled mini-batches; return ``self``.

        Each pass reshuffles the data and takes one step per batch. Training
        stops when the most recent batch gradient norm drops to ``epsilon``
        or after ``iteration`` passes.

        Raises:
            ValueError: if ``X`` contains no samples.
        """
        X = self.preprocess(X)
        # Positional indexing below needs a plain array, not a labelled Series.
        y = np.asarray(y, dtype=float)
        N, D = X.shape
        if N == 0:
            raise ValueError("cannot fit on an empty dataset")

        self.w = np.zeros(D)
        grad = np.inf  # guarantees the first loop test passes
        t = 0

        while np.linalg.norm(grad) > self.epsilon and t < self.iters:
            indices = np.random.permutation(N)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            # Stepping by batch_size visits every sample, including a final
            # batch that may be smaller; slicing clips the end index to N.
            for start in range(0, N, self.batch_size):
                end = start + self.batch_size
                X_batch = X_shuffled[start:end]
                y_batch = y_shuffled[start:end]

                grad = self.gradient(X_batch, y_batch)
                self.w = self.w - self.lr * grad

            t += 1
        return self

    def predict(self, X):
        """Return the sigmoid of the linear score for each row of ``X``."""
        X = self.preprocess(X)
        return logistic(X @ self.w)

    def loss(self, X, y, w):
        """Return the mean cross-entropy of weights ``w`` on ``(X, y)``."""
        X = self.preprocess(X)
        logit = X @ w
        # logaddexp(0, z) == log(1 + exp(z)) but stays finite for large |z|,
        # where log1p(exp(z)) overflows to inf and 0 * inf poisons the mean.
        return np.mean(y * np.logaddexp(0, -logit) + (1 - y) * np.logaddexp(0, logit))

    def loss_of_predict(self, X, y):
        """Return ``loss`` evaluated at the fitted weights ``self.w``."""
        return self.loss(X, y, self.w)