In [49]:
import numpy as np
import pandas as pd

In [50]:
# Linear Regression
# Pedhazur Method
# Caluculius Method
# Logistic Regression
# Stocastic Gradient Descent

In [51]:
def RMSE(Y, Y_pred):
    Y, Y_pred = np.array(Y), np.array(Y_pred)
    err = np.mean((Y_pred - Y) ** 2)
    return err**0.5

def MSE(Y, Y_pred):
    Y, Y_pred = np.array(Y), np.array(Y_pred)
    return np.mean((Y_pred - Y) ** 2)

def error(Y, Y_pred):
    Y, Y_pred = np.array(Y), np.array(Y_pred)
    return np.sum(Y_pred - Y)

In [52]:
class LinearRegressionPedhazur:
    def __init__(self):
        self.slope = None
        self.intercept = None
    
    def predict(self, X):
        X = np.array(X)
        return self.slope*X + self.intercept
    
    def fit(self, X, Y):
        X, Y = np.array(X), np.array(Y)

        x_mean = np.mean(X)
        y_mean = np.mean(Y)

        self.slope = (np.sum((X-x_mean) * (Y-y_mean)))/(np.sum((X - x_mean) ** 2))
        self.intercept = y_mean - self.slope*x_mean

In [53]:
df = pd.read_csv('../Week3/datasets/sample.csv')

In [54]:
model = LinearRegressionPedhazur()
model.fit(df['X'], df['Y'])
print(model.slope, model.intercept)

0.5909090909090909 2.2727272727272725


In [55]:
class LinearRegressionCalculus:
    def __init__(self):
        self.coefficients = None
    
    def fit(self, X, Y):
        X, Y = np.array(X), np.array(Y)
        n = len(X)
        X = np.insert(X, 0, np.ones(n), axis=1)
        X_T = X.T
        A = X_T @ X
        B = X_T @ Y
        self.coefficients = np.linalg.inv(A) @ B

    
    def predict(self, X):
        X = np.array(X)
        n = len(X)
        X = np.insert(X, 0, np.ones(n), axis=1)
        return X @ self.coefficients

In [56]:
model1 = LinearRegressionCalculus()
model1.fit(df['X'].values.reshape(-1, 1), df['Y'].values.reshape(-1, 1))
pred = model1.predict(df['X'].values.reshape(-1, 1))

In [58]:
RMSE(df['Y'].values.reshape(-1, 1), pred)

5.020661157024793

In [91]:
class SimplePolynomialRegression:
    def __init__(self, degree=2):
        self.degree = degree
        self.coefficients = None
    
    def fit(self, X, Y):
        X, Y = np.array(X), np.array(Y)
        X_features = np.ones((X.shape[0], self.degree + 1))

        for i in range(1, self.degree + 1):
            X_features[:, i] = (X ** i).flatten()
       
        A = X_features.T @ X_features
        B = X_features.T @ Y
        self.coefficients = np.linalg.inv(A) @ B
    
    def predict(self, X):
        X = np.array(X)
        X_features = np.ones((X.shape[0], self.degree + 1))

        for i in range(1, self.degree + 1):
            X_features[:, i] = (X ** i).flatten()

        return X_features @ self.coefficients



In [92]:
df1 = pd.read_csv('../Week4/datasets/sampledata1.csv')

In [93]:
model2 = SimplePolynomialRegression(2)
model2.fit(df1['Temp'].values.reshape(-1, 1), df1['Yield'].values.reshape(-1, 1))
pred1 = model2.predict(df1['Temp'].values.reshape(-1, 1))

print(model2.coefficients)

[[ 7.96048110e+00]
 [-1.53711340e-01]
 [ 1.07560137e-03]]


In [96]:
df2 = pd.read_csv('../Week4/datasets/sampledata2.csv')
df2.drop(['Group'], axis='columns', inplace=True)

In [103]:
model3 = LinearRegressionCalculus()
X, Y = df2.drop('Infarc', axis='columns').values, df2['Infarc']
model3.fit(X, Y)
pred2 = model3.predict(X)
print(model3.coefficients)

[-0.13453638  0.61265498 -0.24348223 -0.06565569]


In [147]:
class LinearRegressionGradientDescent:
    def __init__(self, alpha, epoches, noOfVars):
        self.alpha = alpha
        self.epoches = epoches
        self.noOfVars = noOfVars
        self.coefeicients = np.zeros(noOfVars + 1)
    
    def fit(self, X, Y):
        X, Y = np.array(X), np.array(Y)
        for _ in range(self.epoches):
            for iter in range(len(X)):
                for i in range(self.noOfVars + 1):
                    self.coefeicients[i] -= self.alpha * self.der_rmse(X[iter], Y[iter], i-1)[0]
    
    def der_rmse(self, X, Y, i):
        if X.ndim == 0:
            X = np.array([X])
        if i == -1:
            return (self.predict([X]) - Y)
        return X[i] * (self.predict([X]) - Y)
    
    def predict(self, X):
        X = np.array(X)
        if X.ndim == 1:
            X = np.insert(X, 0, 1)
        else:
            X = np.insert(X, 0, 1, axis=1)
        return X @ self.coefeicients

In [148]:
df3 = pd.read_csv('../Week5/datasets/q1_data.csv')

In [149]:
X, Y = df3['mother_height'].values, df3['daughter_height'].values
model4 = LinearRegressionGradientDescent(0.0001, 4, 1)
model4.fit(X, Y)
pred3 = model4.predict(X.reshape(-1, 1))

print(model4.coefeicients)

[0.01640781 1.01491833]


In [151]:
class LogisticRegressionGradientDescent:
    def __init__(self, alpha, epoches, noOfVars):
        self.alpha = alpha
        self.epoches = epoches
        self.noOfVars = noOfVars
        self.coefeicients = np.zeros(noOfVars + 1)
    
    def sigmoid(z):
        return 1 / (1 + np.exp(-z))
    
    def fit(self, X, Y):
        X, Y = np.array(X), np.array(Y)
        for _ in range(self.epoches):
            for iter in range(len(X)):
                self.coefeicients -= self.alpha * self.der_logloss(X[iter], Y[iter])
    
    def der_logloss(self, X, Y):
        X = np.insert(X, 0, 1)
        return X * (self.predict(X[1:]) - Y)
    
    def predict(self, X):
        X = np.array(X)
        if X.ndim == 1:
            X = np.insert(X, 0, 1)
        else:
            X = np.insert(X, 0, 1, axis=1)
        z = X @ self.coefeicients
        return LogisticRegressionGradientDescent.sigmoid(z)

In [152]:
df4 = pd.read_csv('../Week5/datasets/q2_data.csv')

In [156]:
X, Y = df4['no_of_hours_study'].values.reshape(-1, 1), df4['pass']
model5 = LogisticRegressionGradientDescent(0.01, 500, 1)
model5.fit(X, Y)
pred4 = model5.predict(X)

print(model5.coefeicients)

[-3.59024746  0.89747649]
