In [None]:
from sklearn.metrics import mean_squared_error as MSE
import numpy as np

class LinearRegressionMe:
    """Multivariate linear regression trained with batch gradient descent.

    The model is the affine map ``pred = X @ weights + bias`` (the same
    architecture as a single linear layer in a neural network) and the loss
    is the mean squared error.  The gradient is derived twice, once link by
    link with the chain rule (``manual_backprop``) and once in closed form
    (``compute_gradients``), so the two can be checked against each other.

    Attributes
    ----------
    weights : numpy.ndarray
        One coefficient per feature, initialised uniformly in [0, 1).
    bias : float
        Intercept term, initialised to 1.
    backprop_params : dict
        Values cached by the forward pass (``MSE_loss``) for the backward
        pass; holds the residual vector under the key ``'error'``.
    """

    def __init__(self, num_features):
        """Create a model with ``num_features`` random weights and bias 1."""
        self.weights = np.random.rand(num_features)
        self.bias = 1
        self.backprop_params = {}

    def predict(self, X):
        """Return the model output ``X @ weights + bias`` for each row of X."""
        return np.asarray(X) @ self.weights + self.bias

    def MSE_loss(self, pred, true):
        """Compute the mean squared error and cache the residuals.

        Parameters
        ----------
        pred : array-like
            Model predictions.
        true : array-like
            Target values; must have exactly the same shape as ``pred``.

        Returns
        -------
        mse : float
            Mean of the squared residuals ``(pred - true) ** 2``.
        num_observations : int
            ``len(pred)``.

        Raises
        ------
        ValueError
            If ``pred`` and ``true`` have different shapes.
        """
        pred = np.asarray(pred)
        true = np.asarray(true)
        # Without this check NumPy would broadcast e.g. (m,) against (m, 1)
        # into an (m, m) matrix and silently produce a wrong loss/gradient.
        if pred.shape != true.shape:
            raise ValueError(
                f"pred and true must have the same shape, "
                f"got {pred.shape} and {true.shape}"
            )

        num_observations = len(pred)
        error = pred - true
        sse = np.sum(error ** 2)
        mse = (1 / num_observations) * sse

        # store params necessary for backprop
        self.backprop_params = {'error': error}

        return mse, num_observations

    def manual_backprop(self, X):
        """Back-propagate the MSE loss through the model with the chain rule.

        ``MSE_loss`` must have been called first (on predictions made from
        this same ``X``), because the residuals it caches in
        ``backprop_params['error']`` are read here.

        Parameters
        ----------
        X : numpy.ndarray
            Feature matrix with one row per sample.

        Returns
        -------
        dloss_dweights : numpy.ndarray
            Gradient of the loss with respect to each weight.
        dloss_dbias : float
            Gradient of the loss with respect to the bias.
        """
        m = X.shape[0]  # Number of samples

        # local gradients, one per link of loss <- mse <- sse <- error^2 <- error <- pred
        dLoss_dMse = 1  # redundant, just to show the loss is the mse
        dMse_dSse = 1 / m
        dSse_dError_sq = np.ones(m)
        dErrorSq_derror = 2 * self.backprop_params['error']
        dError_dPred = 1

        # chain rule
        dloss_dpred = (
            dLoss_dMse * dMse_dSse * dSse_dError_sq * dErrorSq_derror * dError_dPred
        )

        # pred = X @ weights + bias, so d pred / d weights = X and d pred / d bias = 1
        dpred_dweights = X
        dpred_dbias = 1

        # Final gradient calculation
        dloss_dweights = dpred_dweights.T @ dloss_dpred
        dloss_dbias = np.sum(dloss_dpred * dpred_dbias)

        return dloss_dweights, dloss_dbias

    def compute_gradients(self, X, y_true, y_pred):
        """Closed-form gradient of the MSE loss with respect to W and b.

        Gives the same result as ``manual_backprop`` without relying on
        cached state; useful as a cross-check.

        Returns
        -------
        dW : numpy.ndarray
            ``-(2/m) * X.T @ (y_true - y_pred)``.
        db : float
            ``-(2/m) * sum(y_true - y_pred)``.
        """
        m = X.shape[0]  # Number of samples

        # gradient of the loss w.r.t. W
        dW = -(2 / m) * X.T @ (y_true - y_pred)

        # gradient of the loss w.r.t. b
        db = -(2 / m) * np.sum(y_true - y_pred)

        return dW, db

    def fit(self, X, y, learning_rate=0.01, epochs=1000):
        """Train the model on ``(X, y)`` with full-batch gradient descent.

        Each epoch runs a forward pass, evaluates the MSE loss,
        back-propagates it with ``manual_backprop`` and moves ``weights``
        and ``bias`` one step against the gradient.

        Parameters
        ----------
        X : array-like
            Feature matrix with one row per sample.
        y : array-like
            Targets, one per row of ``X`` (same shape as the predictions).
        learning_rate : float, optional
            Step size of each gradient-descent update.
        epochs : int, optional
            Number of full passes over the data.

        Returns
        -------
        list of float
            The loss measured at the start of every epoch.

        Raises
        ------
        ValueError
            If ``y`` does not have the same shape as the predictions.
        """
        X = np.asarray(X)
        y = np.asarray(y)

        loss_history = []
        for _ in range(epochs):
            # forward pass: weighted sum of inputs (an affine transformation)
            pred = X @ self.weights + self.bias

            # loss; this also caches the residuals needed by the backward pass
            loss, _ = self.MSE_loss(pred, y)
            loss_history.append(float(loss))

            # backward pass
            dloss_dweights, dloss_dbias = self.manual_backprop(X)

            # gradient-descent update
            self.weights = self.weights - learning_rate * dloss_dweights
            self.bias = self.bias - learning_rate * dloss_dbias

        return loss_history

        