In [182]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [183]:
def generate_linear_regression_dataset(num_features = 2, n = 1000, sparse = False):
    X = np.random.uniform(low = 0, high = 1, size = (n, num_features))
    col_names = []
    for i in range(num_features):
        col_names.append('x'+str(i+1))
    df = pd.DataFrame(X, columns = col_names)
    hidden_params = np.random.uniform(low = -2, high = 2, size = (num_features+1))
    
    if(sparse):
        mask = np.random.choice(np.arange(num_features), size=int(0.5*num_features))
        hidden_params[mask] = 0
        
    hidden_params = np.round(hidden_params,2)
    df['y'] = np.matmul(df[col_names].values,hidden_params[:-1]) + hidden_params[-1]
    y = np.zeros(n)
    return df, hidden_params

In [186]:
class LinearRegression:
    def __init__(self, learning_rate  = 0.5):
        self._intercept = np.random.normal(size = 1)
        self.lr = learning_rate 
        
    def train(self, X ,y, max_iter = 1000):
        self.num_features = X.shape[1]
        self.weights = np.random.normal(size = self.num_features)
        self._intercept = np.random.normal(size = 1)
        num_iter = 0
        loss = self.MAE_Loss(y, self.predict(X))
        while(num_iter < max_iter and loss > 1e-6):
            num_iter += 1
            y_pred = self.predict(X)
            gradients, gradient_intercept = self.gradient_step(X, y, y_pred)
            self.weights = self.weights - self.lr * gradients
            self._intercept = self._intercept - self.lr * gradient_intercept
            
            loss = self.MAE_Loss(y, y_pred)
        print('Training Done in {} iteration'.format(num_iter))
        return loss
    
    def predict(self, X):
        return np.matmul(X, self.weights) + self._intercept
    
    def MAE_Loss(self, y, y_pred):
        return np.mean(np.abs(y-y_pred))
    
    def gradient_step(self, X, y, y_pred):
        gradients = np.zeros(self.num_features)
        gradient_intercept = np.zeros(1)
        for j in range(self.num_features):
            gradients[j] = np.mean((y_pred-y)*X[:,j])
        gradient_intercept = np.mean((y_pred-y))
        return gradients, gradient_intercept

In [185]:
df, hidden_params = generate_linear_regression_dataset(num_features = 4, n = 1000, sparse = True)
display(df[:5])
model = LinearRegression(learning_rate = 0.5)
model.train(df[df.columns[:-1]].values, df['y'].values)

estimated_params = np.round(list(model.weights)+list(model._intercept),2)
display(pd.DataFrame(zip(list(df.columns[:-1])+['intercept'], hidden_params,estimated_params), columns = ['Parameter', 'True Coeff','Estimated Coeff']))

Unnamed: 0,x1,x2,x3,x4,y
0,0.419594,0.082313,0.351642,0.406545,0.618401
1,0.104094,0.75768,0.593832,0.864209,0.493657
2,0.719112,0.221746,0.149251,0.238996,0.657695
3,0.707275,0.202114,0.988989,0.415242,-0.680932
4,0.294638,0.781928,0.457827,0.410865,0.005611


Training Done in 597 iteration


Unnamed: 0,Parameter,True Coeff,Estimated Coeff
0,x1,0.0,0.0
1,x2,-0.59,-0.59
2,x3,-1.95,-1.95
3,x4,1.63,1.63
4,intercept,0.69,0.69
