In [15]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

In [16]:
housing = pd.read_csv("housing.csv")

In [17]:
housing["ocean_proximity_label"] = pd.factorize(housing["ocean_proximity"])[0]

In [18]:
data = housing.drop(columns = ["ocean_proximity"]) 

In [19]:
X = data[["longitude", "latitude","housing_median_age", "total_rooms", "population", "households", "median_income", "ocean_proximity_label"]].values
y = data["median_house_value"].values

In [20]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)

In [21]:
X_train[:,4].reshape(len(X_train),-1).shape

(16512, 1)

# Normal Linear Regression Applied

In [22]:
def get_pred_value(a,b,data):
    #n = len(data.keys())
    X = data
    print("Shape of X:", X.shape)
    y = np.dot(X,a)+b
    return y

In [23]:
def get_loss(y_hat,y):
    n = len(y)
    y = y.reshape(n,-1) 
    diff = (y_hat-y)**2
    summed = np.sum(diff)
    return (1/n)*summed

In [27]:
def update_params(data,y,y_hat,coeff,b,alpha = 0.0001):
    #get rate of change
    n = data.shape[1]
    y = y.reshape(len(y_train),-1)
    b_new = b - alpha*(2/n)*np.sum(y_hat-y)
    
    for i, x in enumerate(coeff):
        current_feat = data[:,i].reshape(len(data),-1)
        diff = y_hat-y
        dot = np.dot(current_feat.T,diff)
        eva = (alpha)*((2/n)*np.sum(dot))
        coeff[i] = coeff[i] - eva
    
    return coeff, b_new

In [28]:
loss = 10000
#Randomly defining initial coefficients
a = np.random.rand(1,X_train.shape[1]).T
#Setting inital biases to zero
b = np.zeros((X_train.shape[0],1),dtype=int)
for i in range(4):
    #get best ftt line based on current coefficients and baises
    y_hat = get_pred_value(a,b,X_train)
    #Calculate loss according to above predicted values
    loss = get_loss(y_hat,y_train)
    print("Loss =======>", loss)
    #Update parameters to minimise loss
    print("a =", a)
    print("b =", b)
    a, b = update_params(X_train,y_train,y_hat,a,b,alpha = 0.001)

print("Fitting complete!!")
print("Final Loss acheived:", loss)

Shape of X: (16512, 8)
a = [[0.99804005]
 [0.02841696]
 [0.61198178]
 [0.16980793]
 [0.60193001]
 [0.78059846]
 [0.44564914]
 [0.82408   ]]
b = [[0]
 [0]
 [0]
 ...
 [0]
 [0]
 [0]]
Shape of X: (16512, 8)
a = [[-1.01201118e+08]
 [ 3.00059234e+07]
 [ 2.49106778e+07]
 [ 2.35831947e+09]
 [ 1.18809456e+09]
 [ 4.32946732e+08]
 [ 3.88876541e+06]
 [ 1.15305850e+06]]
b = [[845923.83723542]
 [845923.83723542]
 [845923.83723542]
 ...
 [845923.83723542]
 [845923.83723542]
 [845923.83723542]]
Shape of X: (16512, 8)
a = [[ 4.01206655e+15]
 [-1.19371153e+15]
 [-8.44356002e+14]
 [-1.46033421e+17]
 [-7.56688855e+16]
 [-2.64559283e+16]
 [-1.37695211e+14]
 [-4.93000795e+13]]
b = [[-3.35775953e+13]
 [-3.35775953e+13]
 [-3.35775953e+13]
 ...
 [-3.35775953e+13]
 [-3.35775953e+13]
 [-3.35775953e+13]]
Shape of X: (16512, 8)
a = [[-2.49671359e+23]
 [ 7.42837740e+22]
 [ 5.25463687e+22]
 [ 9.08728335e+24]
 [ 4.71111984e+24]
 [ 1.64661698e+24]
 [ 8.56643211e+21]
 [ 3.06783821e+21]]
b = [[2.08954618e+21]
 [2.089546

# Lasso Regression


In [26]:
def lasso_fit(x,y,l1 = 1, itr = 10):
    m,n = x.shape
    for _ in range(itr):
        w = np.random.rand(x.shape[1],1)
        for j in range(n):
            yi = np.dot(x,w)
            diff = y - yi
    