In [71]:
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt

In [72]:
housing = pd.read_csv("housing.csv")

In [73]:
housing["ocean_proximity_label"] = pd.factorize(housing["ocean_proximity"])[0]

In [74]:
data = housing.drop(columns = ["ocean_proximity"]) 

In [75]:
X = data[["longitude", "latitude","housing_median_age", "total_rooms", "population", "households", "median_income", "ocean_proximity_label"]].values
y = data["median_house_value"].values

In [76]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.20)

In [77]:
X_train[:,4].reshape(len(X_train),-1).shape

(16512, 1)

# Normal Linear Regression Applied

In [92]:
def get_pred_value(a,b,data):
    #n = len(data.keys())
    X = data
    print("Shape of X:", X.shape)
    y = np.dot(X,a)+b
    return y

In [93]:
def get_loss(y_hat,y):
    n = len(y)
    y = y.reshape(n,-1) 
    diff = (y_hat-y)**2
    summed = np.sum(diff)
    return (1/n)*summed

In [96]:
def update_params(data,y,coeff,b,alpha = 0.0001):
    #get rate of change
    n = data.shape[1]
    y = y.reshape(len(y_train),-1)
    b_new = b - alpha*(2/n)*sum(y_hat-y)
    
    for i, x in enumerate(coeff):
        current_feat = data[:,i].reshape(len(data),-1)
        diff = y_hat-y
        dot = np.dot(current_feat.T,diff)
        eva = (alpha)*((2/n)*np.sum(dot))
        coeff[i] = coeff[i] - eva
    print("a =", coeff)
    print("b =", b_new)
    return coeff, b_new

In [97]:
loss = 10000
#Randomly defining initial coefficients
a = np.random.rand(1,X_train.shape[1]).T
#Setting inital biases to zero
b = np.zeros((X_train.shape[0],1),dtype=int)
for i in range(4):
    #get best ftt line based on current coefficients and baises
    y_hat = get_pred_value(a,b,X_train)
    #Calculate loss according to above predicted values
    loss = get_loss(y_hat,y_train)
    print("Loss =======>", loss)
    #Update parameters to minimise loss
    a, b = update_params(X_train,y_train,a,b,alpha = 0.001)

print("Fitting complete!!")
print("Final Loss acheived:", loss)

Shape of X: (16512, 8)
a = [[-1.00660435e+08]
 [ 2.98393845e+07]
 [ 2.47413214e+07]
 [ 2.33281110e+09]
 [ 1.17542437e+09]
 [ 4.28531859e+08]
 [ 3.86519060e+06]
 [ 1.14760540e+06]]
b = [[841467.05727963]
 [841467.05727963]
 [841467.05727963]
 ...
 [841467.05727963]
 [841467.05727963]
 [841467.05727963]]
Shape of X: (16512, 8)
a = [[ 3.97288351e+15]
 [-1.18183601e+15]
 [-8.32606542e+14]
 [-1.45519120e+17]
 [-7.50952107e+16]
 [-2.62275919e+16]
 [-1.36278220e+14]
 [-4.89294209e+13]]
b = [[-3.32521067e+13]
 [-3.32521067e+13]
 [-3.32521067e+13]
 ...
 [-3.32521067e+13]
 [-3.32521067e+13]
 [-3.32521067e+13]]
Shape of X: (16512, 8)
a = [[-2.48789953e+23]
 [ 7.40080487e+22]
 [ 5.21402798e+22]
 [ 9.11278815e+24]
 [ 4.70463531e+24]
 [ 1.64270537e+24]
 [ 8.53218706e+21]
 [ 3.06396618e+21]]
b = [[2.08232138e+21]
 [2.08232138e+21]
 [2.08232138e+21]
 ...
 [2.08232138e+21]
 [2.08232138e+21]
 [2.08232138e+21]]
Shape of X: (16512, 8)
a = [[ 1.55813332e+31]
 [-4.63500967e+30]
 [-3.26547349e+30]
 [-5.70717

# Lasso Regression


In [None]:
def lasso_fit(x,y,l1 = 1, itr = 10):
    m,n = x.shape
    for _ in range(itr):
        w = np.random.rand(x.shape[1],1)
        for j in range(n):
            yi = np.dot(x,w)
            diff = y - yi
    