In [2]:
import numpy as np
import pandas as pd

Note : Pandas library is only used for data cleaning. All implementations are done using only numpy

In [3]:
class LinearRegression:
  W = None
  lamba = None
  L = None

  def __init__(self, n_features, lamda = 0):
    self.L = np.identity(n_features + 1)
    self.L[0][0] = 0
    self.lamda = lamda

  def fit(self, X_old, Y):
    X = np.copy(X_old)
    bias = np.ones((X.shape[0], 1))
    X = np.hstack(( bias, X))

    # (X.T X + lambda.L)^-1 . X.T Y
    A = np.matmul(X.T, X) + self.lamda*self.L
    B = np.matmul(X.T, Y)
    self.W = (np.linalg.solve(A, B))
    return self.W

  def predict(self, X_old):
    X = np.copy(X_old)
    bias = np.ones((X.shape[0], 1))
    X = np.hstack(( bias, X))
    
    return np.matmul(X, self.W)

  def predictionError(self, X, Y):
    prediction_error = np.mean(np.abs((Y - self.predict(X))/Y))
    return f"{round(prediction_error * 100, 2)}%"

## Data preprocessing

In [4]:
df = pd.read_csv('housing_price_dataset.csv')

In [5]:
df = df.rename(columns={"lotsize": "plotsize","bedrooms":"n_bedrooms","bathrms":"n_bathrooms"})
df = df[ ['plotsize','n_bedrooms','n_bathrooms','price']]

In [6]:
df.head()

Unnamed: 0,plotsize,n_bedrooms,n_bathrooms,price
0,5850,3,1,42000.0
1,4000,2,1,38500.0
2,3060,3,1,49500.0
3,6650,3,1,60500.0
4,6360,2,1,61000.0


In [7]:
len(df)

546

In [68]:
train_size = 0.75
N = len(df)

data_np = df.to_numpy(dtype=np.float64)
np.random.seed(42) 
np.random.shuffle(data_np)
train, test = data_np[: int(N*train_size),:], data_np[ int(N*train_size):,:]

In [69]:
X_train, Y_train = train[:,0:3], train[:,3:4] 
X_test, Y_test = test[:,0:3], test[:,3:4] 

print(X_train.shape)
print(Y_train.shape)

(409, 3)
(409, 1)


In [70]:
n_features = X_train.shape[1]
n_samples = X_train.shape[0]

## Without regularization


In [71]:
print('Without Regularization:')
lr_without_reg = LinearRegression(n_features = n_features)
lr_without_reg.fit(X_train, Y_train)
print(f'Prediction Error : {lr_without_reg.predictionError(X_test, Y_test)} ')

Without Regularization:
Prediction Error : 21.96% 


## With Regularization

In [72]:
# lamda_list = np.arange(0, 500, 10)
lamda_list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 100, 1000, 10000, 10000000]
print('With Regularization:')
  
for lamda in lamda_list:
  lr_with_reg = LinearRegression(n_features = n_features, lamda = lamda)
  lr_with_reg.fit(X_train, Y_train)
  print(f'Prediction Error with lamda = {lamda} : {lr_with_reg.predictionError(X_test, Y_test)}')

With Regularization:
Prediction Error with lamda = 0 : 21.96%
Prediction Error with lamda = 1 : 21.95%
Prediction Error with lamda = 2 : 21.95%
Prediction Error with lamda = 3 : 21.95%
Prediction Error with lamda = 4 : 21.95%
Prediction Error with lamda = 5 : 21.95%
Prediction Error with lamda = 6 : 21.94%
Prediction Error with lamda = 7 : 21.94%
Prediction Error with lamda = 8 : 21.94%
Prediction Error with lamda = 9 : 21.94%
Prediction Error with lamda = 10 : 21.94%
Prediction Error with lamda = 100 : 22.34%
Prediction Error with lamda = 1000 : 24.45%
Prediction Error with lamda = 10000 : 25.49%
Prediction Error with lamda = 10000000 : 25.68%


In [80]:
def feature_scale(a):
  return (a - np.mean(a)) / np.std(a)

In [136]:
import random
class LinearRegressionWithGDA:
  W = None
  alpha = None

  def __init__(self, n_features, lamda = 0, alpha = 0.01):
    self.L = np.identity(n_features + 1)
    self.L[0][0] = 0
    self.lamda = lamda
    self.alpha = alpha
    self.W = np.zeros((n_features + 1, 1))
    self.n_features = n_features

  def trainWithSGD(self, samples, n_epochs = 5):
    
    for epoch in range(n_epochs):
      np.random.shuffle(samples)
      for sample in samples:
        dW = self.calculateLoss(np.array([sample]))
        self.W = self.W - self.alpha*dW
    
    return self.W

  def trainWithMBGD(self, D, batch_size, n_epochs = 5):

    for _ in range(n_epochs):
      batch = D[np.random.choice(D.shape[0], 2, replace=False), :]
      # a[np.random.choice(len(a))]
      dW = self.calculateLoss(np.array(batch))
      self.W = self.W - self.alpha*dW

  def calculateLoss(self, D):
    X_old, Y = D[:, 0: self.n_features], D[:, self.n_features:]
    X = np.copy(X_old)
    bias = np.ones((X.shape[0], 1))
    X = np.hstack(( bias, X))

    L = np.ones(self.W.shape)
    L[0][0] = 0
    return (np.matmul( X.T, self.predict(X) - Y) + np.multiply(self.lamda*L, self.W))/ len(D)

  def predict(self, X):
    # print(X.shape, self.W.shape)
    return np.matmul(X, self.W)

  def predictionError(self, D):
    X_old, Y = D[:, 0: self.n_features], D[:, self.n_features:]
    X = np.copy(X_old)
    bias = np.ones((X.shape[0], 1))
    X = np.hstack(( bias, X))
    prediction_error = np.mean(np.square((Y - self.predict(X)))) / 2
    return f"{round(prediction_error, 2)}"

  def predictionErrorp(self, D):
    X_old, Y = D[:, 0: self.n_features], D[:, self.n_features:]
    X = np.copy(X_old)
    bias = np.ones((X.shape[0], 1))
    X = np.hstack(( bias, X))
    prediction_error = np.mean(np.abs((Y - self.predict(X))/Y))
    return f"{round(prediction_error, 2)}%"

## Stochastic, batch and mini batch without scaling and without regularization

In [137]:
print('With Batch Stochastic:')
lr_with_MBGD = LinearRegressionWithGDA(n_features=n_features)
W = lr_with_MBGD.trainWithMBGD(train, batch_size = n_samples)
print(f'Prediction Error : {lr_with_MBGD.predictionError(test)} ')


With Batch Stochastic:
Prediction Error : 1.5659815289266656e+63 


In [138]:
print('With Mini Batch Stochastic:')
lr_with_MBGD = LinearRegressionWithGDA(n_features=n_features)
W = lr_with_MBGD.trainWithMBGD(train, batch_size = 32)
print(f'Prediction Error : {lr_with_MBGD.predictionError(test)} ')


With Mini Batch Stochastic:
Prediction Error : 1.1966370948548148e+63 


In [143]:
import warnings
warnings.filterwarnings('ignore')

print('With Stochastic:')
lr_with_SGD = LinearRegressionWithGDA(n_features=n_features)
W = lr_with_SGD.trainWithSGD(train)
print(f'Prediction Error : {lr_with_MBGD.predictionError(test)} ')


With Stochastic:
Prediction Error : 1.1966370948548148e+63 


In [98]:
from sklearn.preprocessing import normalize
def feature_scale_dataset(D):
  return (D- np.min( D, axis = 0)) / ( np.max(D,axis=0) - np.min(D,axis=0))

## Stochastic, batch and mini batch with scaling and without regularization

In [99]:
train_scaled = feature_scale_dataset(train)
test_scaled = feature_scale_dataset(test)
X_train_scaled, Y_train_scaled = train_scaled[:,0:3], train_scaled[:,3:4] 
X_test_scaled, Y_test_scaled = test_scaled[:,0:3], test_scaled[:,3:4] 
print(train_scaled)

[[0.35821306 0.4        0.         0.26666667]
 [0.14639175 0.2        0.         0.18484848]
 [0.10103093 0.4        0.         0.15151515]
 ...
 [0.08659794 0.2        0.         0.07393939]
 [0.19587629 0.6        0.33333333 0.15757576]
 [0.18570447 0.6        0.         0.10606061]]


In [102]:
print('With feature scaling:')

print('With Stochastic:')
lr_with_SGD_scaled = LinearRegressionWithGDA(n_features=n_features)
W = lr_with_SGD_scaled.trainWithSGD(train_scaled, n_epochs = 200)
print(f'Prediction Error : {lr_with_SGD_scaled.predictionError(test_scaled)} ')


With feature scaling:
With Stochastic:
[[0.03639746]
 [0.47049228]
 [0.19383367]
 [0.3564152 ]]
Prediction Error : 0.01 


In [122]:
print('With Batch Gradient:')
lr_with_BGD_scaled = LinearRegressionWithGDA(n_features=n_features)
W = lr_with_BGD_scaled.trainWithMBGD(train_scaled, n_epochs = 100, batch_size = n_samples)
print(f'Prediction Error : {lr_with_BGD_scaled.predictionError(test_scaled)} ')


With Batch Gradient:
Prediction Error : 0.02 


In [121]:
print('With Mini Gradient:')
batch_sizes = [2, 4, 8, 16, 32]
for batch_size in batch_sizes:
  lr_with_BGD_scaled = LinearRegressionWithGDA(n_features=n_features)
  W = lr_with_BGD_scaled.trainWithMBGD(train_scaled, n_epochs = 100, batch_size = batch_size)
  print(f'Prediction Error with batch size = {batch_size}: {lr_with_BGD_scaled.predictionError(test_scaled)} ')


With Mini Gradient:
Prediction Error with batch size = 2: 0.02 
Prediction Error with batch size = 4: 0.02 
Prediction Error with batch size = 8: 0.02 
Prediction Error with batch size = 16: 0.02 
Prediction Error with batch size = 32: 0.02 


## Stochastic, batch and mini batch with scaling and with regularization

In [140]:
print('With feature scaling:')

print('With Stochastic:')
lr_with_SGD_scaled = LinearRegressionWithGDA(n_features=n_features, lamda = 20)
W = lr_with_SGD_scaled.trainWithSGD(train_scaled, n_epochs = 200)
print(f'Prediction Error : {lr_with_SGD_scaled.predictionError(test_scaled)} ')


With feature scaling:
With Stochastic:
Prediction Error : 0.02 


In [141]:
print('With Batch Gradient:')
lr_with_BGD_scaled = LinearRegressionWithGDA(n_features=n_features, lamda = 20)
W = lr_with_BGD_scaled.trainWithMBGD(train_scaled, n_epochs = 100, batch_size = n_samples)
print(f'Prediction Error : {lr_with_BGD_scaled.predictionError(test_scaled)} ')


With Batch Gradient:
Prediction Error : 0.03 


In [142]:
print('With Mini Gradient:')
batch_sizes = [2, 4, 8, 16, 32]
for batch_size in batch_sizes:
  lr_with_BGD_scaled = LinearRegressionWithGDA(n_features=n_features, lamda = 20)
  W = lr_with_BGD_scaled.trainWithMBGD(train_scaled, n_epochs = 100, batch_size = batch_size)
  print(f'Prediction Error with batch size = {batch_size}: {lr_with_BGD_scaled.predictionError(test_scaled)} ')


With Mini Gradient:
Prediction Error with batch size = 2: 0.03 
Prediction Error with batch size = 4: 0.03 
Prediction Error with batch size = 8: 0.02 
Prediction Error with batch size = 16: 0.02 
Prediction Error with batch size = 32: 0.02 
