In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Linear Regression

$$h(\vec{x}) = \vec{w} \cdot \vec{x}$$

In [None]:
def h(X, W):
  """Linear hypothesis: the dot product of the inputs X with the weights W.

  Args:
    X: input features (anything `np.dot` accepts as its first argument).
    W: weights (anything `np.dot` accepts as its second argument).

  Returns:
    The result of `np.dot(X, W)`.
  """
  predictions = np.dot(X, W)
  return predictions

# Loss function

$$J(\vec{w}) = \frac{1}{2m}\sum_{i=1}^{m}\left(h(\vec{x}^{(i)}) - y^{(i)}\right)^2$$

In [None]:
def loss_function(X, Y, W):
  """Half mean squared error of the linear hypothesis on (X, Y).

  Args:
    X: input features; `X.shape[0]` is used as the number of samples m.
    Y: targets, subtracted element-wise from `h(X, W)`. NumPy broadcasting
      applies, so Y should have the same shape as `h(X, W)`.
    W: weights passed to `h`.

  Returns:
    Sum of squared residuals divided by 2*m.
  """
  residuals = h(X, W) - Y
  m = X.shape[0]
  return np.square(residuals).sum()/(2*m)



# Gradient Descent

$$\vec{w} = \vec{w}^{prev} - \alpha \nabla J(\vec{w}^{prev})$$

In [None]:
def grad_step(W, grad_w, learning_rate=0.001):
  """Take one gradient-descent step.

  Args:
    W: current weights.
    grad_w: gradient of the loss with respect to W.
    learning_rate: step size multiplying the gradient.

  Returns:
    The updated weights `W - learning_rate*grad_w` as a new object; the
    argument W is not modified in place.
  """
  return W - learning_rate*grad_w

$$\nabla J(\vec{w}) = \frac{1}{m} X^T (X\vec{w} - \vec{y})$$

where:

$$X\vec{w} = \vec{h}(X)$$



In [None]:
def grad(X, Y, W):
  """Gradient of the loss with respect to the weights W.

  Computes `X^T (h(X, W) - Y) / m`, where m is `X.shape[0]`.

  Args:
    X: input features; must expose `.shape` and `.T`.
    Y: targets, subtracted element-wise from `h(X, W)`.
    W: weights passed to `h`.

  Returns:
    The result of `np.dot(X.T, h(X, W) - Y) / m`.
  """
  m = X.shape[0]
  # The gradient is computed exactly once per call.
  return np.dot(X.T, (h(X, W) - Y)) / m


In [None]:
def grad_descent(X, Y, W, num_iter=10000, learning_rate=0.001, epsilon=0.0000001):
  """Run gradient descent until the loss stops changing or num_iter is reached.

  Each iteration computes the gradient with `grad`, updates the weights with
  `grad_step` and evaluates `loss_function`. The loop stops early once the
  absolute change in loss between two consecutive iterations is below epsilon.

  Args:
    X: input features, forwarded to `grad` and `loss_function`.
    Y: targets, forwarded to `grad` and `loss_function`.
    W: initial weights.
    num_iter: maximum number of update steps.
    learning_rate: step size forwarded to `grad_step`.
    epsilon: convergence threshold on the absolute change in loss.

  Returns:
    A tuple `(W, best, loss_history)`:
      W: the weights after the last update that was performed.
      best: the converged weights (equal to W) when the epsilon criterion
        was met, or None when the loop ended without converging.
      loss_history: list of loss values, starting with the loss of the
        initial weights and followed by one entry per update.
  """
  loss_history = [loss_function(X, Y, W)]
  # Bound before the loop so the return is valid even when num_iter == 0.
  best = None
  for _ in range(num_iter):
    W = grad_step(W, grad(X, Y, W), learning_rate=learning_rate)
    loss = loss_function(X, Y, W)
    # Compare against the previous loss before recording the new one.
    converged = abs(loss - loss_history[-1]) < epsilon
    loss_history.append(loss)
    if converged:
      # The "best" result is the converged weight vector itself; the gradient
      # at this point is close to zero and carries no fitted parameters.
      best = W
      break
  return W, best, loss_history


# Best weight vector $\vec{w}$ for the dataset: the house price forecast depends on the area and on the number of bathrooms and bedrooms

In [None]:
# Load the housing dataset; the path is relative to the current working directory.
table = pd.read_csv('Housing.csv')

In [None]:
# Preview the first 10 rows of the raw table.
table.head(10)

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
5,10850000,7500,3,3,1,yes,no,yes,no,yes,2,yes,semi-furnished
6,10150000,8580,4,3,4,yes,no,no,no,yes,2,yes,semi-furnished
7,10150000,16200,5,3,2,yes,no,no,no,no,0,no,unfurnished
8,9870000,8100,4,1,2,yes,yes,yes,no,yes,2,yes,furnished
9,9800000,5750,3,2,4,yes,yes,no,no,yes,1,yes,unfurnished


In [None]:
def normalize(value):
  """Standardize `value` by subtracting its mean and dividing by its std.

  The statistics come from the object's own `mean()` and `std()` methods, so
  the definition of std follows the input type: pandas objects default to the
  sample standard deviation (ddof=1), NumPy arrays to the population one
  (ddof=0).

  Args:
    value: an object exposing `mean()` and `std()` and supporting
      arithmetic with their results (e.g. a pandas Series or NumPy array).

  Returns:
    `(value - mean) / std`, of the same kind as `value`.
  """
  centered = value - value.mean()
  scale = value.std()
  return centered/scale


In [None]:
# Start from an empty frame; the normalized columns are added to it in the next cell.
normalized_table = pd.DataFrame()

In [None]:
# Standardize the target and the three features, one column at a time,
# keeping the column order price, area, bedrooms, bathrooms.
for column in ('price', 'area', 'bedrooms', 'bathrooms'):
  normalized_table[column] = normalize(table[column])

In [None]:
# Preview the first 10 rows of the normalized table.
normalized_table.head(10)

Unnamed: 0,price,area,bedrooms,bathrooms
0,4.562174,1.045766,1.402131,1.420507
1,4.000809,1.755397,1.402131,5.400847
2,4.000809,2.216196,0.047235,1.420507
3,3.982096,1.08263,1.402131,1.420507
4,3.551716,1.045766,1.402131,-0.569663
5,3.252321,1.08263,0.047235,3.410677
6,2.878078,1.580293,1.402131,3.410677
7,2.878078,5.091586,2.757028,3.410677
8,2.72838,1.359109,1.402131,-0.569663
9,2.690956,0.27623,0.047235,1.420507


In [None]:
# Target: the normalized price as a single-column array.
Y = normalized_table["price"].values.reshape(-1, 1)
# Features, in the order area, bathrooms, bedrooms.
X = normalized_table[['area', 'bathrooms', 'bedrooms']].values
# Prepend a column of ones so the first weight acts as the intercept.
X = np.hstack((np.ones((X.shape[0], 1)), X))
N = X.shape[1]
# Initial weights: one zero per column of X, as an (N, 1) column vector.
W = np.zeros((N, 1))



In [None]:
# Fit the weights by gradient descent, starting from the current W.
# NOTE: W is overwritten with the result, so re-running this cell resumes the
# descent from the previous weights instead of starting over.
W, best, loss_history = grad_descent(X, Y, W, 10000, learning_rate=0.001)
loss = loss_history[-1]
# Report the fitted weights W: `best` is None whenever the loop ends without
# meeting the epsilon criterion, so it is not a reliable source of the weights.
print(f'Best values: {W}')
print(f'Loss func: {loss}')

Best values: [[-1.46671666e-17]
 [-6.74033514e-03]
 [-4.72771123e-03]
 [ 5.66844067e-03]]
Loss func: 0.25605342833253486


# Normal equation

$$(\vec{w})^* = (X^TX)^{-1}X^T\vec{y}$$

In [None]:
# Solve the normal equations (X^T X) theta = X^T y as a linear system rather
# than forming the explicit inverse: same solution, but cheaper and
# numerically more stable.
theta = np.linalg.solve(np.dot(X.T, X), np.dot(X.T, Y))
analytical = loss_function(X, Y, theta)
print(f'Best values: {theta}')

Best values: [[-3.69936497e-16]
 [ 4.39452085e-01]
 [ 3.72344423e-01]
 [ 1.60528660e-01]]


In [None]:
# Compare the loss of the normal-equation solution (`analytical`) with the
# last loss recorded by gradient descent (`loss`).
print(f'Analytical value of loss func: {analytical} and value of loss function {loss} ')

Analytical value of loss func: 0.2559879006532141 and value of loss function 0.25605342833253486 
