In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn import preprocessing
from copy import deepcopy

In [2]:
file_name = "Housing.csv"
# Build the data directory with os.path.join rather than the literal ".\data":
# the backslash form only works on Windows, and "\d" is an invalid escape sequence.
path = os.path.join(os.curdir, "data")
path_data = os.path.join(path, file_name)

In [3]:
# Load the housing CSV located at path_data into a DataFrame
housing_data_df  = pd.read_csv(path_data)

In [4]:
# Preview the loaded frame (the rich display abbreviates the middle rows)
housing_data_df

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished


In [5]:
def linear_regresion(X, w, bias):
    """
    Evaluate the linear model on every row of X.

    X    - array (n x m): n objects, m parameters
    w    - regression coefficients, m elements
    bias - scalar intercept

    Returns y_pred, n elements.
    """
    n_rows = X.shape[0]
    # Append a column of ones so the intercept acts as one more coefficient.
    design = np.concatenate((X, np.ones((n_rows, 1))), axis=1)
    params = np.hstack((w, bias))
    return design @ params

In [6]:
def loss(y_pred, y):
    """
    Half mean squared error between predictions and targets.

    Returns sum((y_pred - y) ** 2) / (n * 2), where n = len(y).
    """
    n_samples = len(y)
    residuals = y_pred - y
    squared_total = np.sum(np.power(residuals, 2))
    return squared_total / n_samples / 2
        

In [7]:
def gradient_descent(X, y, w, bias, learning_rate):
    """
    Perform one gradient-descent step for linear regression.

    X             - array (n x m): n objects, m parameters
    y             - targets, n elements
    w             - current regression coefficients, m elements
    bias          - current scalar intercept
    learning_rate - step size applied to the gradient

    Returns the updated (w, bias).
    """
    # Augment X with a ones column once and reuse it for both the prediction
    # and the gradient, instead of rebuilding it in a second helper call.
    X_b = np.hstack((X, np.ones((X.shape[0], 1))))
    w_b = np.hstack((w, bias))

    m = len(y)
    error = np.matmul(X_b, w_b) - y
    # Gradient of sum(error ** 2) / (2 * m) with respect to [w, bias].
    gradient = np.matmul(X_b.T, error) / m
    w_b = w_b - learning_rate * gradient
    # The last entry is the bias; everything before it is w.
    return w_b[:-1], w_b[-1]

In [8]:
# Feature matrix X: the "area", "bedrooms" and "bathrooms" columns as a NumPy array
X = housing_data_df.loc[:, ["area", "bedrooms", "bathrooms"]].to_numpy()

In [9]:
# Standardize X: fit the scaler on X and transform X in a single call
scaler = preprocessing.StandardScaler()
X_scaled = scaler.fit_transform(X)

In [10]:
# Target vector y: the "price" column as a 1-D array
y = housing_data_df["price"].to_numpy()
# Random initial 'w' and 'bias', drawn from a seeded generator so that
# Restart & Run All starts from the same point every time
rng = np.random.default_rng(42)
w = rng.random(X_scaled.shape[1])
bias = rng.random()

In [11]:
# Take a single gradient-descent step and show the parameters before and after it
print(w, bias)
w, bias = gradient_descent(X_scaled, y, w, bias, learning_rate=0.1)
print(w, bias)

[0.38562792 0.12161657 0.81978516] 0.25144486182024994
[100163.37691657  68487.64817831  96715.60482806] 476673.1510710179


In [12]:
# Calculate w and bias by repeating gradient_descent() while the improvement
# loss_prev - loss_pres is greater than epsilon (or until max_steps is reached)

learning_rate = 0.1
epsilon = 1e-2       # same value as the former literal 10e-3
max_steps = 100_000  # safety cap so a very slowly converging run cannot loop forever

y_pred = linear_regresion(X_scaled, w, bias)
loss_pres = loss(y_pred, y)
loss_prev = loss_pres + 10  # guarantees the loop body runs at least once
step = 0

while (loss_prev - loss_pres) > epsilon and step < max_steps:
    step += 1
    w, bias = gradient_descent(X_scaled, y, w, bias, learning_rate)
    y_pred = linear_regresion(X_scaled, w, bias)
    # Losses are plain floats, so simple assignment is enough (no deepcopy needed)
    loss_prev = loss_pres
    loss_pres = loss(y_pred, y)

print ('Steps: ', step, ' | loss_prev - loss_pres: ', loss_prev - loss_pres)

# Pack the coefficients and the bias (last element) into one vector
w_gradient = np.hstack((w, bias))
w_gradient

Steps:  202  | loss_prev - loss_pres:  0.009521484375


array([ 821214.18698128,  299983.89028455,  695808.18420062,
       4766729.24525482])

In [13]:
def analytic_solution(X, y):
    """
    Closed-form least-squares fit via the normal equations.

    X - array (n x m): n objects, m parameters
    y - targets, n elements

    Returns an array of m + 1 values: the m feature coefficients followed
    by the bias.

    Raises numpy.linalg.LinAlgError when X_b.T @ X_b is singular.
    """
    X_b = np.hstack((X, np.ones((X.shape[0], 1))))
    # Solve (X_b^T X_b) w = X_b^T y directly instead of forming the explicit
    # inverse: less work and better numerical behaviour.
    gram = X_b.T @ X_b
    return np.linalg.solve(gram, X_b.T @ y)

In [14]:
# Closed-form coefficients (bias last) computed on the scaled features
w_analytic = analytic_solution(X_scaled, y)
w_analytic

array([ 821214.14349519,  299983.57107963,  695808.52272538,
       4766729.24770642])

In [15]:
# Element-wise ratio w_gradient / w_analytic: values close to 1 mean the two
# solutions agree
diff = w_gradient / w_analytic
# Fail loudly if the two solutions disagree by more than a 1e-4 relative tolerance
np.testing.assert_allclose(w_gradient, w_analytic, rtol=1e-4)
diff

array([1.00000005, 1.00000106, 0.99999951, 1.        ])

## The relative difference between the gradient-descent and analytic coefficients does not exceed 1e-4 (the ratios above deviate from 1 by about 1e-6 at most)