In [37]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [40]:
from google.colab import drive
drive.mount('/content/gdrive')
from google.colab import files

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [41]:
# Linear Regression

def h(X, W):
  return np.dot(X, W)

In [42]:
# Loss function

def loss_function(X, Y, W):
  m = X.shape[0]
  return np.square(h(X, W) - Y).sum()/(2*m)

In [43]:
# Gradient Descent

def grad_step(W, grad_w, learning_rate=0.001):
  W = W - learning_rate*grad_w
  return W

In [44]:
def grad(X, Y, W):
  m = X.shape[0]
  np.dot(X.T, (h(X, W) - Y))/m
  return np.dot(X.T, (h(X, W) - Y)) / m

In [45]:
def grad_descent(X, Y, W, num_iter=10000, learning_rate=0.001, epsilon=0.0000001):
  loss = loss_function(X, Y, W)
  loss_history = [loss]
  for i in range(num_iter):
    best = None
    grad_w = grad(X, Y, W)
    W = grad_step(W, grad_w, learning_rate=learning_rate)
    loss = loss_function(X, Y, W)
    if abs(loss - loss_history[-1]) < epsilon:
      loss_history.append(loss)
      best = grad_w
      break
    loss_history.append(loss)
  return W, best, loss_history

In [46]:
df = pd.read_csv('Housing.csv')
df

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished
...,...,...,...,...,...,...,...,...,...,...,...,...,...
540,1820000,3000,2,1,1,yes,no,yes,no,no,2,no,unfurnished
541,1767150,2400,3,1,1,no,no,no,no,no,0,no,semi-furnished
542,1750000,3620,2,1,1,yes,no,no,no,no,0,no,unfurnished
543,1750000,2910,3,1,1,no,no,no,no,no,0,no,furnished


In [47]:
def normalize(value):
  return (value - value.mean())/value.std()

In [48]:
normal_df = pd.DataFrame()

In [49]:
normal_df['price'] = normalize(df['price'])
normal_df['area'] = normalize(df['area'])
normal_df['bedrooms'] = normalize(df['bedrooms'])
normal_df['bathrooms'] = normalize(df['bathrooms'])

In [51]:
normal_df.head(5)

Unnamed: 0,price,area,bedrooms,bathrooms
0,4.562174,1.045766,1.402131,1.420507
1,4.000809,1.755397,1.402131,5.400847
2,4.000809,2.216196,0.047235,1.420507
3,3.982096,1.08263,1.402131,1.420507
4,3.551716,1.045766,1.402131,-0.569663


In [52]:
Y = normal_df["price"].values.reshape(-1, 1)
X = normal_df[['area', 'bathrooms', 'bedrooms']].values
X = np.hstack((np.ones((X.shape[0], 1)), X))
N = X.shape[1]
W = np.linspace(0, 0, N).reshape((N, 1))

In [53]:
W, best, loss_history = grad_descent(X, Y, W, 10000, learning_rate=0.001)
loss = loss_history[-1]
print(f'Best values: {best}')
print(f'Loss func: {loss}')

Best values: [[-1.46671666e-17]
 [-6.74033514e-03]
 [-4.72771123e-03]
 [ 5.66844067e-03]]
Loss func: 0.25605342833253486


In [54]:
theta = np.dot(np.linalg.inv(np.dot(X.T, X)), np.dot(X.T, Y))
analytical = loss_function(X, Y, theta)
print(f'Best values: {theta}')

Best values: [[-3.69936497e-16]
 [ 4.39452085e-01]
 [ 3.72344423e-01]
 [ 1.60528660e-01]]


In [55]:
print(f'Analytical value of loss func: {analytical} and value of loss function {loss} ')

Analytical value of loss func: 0.2559879006532141 and value of loss function 0.25605342833253486 


In [None]:
# Висновок: отримані данні майже однакові між собою