In [239]:
from google.colab import drive
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
%matplotlib inline

In [240]:
# Mount Google Drive into the Colab filesystem so the CSV path below resolves.
drive.mount('/content/drive/')

# Load the housing dataset from Drive into a DataFrame used by the later cells.
housing = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/Housing.csv')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


Гіпотеза

In [241]:
def h(X, w):
    """Linear hypothesis: the dot product of the inputs ``X`` with weights ``w``.

    Parameters
    ----------
    X : array-like
        Input matrix (one row per sample).
    w : array-like
        Weight vector or column compatible with ``X`` under ``np.dot``.

    Returns
    -------
    The result of ``np.dot(X, w)``.
    """
    predictions = np.dot(X, w)
    return predictions

Функція втрат

In [242]:
def loss_function(X, y, w):
    """Half mean squared error of the linear hypothesis on ``(X, y)``.

    Computes ``sum((h(X, w) - y) ** 2) / (2 * m)`` where ``m`` is the number
    of rows in ``y``.

    Parameters
    ----------
    X : array-like
        Input matrix handed to ``h``.
    y : numpy.ndarray
        Target values; ``y.shape[0]`` is used as the sample count.
    w : array-like
        Weights handed to ``h``.

    Returns
    -------
    The scalar cost.
    """
    n_samples = y.shape[0]
    predictions = h(X, w)
    half_mean_factor = 1 / (2 * n_samples)
    squared_errors = np.square(predictions - y)
    return half_mean_factor * np.sum(squared_errors)

Градієнтний спуск

In [243]:
def gradient(X, y, w):
    """Gradient of the half-MSE cost with respect to the weights.

    Evaluates ``X.T @ (h(X, w) - y) / m`` where ``m`` is ``y.size``.

    Parameters
    ----------
    X : numpy.ndarray
        Input matrix.
    y : numpy.ndarray
        Target values; ``y.size`` is used as the sample count.
    w : array-like
        Current weights.

    Returns
    -------
    The gradient, with the shape produced by ``X.T @ (h(X, w) - y)``.
    """
    n_samples = y.size
    errors = h(X, w) - y
    return (X.T @ errors) / n_samples

In [244]:
def grad_descent(X, y, w, num_iterations, learning_rate):
    """Run batch gradient descent and return the lowest-loss weights seen.

    Parameters
    ----------
    X : array-like
        Input matrix forwarded to ``gradient`` and ``loss_function``.
    y : array-like
        Target values forwarded to ``gradient`` and ``loss_function``.
    w : array-like
        Initial weights. They are copied into a float array on entry, so the
        caller's array is never modified.
    num_iterations : int
        Number of update steps to perform.
    learning_rate : float
        Step size multiplying the gradient at every update.

    Returns
    -------
    best_weights : numpy.ndarray
        Weights that produced the smallest recorded loss. If no step ever
        yields a loss below infinity (zero iterations, or every loss is
        NaN/inf), this is a copy of the initial weights.
    loss_history : list
        Loss after each update step, in order.
    """
    # Work on a private float copy: the in-place update below must not leak
    # into the caller's array, and an integer-typed ``w`` could not absorb a
    # floating-point step in place.
    w = np.array(w, dtype=float)

    best_loss = np.inf
    # Bind a fallback up front so the return statement is always valid, even
    # when the loop body never runs or never improves on ``best_loss``.
    best_weights = w.copy()
    loss_history = []
    for _ in range(num_iterations):
        w -= learning_rate * gradient(X, y, w)

        loss = loss_function(X, y, w)
        loss_history.append(loss)

        # Snapshot the weights: ``w`` keeps being updated in place afterwards.
        if loss < best_loss:
            best_loss = loss
            best_weights = w.copy()

    return best_weights, loss_history

Нормалізація

In [245]:
def norm(data):
    """Standardize ``data`` along axis 0 to zero mean and unit standard deviation.

    Parameters
    ----------
    data : numpy.ndarray (or another object exposing ``mean``/``std`` with ``axis``)
        Values to standardize; statistics are taken along axis 0.

    Returns
    -------
    ``(data - mean) / std`` computed per column. Columns whose standard
    deviation is exactly zero are divided by 1 instead, so they come back as
    all zeros rather than NaN.
    """
    data_mean = data.mean(axis=0)
    data_std = data.std(axis=0)
    # A constant column has std == 0; dividing by it would turn the whole
    # column into NaN and poison every downstream loss/gradient value.
    safe_std = np.where(data_std == 0, 1.0, data_std)
    return (data - data_mean) / safe_std

Розрахунок

In [246]:
# Feature matrix from three columns and the price target as a column vector.
X = housing[["area", "bedrooms", "bathrooms"]].values
y = housing["price"].values.reshape(-1, 1)

# Standardize features and target with the notebook's norm() helper.
X_norm = norm(X)
y_norm = norm(y)

# Prepend a column of ones so the first weight acts as the intercept.
X_norm = np.hstack((np.ones((y.size, 1)), X_norm))

# One weight per feature plus one for the ones column, all starting at zero.
n = X.shape[1]
w = np.zeros((n + 1, 1))

# Gradient-descent hyperparameters.
learning_rate = 0.01
num_iterations = 2000

# Fit the weights; loss_history holds the loss recorded after every step.
w, loss_history = grad_descent(X_norm, y_norm, w, num_iterations, learning_rate)

Аналітичне рішення

In [247]:
# Design matrix for the closed-form fit: a column of ones followed by the
# feature columns of X_norm (everything after its first column).
X_norm_analytical = np.hstack((np.ones((y.size, 1)), X_norm[:, 1:]))

# Normal equations (X^T X) w = X^T y, solved directly instead of forming an
# explicit inverse: same solution, one factorization, less rounding error.
w_analytical = np.linalg.solve(
    X_norm_analytical.T @ X_norm_analytical,
    X_norm_analytical.T @ y_norm,
)

Порівняння коефіцієнтів

In [248]:
# Print both weight vectors so the closed-form and gradient-descent fits can be compared.
print(f'Коефіцієнти (аналітичне рішення): {w_analytical}')
print(f'Коефіцієнти (градієнтий спуск): {w}')

Коефіцієнти (аналітичне рішення): [[6.82595372e-16]
 [4.39452085e-01]
 [1.60528660e-01]
 [3.72344423e-01]]
Коефіцієнти (градієнтий спуск): [[2.97205685e-16]
 [4.39452124e-01]
 [1.60528959e-01]
 [3.72344106e-01]]
