In [43]:
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression

In [44]:
# Read the housing dataset; the path is relative to the notebook's working directory.
data = pd.read_csv("Housing.csv")

In [45]:
# Feature matrix: the area, bedrooms and bathrooms columns as a NumPy array.
X = data[['area', 'bedrooms', 'bathrooms']].values
# Target vector: the price column as a NumPy array.
y = data['price'].values

In [46]:
# Echo the feature matrix as the cell's output for a quick visual check.
X

array([[7420,    4,    2],
       [8960,    4,    4],
       [9960,    3,    2],
       ...,
       [3620,    2,    1],
       [2910,    3,    1],
       [3850,    3,    1]])

In [47]:
# Echo the target vector as the cell's output for a quick visual check.
y

array([13300000, 12250000, 12250000, 12215000, 11410000, 10850000,
       10150000, 10150000,  9870000,  9800000,  9800000,  9681000,
        9310000,  9240000,  9240000,  9100000,  9100000,  8960000,
        8890000,  8855000,  8750000,  8680000,  8645000,  8645000,
        8575000,  8540000,  8463000,  8400000,  8400000,  8400000,
        8400000,  8400000,  8295000,  8190000,  8120000,  8080940,
        8043000,  7980000,  7962500,  7910000,  7875000,  7840000,
        7700000,  7700000,  7560000,  7560000,  7525000,  7490000,
        7455000,  7420000,  7420000,  7420000,  7350000,  7350000,
        7350000,  7350000,  7343000,  7245000,  7210000,  7210000,
        7140000,  7070000,  7070000,  7035000,  7000000,  6930000,
        6930000,  6895000,  6860000,  6790000,  6790000,  6755000,
        6720000,  6685000,  6650000,  6650000,  6650000,  6650000,
        6650000,  6650000,  6629000,  6615000,  6615000,  6580000,
        6510000,  6510000,  6510000,  6475000,  6475000,  6440

In [48]:
def hypothesis(X, w):
    """Evaluate the linear model ``X . w``.

    Parameters
    ----------
    X : array-like
        Input features; passed as the first operand of ``np.dot``.
    w : array-like
        Model weights; passed as the second operand of ``np.dot``.

    Returns
    -------
    The dot product of ``X`` and ``w`` as computed by ``np.dot``.
    """
    predictions = np.dot(X, w)
    return predictions

In [49]:
def mse(y_true, y_pred):
    """Return the mean squared error between targets and predictions.

    Parameters
    ----------
    y_true : array
        Observed target values.
    y_pred : array
        Predicted values; subtracted element-wise from ``y_true``.

    Returns
    -------
    The mean of the squared differences ``(y_true - y_pred) ** 2``.
    """
    residuals = y_true - y_pred
    return np.mean(residuals ** 2)

In [50]:
def gradient_step(X, y, w, lr):
    """Perform one gradient-descent step on the mean-squared-error loss.

    The gradient of ``mean((y - X.w) ** 2)`` with respect to ``w`` is
    ``-2/n * X.T . (y - X.w)``; the weights are moved against it by ``lr``.

    Parameters
    ----------
    X : ndarray
        Design matrix; ``X.T`` and ``X.dot`` are used, so it must be an array.
    y : array
        Target values; ``len(y)`` is taken as the sample count ``n``.
    w : array
        Current weights. This array is NOT modified.
    lr : float
        Learning rate (step size).

    Returns
    -------
    ndarray
        A new array holding the updated weights.
    """
    n = len(y)
    gradients = -2/n * np.dot(X.T, y - X.dot(w))
    # Build a fresh array instead of ``w -= ...``: the in-place form silently
    # overwrites the caller's weights and fails with a casting error when
    # ``w`` has an integer dtype.
    return w - lr * gradients

In [51]:
def linear_regression(X, y, iterations=1000, lr=0.001):
    """Fit a linear model with a fixed number of gradient-descent steps.

    Parameters
    ----------
    X : ndarray
        Feature matrix without an intercept column; ``X.shape[0]`` rows.
    y : array
        Target values, forwarded to ``gradient_step``.
    iterations : int, default 1000
        Number of calls to ``gradient_step``.
    lr : float, default 0.001
        Learning rate forwarded to ``gradient_step``.

    Returns
    -------
    tuple
        ``(intercept, weights)``: the first fitted parameter and the
        remaining ones, in the column order of ``X``.
    """
    # Prepend a column of ones so the first parameter acts as the intercept.
    bias_column = np.ones((X.shape[0], 1))
    design = np.c_[bias_column, X]

    # Start from all-zero parameters.
    params = np.zeros(design.shape[1])
    for _ in range(iterations):
        params = gradient_step(design, y, params, lr)

    return params[0], params[1:]

In [52]:
def analytical_solution(X, y):
    """Solve the least-squares normal equations ``(X.T X) w = X.T y``.

    No intercept column is added here; ``X`` is used exactly as given.

    Parameters
    ----------
    X : ndarray
        Design matrix.
    y : array
        Target values.

    Returns
    -------
    ndarray
        The weight vector returned by ``np.linalg.solve``.
    """
    gram = np.dot(X.T, X)
    moment = np.dot(X.T, y)
    return np.linalg.solve(gram, moment)

In [53]:
def sklearn_regression(X, y):
    """Fit scikit-learn's ``LinearRegression`` on ``X`` and ``y``.

    Returns
    -------
    tuple
        ``(coef_, intercept_)`` of the fitted model. Note the order:
        coefficients first, intercept second.
    """
    fitted = LinearRegression().fit(X, y)
    return fitted.coef_, fitted.intercept_

In [54]:
# Standardise each feature column: subtract its mean, divide by its standard deviation.
X_mean = X.mean(axis=0)
X_std = X.std(axis=0)
X_norm = (X - X_mean) / X_std

# Standardise the target in the same way.
y_mean = y.mean()
y_std = y.std()
y_norm = (y - y_mean) / y_std

# Gradient descent
intercept, weights = linear_regression(
    X_norm,
    y_norm,
    iterations=5000,
    lr=0.001,
)

# Analytical (normal-equation) solution; no intercept column is passed in.
weights_analytic = analytical_solution(X_norm, y_norm)

# scikit-learn reference fit
weights_sklearn, intercept_sklearn = sklearn_regression(X_norm, y_norm)

# Print the three weight vectors one after another for comparison.
print(f'Знайдені параметри w: {weights}')
print(f'Аналітичне рішення: {weights_analytic}')
print(f'scikit-learn: {weights_sklearn}')

Знайдені параметри w: [0.4394435  0.16069884 0.37218555]
Аналітичне рішення: [0.43945209 0.16052866 0.37234442]
scikit-learn: [0.43945209 0.16052866 0.37234442]
