In [1]:
import numpy as np
import pandas as pd

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [2]:
df = pd.read_csv('Housing.csv')
df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [3]:
df.shape

(545, 13)

Функція гіпотези лінійної регресії у векторному вигляді:

In [4]:
def hypothesis(X, w):
    return np.dot(X, w)


Функція для обчислення функції втрат у векторному вигляді:

In [5]:
def compute_cost(X, y, w):
    m = len(y)
    predictions = hypothesis(X, w)
    squared_errors = (predictions - y) ** 2
    cost = 1 / (2 * m) * np.sum(squared_errors)
    return cost

Реалізація одного кроку градієнтного спуску:

In [6]:
def gradient_descent_step(X, y, w, learning_rate):
    m = len(y)
    predictions = hypothesis(X, w)
    errors = predictions - y
    gradient = np.dot(X.T, errors) / m
    w -= learning_rate * gradient
    return w
    
    


In [7]:
X = df[['area', 'bedrooms', 'bathrooms']].values
y = df['price'].values.reshape(-1,1)

Знаходження найкращих параметрів vec_w

In [11]:
def train_linear_regression(X, y, initial_weights, learning_rate, num_iterations):
    w = initial_weights.copy()
    loss_history = []

    for i in range(num_iterations):
        w = gradient_descent_step(X, y, w, learning_rate)
        current_loss = compute_cost(X, y, w)
        loss_history.append(current_loss)
    
    return w, loss_history


initial_weights = np.zeros((X.shape[1], 1))
learning_rate = 0.05
num_iters = 10

optimal_weights, loss_history = train_linear_regression(X, y, initial_weights, learning_rate, num_iters)

print("Optimal weights:", optimal_weights)
print("Final loss:", loss_history[-1])


Optimal weights: [[-7.37222047e+64]
 [-3.66259230e+61]
 [-1.61372418e+61]]
Final loss: 8.486414115350273e+136


Знаходження параметрів vec_w за допомогою аналітичного рішення:

In [24]:
from sklearn import linear_model

In [25]:
model = linear_model.LinearRegression()

In [29]:
def normal(data):
    mean = np.mean(data)
    value_range = np.max(data) - np.min(data)
    result = []
    for x in data:
        norm_x = (x - mean) / value_range
        result.append(norm_x)
    return result



norm = pd.DataFrame()


norm['price'] = normal(df.price)
norm['area'] = normal(df.area)
norm['bedrooms'] = normal(df.bedrooms)
norm['bathrooms'] = normal(df.bathrooms)

norm.head(7)

Unnamed: 0,price,area,bedrooms,bathrooms
0,0.738811,0.155977,0.206972,0.23792
1,0.647902,0.261818,0.206972,0.904587
2,0.647902,0.330547,0.006972,0.23792
3,0.644872,0.161475,0.206972,0.23792
4,0.575175,0.155977,0.206972,-0.095413
5,0.52669,0.161475,0.006972,0.571254
6,0.466084,0.235702,0.206972,0.571254


In [32]:
X = np.array([norm.area, norm.bedrooms, norm.bathrooms]).T

In [33]:
X.shape

(545, 3)

In [34]:
Y = np.array(norm.price)

In [31]:
y.shape

(545, 1)

In [35]:
model.fit(X, Y)

In [36]:
model.coef_

array([0.47714269, 0.17611257, 0.36001286])

In [37]:
model.intercept_

7.014204135081996e-17

In [38]:
X = np.array([norm.area, norm.bedrooms, norm.bathrooms])
Y = np.array(norm.price)

C = np.dot(X, X.T)
C_1 = np.linalg.inv(C)

C_ALL = np.dot(C_1, X)
W = np.dot(C_ALL, Y)

print(W)



[0.47714269 0.17611257 0.36001286]
