<a href="https://colab.research.google.com/github/Oleksandr190378/data-computing/blob/main/Hm_3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.linear_model import LinearRegression

In [2]:
def linear_regression_hypothesis(W, X):
    """Evaluate the linear model ``X . W``.

    Note the argument order: the weights come first, the features second.

    Args:
        W: Weight vector (or matrix) whose leading dimension matches the
            last dimension of ``X``.
        X: Feature matrix.

    Returns:
        The result of ``np.dot(X, W)``.
    """
    predictions = np.dot(X, W)
    return predictions


In [3]:
def mean_squared_error(X, W, Y):
    """Halved mean squared error of the linear model on ``(X, Y)``.

    Computes ``sum((prediction - Y) ** 2) / (2 * n)`` where ``n`` is the
    number of targets and the predictions come from
    ``linear_regression_hypothesis(W, X)``.

    The computation is vectorized and works on positional arrays, so a
    pandas Series ``Y`` with a non-default index is handled correctly
    (element-wise ``Y[i]`` would be a label lookup there).

    Args:
        X: Feature matrix.
        W: Weight vector.
        Y: Target values (array-like); must hold as many values as there
            are predictions.

    Returns:
        The cost as a NumPy scalar for 1-D targets.

    Raises:
        ValueError: If ``Y`` is empty or its size does not match the
            predictions.
    """
    predictions = np.asarray(linear_regression_hypothesis(W, X))
    targets = np.asarray(Y)
    n = targets.shape[0]
    if n == 0:
        raise ValueError("mean_squared_error requires at least one sample")

    if targets.shape != predictions.shape:
        if targets.size != predictions.size:
            raise ValueError(
                "targets and predictions differ in size: "
                f"{targets.shape} vs {predictions.shape}"
            )
        # e.g. an (n, 1) column against (n,) predictions: align explicitly
        # instead of letting broadcasting build an (n, n) matrix.
        targets = targets.reshape(predictions.shape)

    residuals = predictions - targets
    return np.sum(residuals ** 2, axis=0) / (2.0 * n)

In [4]:
def gradient_descent_step(X, Y, W, learning_rate = 0.1):
    """Perform one batch gradient-descent update of the weights.

    Args:
        X: Feature matrix.
        Y: Target values.
        W: Current weights.
        learning_rate: Step size applied to the gradient.

    Returns:
        The updated weights ``W - learning_rate * X.T . (X . W - Y) / len(Y)``.
    """
    residuals = linear_regression_hypothesis(W, X) - Y
    sample_count = len(Y)
    step = learning_rate * (np.dot(X.T, residuals) / sample_count)
    return W - step

In [5]:
def mean_normalize(X):
    """Centre ``X`` on its mean and scale it by its value range.

    The result is ``(X - mean(X)) / (max(X) - min(X))``. Note that the
    divisor is the range, not the standard deviation.

    When every value is identical the range is zero; in that case zeros
    are returned instead of dividing by zero, matching what
    ``normalize_column`` does for a constant column.

    Args:
        X: Values to normalize (NumPy array, pandas Series, ...).

    Returns:
        The normalized values, in the type produced by ``X - mean``.
    """
    mean = np.mean(X)
    value_range = np.max(X) - np.min(X)
    centered = X - mean
    # Only guard when the range is a single number; if the reductions
    # return one value per column, the original arithmetic is kept as is.
    if np.ndim(value_range) == 0 and value_range == 0:
        return centered * 0.0
    return centered / value_range

In [6]:
def normalize_column(column):
    """Mean-normalize one column: ``(column - mean) / (max - min)``.

    Args:
        column: Object exposing ``mean()``, ``max()`` and ``min()``
            (e.g. a pandas Series or NumPy array).

    Returns:
        The normalized column, or ``column * 0`` when all values are equal.
    """
    spread = column.max() - column.min()
    if spread != 0:
        return (column - column.mean()) / spread
    # Zero range (constant column): return zeros instead of dividing by zero.
    return column * 0

In [7]:
import pandas as pd
# Small 3x3 example frame used to sanity-check normalize_column.
data = {
    'A': [6, 2, 4],
    'B': [4, 5, 6],
    'C': [7, 8, 9]
}
df = pd.DataFrame(data)

# DataFrame.apply hands each column to normalize_column in turn.
normalized_df = df.apply(normalize_column)

print(normalized_df)

     A    B    C
0  0.5 -0.5 -0.5
1 -0.5  0.0  0.0
2  0.0  0.5  0.5


In [8]:
def gradient_descent(X, Y, W, learning_rate=0.01, eps = 0.0000001, max_iter=1000000):
    """Run batch gradient descent until the loss stops changing.

    Each iteration applies ``gradient_descent_step`` and then evaluates
    ``mean_squared_error`` on the data passed in as ``X`` and ``Y``.
    Iteration stops once the absolute change of the loss between two
    consecutive iterations drops below ``eps``.

    Args:
        X: Feature matrix.
        Y: Target values.
        W: Initial weights.
        learning_rate: Step size forwarded to ``gradient_descent_step``.
        eps: Convergence threshold on ``|mse - previous mse|``.
        max_iter: Safety cap on the number of update steps, so a run that
            never meets the threshold still terminates.

    Returns:
        Tuple ``(W, n)``: the final weights and the number of update
        steps that were performed.

    Raises:
        FloatingPointError: If the loss becomes NaN or infinite (for
            example because the learning rate is too large); the
            convergence test could never succeed in that case.
    """
    # Start from +inf so the very first loss is never mistaken for convergence.
    previous_mse = float("inf")
    n = 0
    while n < max_iter:
        # Use the arguments, not notebook-level globals.
        W = gradient_descent_step(X, Y, W, learning_rate)
        mse = mean_squared_error(X, W, Y)
        n += 1
        if not np.all(np.isfinite(mse)):
            raise FloatingPointError(
                f"gradient descent diverged after {n} iterations (loss={mse})"
            )
        if abs(mse - previous_mse) < eps:
            break
        previous_mse = mse
    return W, n

In [9]:
# Mount Google Drive into the Colab runtime at /content/drive so files stored
# there can be read by path. This only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [10]:
# Load the housing CSV from the mounted Drive folder into a DataFrame.
# NOTE(review): pandas was already imported in an earlier cell; the path is an
# absolute Colab/Drive location and has to be adjusted when run elsewhere.
import pandas as pd
file_path = r'/content/drive/My Drive/Colab Notebooks/Housing.csv'
users = pd.read_csv(file_path)

# Preview the first rows.
users.head()


Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus
0,13300000,7420,4,2,3,yes,no,no,no,yes,2,yes,furnished
1,12250000,8960,4,4,4,yes,no,no,no,yes,3,no,furnished
2,12250000,9960,3,2,2,yes,no,yes,no,no,2,yes,semi-furnished
3,12215000,7500,4,2,2,yes,no,yes,no,yes,3,yes,furnished
4,11410000,7420,4,1,2,yes,yes,yes,no,yes,2,no,furnished


In [11]:
# Keep only the target (price) and the three numeric features used below.
# Note: this rebinds `users`, so the full frame is no longer available afterwards.
users = users[['price', 'area', 'bedrooms', 'bathrooms']]
# Show the dtype of each remaining column.
users.dtypes

price        int64
area         int64
bedrooms     int64
bathrooms    int64
dtype: object

In [12]:
# (rows, columns) of the reduced frame.
users.shape

(545, 4)

In [13]:
# Preview the reduced frame.
users.head()

Unnamed: 0,price,area,bedrooms,bathrooms
0,13300000,7420,4,2
1,12250000,8960,4,4
2,12250000,9960,3,2
3,12215000,7500,4,2
4,11410000,7420,4,1


In [14]:
# Target vector: the raw (un-normalized) price column.
Y = users['price']
Y.head()

0    13300000
1    12250000
2    12250000
3    12215000
4    11410000
Name: price, dtype: int64

In [15]:
# Mean-normalize the target: mean_normalize subtracts the mean and divides by (max - min).
Y_norm = mean_normalize(Y)
Y_norm.tail()

540   -0.255128
541   -0.259704
542   -0.261189
543   -0.261189
544   -0.261189
Name: price, dtype: float64

In [45]:
# Initial weights for gradient descent. There are four entries: one for the
# constant column 'C' that is inserted into X_norm, followed by one each for
# area, bedrooms and bathrooms.
W = np.array([0.0,  0.7, 0.3, 0.5])

In [17]:
# Feature matrix: the three numeric predictors, still on their original scales.
X = users[['area', 'bedrooms', 'bathrooms']]

X.head()

Unnamed: 0,area,bedrooms,bathrooms
0,7420,4,2
1,8960,4,4
2,9960,3,2
3,7500,4,2
4,7420,4,1


In [18]:
# Mean-normalize every feature column independently with normalize_column.
X_norm = X.apply(normalize_column)

X_norm.head()

Unnamed: 0,area,bedrooms,bathrooms
0,0.155977,0.206972,0.23792
1,0.261818,0.206972,0.904587
2,0.330547,0.006972,0.23792
3,0.161475,0.206972,0.23792
4,0.155977,0.206972,-0.095413


In [19]:
# Reference fit with scikit-learn on the normalized data, used to compare
# against the hand-written gradient descent and the analytical solution.
# NOTE(review): LinearRegression was already imported in the first cell.
from sklearn.linear_model import LinearRegression

regressor = LinearRegression().fit(X_norm, Y_norm)

# Fitted coefficients, one per column of X_norm.
print(regressor.coef_)

[0.47714269 0.17611257 0.36001286]


In [20]:
# Intercept fitted by scikit-learn.
regressor.intercept_

7.014204135081996e-17

In [21]:
# Prepend a constant column 'C' of ones so that the first weight is multiplied
# by 1 for every row, i.e. it plays the role of the intercept.
# DataFrame.insert modifies X_norm in place and raises ValueError if the column
# already exists, so the guard keeps this cell safe to run more than once.
if 'C' not in X_norm.columns:
    X_norm.insert(0, 'C', 1)
X_norm.head()

Unnamed: 0,C,area,bedrooms,bathrooms
0,1,0.155977,0.206972,0.23792
1,1,0.261818,0.206972,0.904587
2,1,0.330547,0.006972,0.23792
3,1,0.161475,0.206972,0.23792
4,1,0.155977,0.206972,-0.095413


In [46]:

# Run gradient descent from the initial weights W on the normalized data.
# `a` holds the final weights, `n` the number of iterations performed.
a, n = gradient_descent(X_norm, Y_norm, W)
print(a)
print(n)

[5.35366858e-17 5.72352524e-01 2.11528191e-01 3.88763182e-01]
3274


In [37]:
import numpy as np

def analytical_solution(X, Y):
    """Closed-form least-squares weights from the normal equations.

    Solves ``(X.T X) w = X.T Y`` for ``w`` with ``np.linalg.solve``
    instead of forming the matrix inverse explicitly, which is the
    numerically preferable way to solve a linear system.

    Args:
        X: Feature matrix (array-like, e.g. DataFrame or ndarray).
        Y: Target values (array-like).

    Returns:
        NumPy array with the weights ``w``.

    Raises:
        numpy.linalg.LinAlgError: If ``X.T X`` is singular.
    """
    features = np.asarray(X, dtype=float)
    targets = np.asarray(Y, dtype=float)
    gram = features.T @ features
    moment = features.T @ targets
    return np.linalg.solve(gram, moment)

In [36]:
# Closed-form weights on the same normalized data, for comparison with gradient descent.
b = analytical_solution(X_norm,Y_norm)
b

array([2.15869560e-17, 4.77142690e-01, 1.76112569e-01, 3.60012857e-01])

In [47]:
# Element-wise difference between the gradient-descent weights `a` and the
# closed-form weights `b`.
# NOTE(review): in the recorded run the feature weights differ by up to ~0.1,
# which suggests gradient descent stopped on its eps criterion before reaching
# the closed-form optimum; confirm with a smaller eps.
res = a - b
res

array([3.19497297e-17, 9.52098346e-02, 3.54156225e-02, 2.87503253e-02])