> Họ và tên: Phùng Dũng Quân\
> MSSV: 22280073

# Bài 1:

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

# Load the house-price table from a CSV file located in the working directory.
df = pd.read_csv('bang_gia_nha.csv')
# Bare expression at the end of the cell: the notebook renders the frame as output.
df

Unnamed: 0,STT,Dien tich (m2),So phong ngu,Khoang cach toi TT,Gia (ty VND)
0,1,40,1,30.0,1.1
1,2,60,2,32.0,1.55
2,3,53,2,30.1,1.68
3,4,71,2,35.7,1.75
4,5,80,2,24.5,5.5
5,6,56,2,27.6,2.3
6,7,75,2,27.6,3.0
7,8,79,2,27.6,3.5
8,9,56,2,29.7,2.4
9,10,60,2,29.7,2.9


In [2]:
# Feature table: every column except the row counter and the target price.
W = df.drop(columns=['STT', 'Gia (ty VND)'])
# Target vector: the price column as a plain NumPy array.
d = df.loc[:, 'Gia (ty VND)'].values

def loss_function(x, W, d):
    """Return the least-squares loss 1/2 * ||d - W x||_2^2.

    Parameters
    ----------
    x : coefficient vector.
    W : design matrix (one row per sample).
    d : target vector.

    Returns
    -------
    Half of the sum of squared residuals. This is the objective whose
    gradient is -W^T (d - W x).
    """
    residual = d - W.dot(x)
    # Square each residual first, then sum; squaring the sum would let
    # positive and negative residuals cancel each other out.
    return 0.5 * np.sum(residual ** 2)

def nabla_L(x, W, d):
    """Return -W^T (d - W x), evaluated at the coefficient vector x.

    W is the design matrix and d the target vector.
    """
    residual = d - W.dot(x)
    projected = W.T.dot(residual)
    return -projected

def checking_inverse(matrix):
    """Return True when `matrix` is numerically invertible, False otherwise.

    The check is rank based: a square matrix is treated as invertible when
    its numerical rank equals its size. A non-square or non-2D input is
    reported as not invertible.

    Comparing the determinant with exactly 0 is unreliable in floating
    point: a singular matrix often yields a tiny non-zero determinant, and
    the determinant of a well-conditioned matrix with very small entries can
    underflow to 0.
    """
    A = np.asarray(matrix, dtype=float)
    if A.ndim != 2 or A.shape[0] != A.shape[1]:
        return False
    return bool(np.linalg.matrix_rank(A) == A.shape[0])
    
def L2_error(x, y):
    """Return the Euclidean (L2) distance between two equally shaped vectors.

    Parameters
    ----------
    x, y : array-likes of the same shape.

    Returns
    -------
    sqrt(sum_i (x[i] - y[i])^2), accumulated along the first axis.

    Raises
    ------
    ValueError
        If the shapes differ. Rejecting the input is safer than quietly
        comparing only a prefix of one of the vectors.
    """
    x_arr = np.asarray(x)
    y_arr = np.asarray(y)
    if x_arr.shape != y_arr.shape:
        raise ValueError(
            f"L2_error expects inputs of equal shape, got {x_arr.shape} and {y_arr.shape}"
        )
    difference = x_arr - y_arr
    return np.sqrt(np.sum(difference ** 2, axis=0))

## SVD

In [3]:
# Prepare the inputs for the solvers.
# Standardize the feature columns (W carries no bias column at this point),
# then prepend a column of ones so the first coefficient acts as the intercept.
scaler = StandardScaler()
W_scaled = scaler.fit_transform(W)
W_scaled = np.c_[np.ones(W_scaled.shape[0]), W_scaled]

# Query point to predict, given in the same column order as W, pushed through
# the same scaler and extended with the same leading one.
house_features = np.array([(79, 2, 26.5)])
scaled_house_features = scaler.transform(house_features)
scaled_house_features = np.c_[np.ones(scaled_house_features.shape[0]), scaled_house_features]

def svd_decomposition(W, d):
    """Solve the least-squares problem min_x ||W x - d||_2 through the SVD of W.

    With W = U diag(s) V^T the returned solution is x = V diag(1/s) U^T d,
    where singular values at or below a relative tolerance are treated as
    zero and their reciprocal is set to 0 (Moore-Penrose pseudo-inverse).

    Parameters
    ----------
    W : 2D array-like design matrix (one row per sample).
    d : 1D array-like target vector with one entry per row of W.

    Returns
    -------
    1D array with one coefficient per column of W. When W is rank deficient
    this is the minimum-norm least-squares solution.
    """
    A = np.asarray(W, dtype=float)
    b = np.asarray(d, dtype=float)
    # Factorize W itself rather than W^T W: forming the normal equations
    # squares the condition number and loses precision.
    U, s, Vt = np.linalg.svd(A, full_matrices=False)
    # Same cut-off rule as numpy.linalg.matrix_rank: anything below
    # eps * max(shape) * largest singular value is numerically zero.
    tolerance = np.finfo(float).eps * max(A.shape) * s.max(initial=0.0)
    s_inv = np.zeros_like(s)
    significant = s > tolerance
    # Invert only the significant singular values; inverting a (near-)zero
    # one would put inf/nan into the solution.
    s_inv[significant] = 1.0 / s[significant]
    return Vt.T @ (s_inv * (U.T @ b))

# Closed-form coefficients; the later cells compare their results against them.
x_svd = svd_decomposition(W_scaled, d)
# Prediction for the query house: design row times coefficient vector.
predicted_price_svd = scaled_house_features @ x_svd
print("Hệ số dự đoán là:\n", x_svd)
print("Giá trị dự đoán được là:", predicted_price_svd[0])

Hệ số dự đoán là:
 [ 2.59342105  0.73101391  0.19867214 -0.00295743]
Giá trị dự đoán được là: 3.2426832061065363


## Gradient descent

In [4]:
# Starting point for the iterative solvers: the zero vector, with one entry
# per column of the design matrix.
initial_point = np.zeros((W_scaled.shape[1],))

# Gradient descent function
def gradient_descent(W, d, initial_point, eta, max_iterations, epsilon):
    """Minimize the least-squares loss with fixed-step gradient descent.

    Parameters
    ----------
    W : design matrix.
    d : target vector.
    initial_point : starting coefficient vector (not modified).
    eta : step size.
    max_iterations : maximum number of update steps to take.
    epsilon : stop as soon as the gradient norm at the new iterate drops
        below this value.

    Returns
    -------
    The last iterate: either the first one whose gradient norm is below
    `epsilon`, or the iterate reached after `max_iterations` steps.
    """
    current_point = np.array(initial_point, dtype=float)
    gradient = nabla_L(current_point, W, d)
    # range(max_iterations) performs exactly max_iterations updates.
    for _ in range(max_iterations):
        current_point = current_point - eta * gradient
        # One gradient evaluation per step: it drives the stopping test
        # here and the update of the next iteration.
        gradient = nabla_L(current_point, W, d)
        if np.linalg.norm(gradient) < epsilon:
            break
    return current_point

# Run plain gradient descent and compare its coefficients with the closed-form ones.
x_gd = gradient_descent(W_scaled, d, initial_point, 0.001, 1000, 1e-6)
predicted_price_gd = scaled_house_features @ x_gd
print("Hệ số dự đoán (Gradient Descent):", x_gd)
print("Giá trị dự đoán (Gradient Descent):", predicted_price_gd[0])
print("Chênh lệch hệ số dự đoán:", L2_error(x_svd, x_gd))

Hệ số dự đoán (Gradient Descent): [ 2.59342104  0.73101383  0.19867211 -0.00295747]
Giá trị dự đoán (Gradient Descent): 3.242683123706925
Chênh lệch hệ số dự đoán: 9.715968447581369e-08


## Accelerated gradient descent

In [5]:
# Accelerated gradient descent function
def accelerated_gradient_descent(W, d, initial_point, eta, max_iterations, epsilon):
    """Gradient descent with a momentum (look-ahead) term.

    At step i the gradient step is taken from an extrapolated point
    current + (i - 1) / (i + 2) * (current - previous) rather than from the
    current iterate. Performs at most `max_iterations` steps and returns
    early with the new iterate once its gradient norm is below `epsilon`.
    """
    older = initial_point.copy()
    newer = initial_point.copy()
    for step in range(max_iterations):
        momentum = (step - 1) / (step + 2)
        lookahead = newer + momentum * (newer - older)
        candidate = lookahead - eta * nabla_L(lookahead, W, d)
        if np.linalg.norm(nabla_L(candidate, W, d)) < epsilon:
            return candidate
        older, newer = newer, candidate
    return newer

# Run the accelerated variant and compare its coefficients with the closed-form ones.
x_agd = accelerated_gradient_descent(W_scaled, d, initial_point, 0.001, 1000, 1e-6)
predicted_price_agd = scaled_house_features @ x_agd
print("Hệ số dự đoán (Accelerated Gradient Descent):", x_agd)
print("Giá trị dự đoán (Accelerated Gradient Descent):", predicted_price_agd[0])
print("Chênh lệch hệ số dự đoán:", L2_error(x_svd, x_agd))

Hệ số dự đoán (Accelerated Gradient Descent): [ 2.59342107  0.73101395  0.19867217 -0.00295741]
Giá trị dự đoán (Accelerated Gradient Descent): 3.2426832586356564
Chênh lệch hệ số dự đoán: 5.5310199190639705e-08


## Stochastic gradient descent

In [6]:
def nabla_L_of_1_set(x, w_i, d_i):
    """Gradient of the squared error (w_i . x - d_i)^2 of one sample with respect to x."""
    prediction_error = w_i.dot(x) - d_i
    return 2 * prediction_error * w_i

def nabla_L_of_all_data(x, W, d):
    """Gradient of the mean squared error over all samples: 2 W^T (W x - d) / len(d)."""
    residual = W.dot(x) - d
    return 2 * W.T.dot(residual) / len(d)

# Stochastic gradient descent
def stochastic_gradient_descent(W, d, initial_point, eta, max_iterations, epsilon, seed=None):
    """Minimize the squared error with single-sample stochastic gradient steps.

    Each step draws one row index uniformly at random and moves against the
    gradient of that sample's squared error. The gradient over the whole
    data set is used only for the stopping test.

    Parameters
    ----------
    W : 2D array, design matrix (rows are indexed as W[idx]).
    d : 1D array of targets.
    initial_point : starting coefficient vector (not modified).
    eta : step size.
    max_iterations : maximum number of update steps to take.
    epsilon : stop once the full-data gradient norm is below this value.
    seed : optional seed for a private random generator. When None
        (the default) indices come from NumPy's global random state, so a
        preceding np.random.seed(...) call still controls the run.

    Returns
    -------
    The last iterate.
    """
    rng = None if seed is None else np.random.default_rng(seed)
    number_of_data = len(d)
    current_point = np.array(initial_point, dtype=float)
    # range(max_iterations) performs exactly max_iterations updates.
    for _ in range(max_iterations):
        if rng is None:
            idx = np.random.randint(0, number_of_data)
        else:
            idx = int(rng.integers(0, number_of_data))
        current_point = current_point - eta * nabla_L_of_1_set(current_point, W[idx], d[idx])
        if np.linalg.norm(nabla_L_of_all_data(current_point, W, d)) < epsilon:
            break
    return current_point

# Seed NumPy's global random state so the randomly drawn sample indices, and
# therefore the numbers printed below, are the same on every run.
np.random.seed(42)
x_sgd = stochastic_gradient_descent(W_scaled, d, initial_point, 0.001, 10000, 1e-6)
predicted_price_sgd = scaled_house_features.dot(x_sgd)
print("Hệ số dự đoán SGD:", x_sgd)
print("Giá trị dự đoán (SGD):", predicted_price_sgd[0])
print("Chênh lệch hệ số dự đoán:", L2_error(x_svd, x_sgd))

Hệ số dự đoán SGD: [2.60201285e+00 7.51327368e-01 2.20397723e-01 1.84150194e-03]
Giá trị dự đoán (SGD): 3.2728736270633836
Chênh lệch hệ số dự đoán: 0.031328677822550936


## Sklearn

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(W_scaled, d, test_size=0.2, random_state=42)

# Fit the model
# NOTE(review): the model is fitted on the training split only, whereas x_svd
# was fitted on every row, so a non-zero coefficient gap below is expected.
# Confirm that comparing the two is intended.
model = LinearRegression()
model.fit(X_train, y_train)

# Predict the price using the trained model
predicted_price_sklearn = model.predict(scaled_house_features)

# Get the coefficients
# Work on a copy: model.coef_ is the fitted model's own array, so writing the
# intercept into it would corrupt every later model.predict call.
coefficients = model.coef_.copy()
intercept = model.intercept_
# Slot 0 belongs to the column of ones in W_scaled; store the intercept there
# so the vector has the same layout as x_svd.
coefficients[0] = intercept
print("Hệ số dự đoán (sklearn):", coefficients)
print(f"Dự đoán giá trị: {predicted_price_sklearn[0]:.2f}")
print("Chênh lệch hệ số dự đoán:", L2_error(x_svd, coefficients))

Hệ số dự đoán (sklearn): [ 2.48931379  0.73232856  0.44655102 -0.08306847]
Dự đoán giá trị: 3.20
Chênh lệch hệ số dự đoán: 0.28053835317220305
