# Bài 1:

In [342]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

df = pd.read_csv('bang gia nha.csv')
df

Unnamed: 0,STT,Dien tich (m2),So phong ngu,Khoang cach toi TT,Gia (ty VND)
0,1,40,1,30.0,1.1
1,2,60,2,32.0,1.55
2,3,53,2,30.1,1.68
3,4,71,2,35.7,1.75
4,5,80,2,24.5,5.5
5,6,56,2,27.6,2.3
6,7,75,2,27.6,3.0
7,8,79,2,27.6,3.5
8,9,56,2,29.7,2.4
9,10,60,2,29.7,2.9


In [None]:
W = df.drop(columns=['Gia (ty VND)', 'STT'], axis=1)
d = df['Gia (ty VND)'].values
W['bias'] = np.ones((W.shape[0], 1))

def loss_function(x, W, d):
    return 1/2 * np.sum(d - W.dot(x))**2

def nabla_L(x, W, d):
    return -W.T.dot(d - W.dot(x))

def checking_inverse(matrix):
    if np.linalg.det(matrix) == 0:
        return False
    else:
        return True
    
def L2_error(x, y):
    return np.sqrt(np.sum((x - y)**2))

# SVD

In [344]:
# SVD decomposition
def svd_decomposition(W, d):
    if checking_inverse(W.T @ W):
        x = np.linalg.inv(W.T @ W) @ W.T @ d
    else:
        U, s, Vt = np.linalg.svd(W.T @ W, full_matrices=True)
        s_inv = np.zeros_like(W.T @ W)
        s_inv[:len(s), :len(s)] = np.diag(1/s)
        x = (Vt.T @ s_inv @ U.T) @ W.T @ d
    return x

house_features = np.array([(79, 2, 26.5, 1)])
x = svd_decomposition(W, d)
predicted_price_svd = house_features.dot(x)
print("Giá trị dự đoán được là:", predicted_price_svd[0])

Giá trị dự đoán được là: 3.242683206106521


# Gradient descent

In [None]:
scaler = StandardScaler()
W_scaled = scaler.fit_transform(W)
W_scaled = np.c_[np.ones((W_scaled.shape[0], 1)), W_scaled]
initial_point = np.zeros(W_scaled.shape[1])
house_features = np.array([(79, 2, 26.5, 1)])
scaled_house_features = scaler.transform(house_features)
scaled_house_features = np.c_[np.ones((scaled_house_features.shape[0], 1)), scaled_house_features]

# Gradient descent function
def gradient_descent(W, d, initial_point, eta, max_iterations, epsilon):
    current_point = initial_point.copy()
    i = 0
    while i <= max_iterations:
        grad = nabla_L(current_point, W, d)
        next_point = current_point - eta * grad
        if np.linalg.norm(grad) < epsilon:
            return next_point
        current_point = next_point
        i += 1
    return current_point

x = gradient_descent(W_scaled, d, initial_point, 0.001, 1000, 1e-6)
predicted_price_gd = scaled_house_features.dot(x)
print("Giá trị dự đoán (Gradient Descent):", predicted_price_gd[0])

# So sánh với kết quả của SVD
print("Giá trị dự đoán với SVD:", predicted_price_svd[0])
print("Giá trị dự đoán với Gradient Descent:", predicted_price_gd[0])
print("Chênh lệch giữa 2 phương pháp:", L2_error(predicted_price_svd, predicted_price_gd))

Giá trị dự đoán (Gradient Descent): 3.242683123706925
Giá trị dự đoán với SVD: 3.242683206106521
Giá trị dự đoán với Gradient Descent: 3.242683123706925
Chênh lệch giữa 2 phương pháp: 8.239959603528746e-08


# Accelerated gradient descent

In [None]:
# Accelerated gradient descent function
def accelerated_gradient_descent(W, d, initial_point, eta, max_iterations, epsilon):
    current_point = initial_point.copy()
    previous_point = initial_point.copy()
    i = 0
    while i <= max_iterations:
        temp = current_point + (i - 1) / (i + 2) * (current_point - previous_point) 
        gradient = nabla_L(temp, W, d)
        next_point = temp - eta * gradient
        if np.linalg.norm(gradient) < epsilon:
            return next_point
        previous_point = current_point
        current_point = next_point
        i += 1
    return 0

x = accelerated_gradient_descent(W_scaled, d, initial_point, 0.001, 1000, 1e-6)
predicted_price_agd = scaled_house_features.dot(x)
print("Giá trị dự đoán (Accelerated Gradient Descent):", predicted_price_agd[0])

# So sánh với kết quả của SVD
print("Giá trị dự đoán với SVD:", predicted_price_svd[0])
print("Giá trị dự đoán với Accelerated Gradient Descent:", predicted_price_agd[0])
print("Chênh lệch giữa 2 phương pháp:", L2_error(predicted_price_svd, predicted_price_agd))

Giá trị dự đoán (Accelerated Gradient Descent): 3.2426832586356564
Giá trị dự đoán với SVD: 3.242683206106521
Giá trị dự đoán với Accelerated Gradient Descent: 3.2426832586356564
Chênh lệch giữa 2 phương pháp: 5.252913526376801e-08


# Stochastic gradient descent

In [None]:
def nabla_L_of_1_set(x, y, a, b):
    grad_a = -2 * (y - (a + b * x))
    grad_b = -2 * x * (y - (a + b * x))
    return grad_a, grad_b

# Stochastic gradient descent function
def stochastic_gradient_descent(W, d, initial_point, eta, max_iterations, epsilon):
    current_point = initial_point.copy()
    number_of_data = len(d)
    i = 0
    while i <= max_iterations:
        idx = np.random.randint(0, number_of_data - 1)
        next_point = current_point - eta * nabla_L(current_point, W, d)
        grad_a, grad_b = nabla_L_of_1_set(W[idx, 1], d[idx], initial_point[0], initial_point[1])
        current_point[0] = initial_point[0] - eta * grad_a
        current_point[1] = initial_point[1] - eta * grad_b
        if np.linalg.norm([grad_a, grad_b]) < epsilon:
            return current_point
        initial_point = current_point
        i += 1

    return current_point

x = stochastic_gradient_descent(W_scaled, d, initial_point, 0.001, 10000, 1e-6)
predicted_price_sgd = scaled_house_features.dot(x)
print("Giá trị dự đoán (Stochastic Gradient Descent):", predicted_price_sgd[0])

# So sánh với kết quả của SVD
print("Giá trị dự đoán với SVD:", predicted_price_svd[0])
print("Giá trị dự đoán với Stochastic Gradient Descent:", predicted_price_sgd[0])
print("Chênh lệch giữa 2 phương pháp:", L2_error(predicted_price_svd, predicted_price_sgd))

Giá trị dự đoán (Stochastic Gradient Descent): 3.136471414519095
Giá trị dự đoán với SVD: 3.242683206106521
Giá trị dự đoán với Stochastic Gradient Descent: 3.136471414519095
Chênh lệch giữa 2 phương pháp: 0.10621179158742633


In [348]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(W, d, test_size=0.2, random_state=42)

# Fit the model
model = LinearRegression()
model.fit(X_train, y_train)
# Get the coefficients
coefficients = model.coef_
intercept = model.intercept_
# Dự đoán giá trị cho 1 ngôi nhà có diện tích 79m2, 2 phòng ngủ và cách TT 26.5km
house_features = np.array([(79, 2, 26.5, 1)])
predicted_price_sklearn = model.predict(house_features)
print(f"Dự đoán giá trị: {predicted_price_sklearn[0]:.2f}")

# So sánh với kết quả của SVD
print("Giá trị dự đoán với SVD:", predicted_price_svd[0])
print("Giá trị dự đoán với scikit-learn:", predicted_price_sklearn[0])
print("Chênh lệch giữa 2 phương pháp:", sai_so_chuan_hai(predicted_price_svd, predicted_price_sklearn))

Dự đoán giá trị: 3.20
Giá trị dự đoán với SVD: 3.242683206106521
Giá trị dự đoán với scikit-learn: 3.2023951923163443
Chênh lệch giữa 2 phương pháp: 0.04028801379017688


# Bài 2:

In [349]:
# Đọc dữ liệu
df = pd.read_csv('bang can nang chieu cao.csv')
df

Unnamed: 0,STT,Chieu cao (cm),Can nang (kg)
0,1,147,49
1,2,150,50
2,3,153,51
3,4,155,52
4,5,158,54
5,6,160,56
6,7,163,58
7,8,168,60
8,9,170,72
9,10,173,63


# SVD

In [350]:
# SVD decomposition for height and weight
W = df['Chieu cao (cm)']
W_reshaped = W.values.reshape(-1, 1)
d = df['Can nang (kg)']
W = np.c_[np.ones((W.shape[0], 1)), W]
x = svd_decomposition(W, d)
height = np.array([1, 170])
predicted_weight_svd = height.dot(x)
print("Giá trị dự đoán được là:", predicted_weight_svd)

Giá trị dự đoán được là: 62.112479507376364


# Gradient descent

In [351]:
# Chuẩn hóa dữ liệu
W_scaled = scaler.fit_transform(W_reshaped)
W_scaled = np.c_[np.ones((W_scaled.shape[0], 1)), W_scaled]
initial_point = np.zeros(W_scaled.shape[1])

# Dự đoán cho chiều cao 170
height = np.array([[170]])
scaled_height = scaler.transform(height)
scaled_height = np.c_[np.ones((scaled_height.shape[0], 1)), scaled_height]
x = gradient_descent(W_scaled, d, initial_point, 0.001, 1000, 1e-2)
predicted_weight_gd = scaled_height.dot(x)
print("Giá trị dự đoán được là:", predicted_weight_gd[0])

# So sánh với kết quả của SVD
print("Giá trị dự đoán với SVD:", predicted_weight_svd)
print("Chênh lệch giữa 2 phương pháp:", sai_so_chuan_hai(predicted_weight_svd, predicted_weight_gd))

Giá trị dự đoán được là: 62.111748207679234
Giá trị dự đoán với SVD: 62.112479507376364


TypeError: object of type 'numpy.float64' has no len()

# Accelerated gradient descent

In [None]:
# Accelerated gradient descent
x = accelerated_gradient_descent(W_scaled, d, initial_point, 0.001, 1000, 1e-6)
predicted_weight_agd = scaled_height.dot(x)
print("Giá trị dự đoán được là:", predicted_weight_agd[0])

# So sánh với kết quả của SVD
print("Giá trị dự đoán với SVD:", predicted_weight_svd)
print("Giá trị dự đoán với Accelerated Gradient Descent:", predicted_weight_agd[0])

Giá trị dự đoán được là: [0. 0.]
Giá trị dự đoán với SVD: 62.112479507376364
Giá trị dự đoán với Accelerated Gradient Descent: [0. 0.]


# Stochastic gradient descent

In [None]:
# Stochastic gradient descent
x = stochastic_gradient_descent(W_scaled, d, initial_point, 0.001, 10000, 1e-6)
predicted_weight_sgd = scaled_height.dot(x)
print("Giá trị dự đoán được là:", predicted_weight_sgd[0])

# So sánh với kết quả của SVD
print("Giá trị dự đoán với SVD:", predicted_weight_svd)
print("Chênh lệch giữa 2 phương pháp:", sai_so_chuan_hai(predicted_weight_svd, predicted_weight_sgd[0]))

Giá trị dự đoán được là: 62.26830340868121
Giá trị dự đoán với SVD: 62.112479507376364
Chênh lệch giữa 2 phương pháp: 0.15582390130484924


# Sklearn

In [None]:
# Using sklearn
X_train, X_test, y_train, y_test = train_test_split(W_scaled, d, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(X_train, y_train)
coefficients = model.coef_
intercept = model.intercept_
scaled_height = scaler.transform(np.array([[170]]))  # Scale the height
scaled_height = np.c_[np.ones((scaled_height.shape[0], 1)), scaled_height]  # Add bias term
predicted_weight_sklearn = model.predict(scaled_height)
print(f"Dự đoán giá trị: {predicted_weight_sklearn[0]}")

# So sánh với kết quả của SVD
print("Giá trị dự đoán với SVD:", predicted_weight_svd)
print("Chênh lệch giữa 2 phương pháp:", sai_so_chuan_hai(predicted_weight_svd, predicted_weight_sklearn[0]))

Dự đoán giá trị: 62.33370288248337
Giá trị dự đoán với SVD: 62.112479507376364
Chênh lệch giữa 2 phương pháp: 0.22122337510700874
