In [2]:
import numpy as np

In [3]:
class CustomLinearRegression:
    """Linear regression trained with full-batch gradient descent on the MSE loss.

    A column of ones is prepended to the training features so that
    ``theta[0]`` acts as the intercept and the remaining rows of ``theta``
    are the feature weights.

    Args:
        x_data: Training features, one row per sample.
        y_target: Training targets. A 1-D array of length ``n`` is stored as
            an ``(n, 1)`` column so it lines up with the predictions.
        learning_rate: Step size applied to the gradient on every update.
        num_epochs: Number of full-batch gradient steps performed by ``fit``.
    """

    def __init__(self, x_data, y_target, learning_rate=0.01, num_epochs=10000):
        x_data = np.asarray(x_data)
        y_target = np.asarray(y_target)

        self.num_samples = x_data.shape[0]
        # Bias column first, then the original features.
        self.x_data = np.c_[np.ones((self.num_samples, 1)), x_data]

        # Predictions have shape (n, 1). Subtracting a 1-D (n,) target would
        # broadcast to an (n, n) matrix and silently corrupt loss and gradient.
        if y_target.ndim == 1:
            y_target = y_target.reshape(-1, 1)
        self.y_target = y_target

        self.learning_rate = learning_rate
        self.num_epochs = num_epochs
        # Random (unseeded) initial parameters, one per column of self.x_data.
        self.theta = np.random.randn(self.x_data.shape[1], 1)
        # One scalar loss value is appended per epoch run by fit().
        self.losses = []

    def compute_loss(self, y_pred, y_target):
        """Return the mean squared error between ``y_pred`` and ``y_target``.

        The mean (not the raw per-sample array) is returned so the value
        matches the gradient used in ``fit``, which is the derivative of the
        mean squared error.
        """
        squared_error = (y_pred - y_target) ** 2
        return np.mean(squared_error)

    def predict(self, x_data):
        """Compute ``x_data @ theta``.

        Args:
            x_data: Either a matrix that already contains the leading bias
                column (as many columns as ``theta`` has rows), or a 2-D raw
                feature matrix with exactly one column fewer, in which case
                the bias column is added here.

        Returns:
            The predictions ``x_data.dot(self.theta)``.
        """
        x_data = np.asarray(x_data)
        if x_data.ndim == 2 and x_data.shape[1] == self.theta.shape[0] - 1:
            x_data = np.c_[np.ones((x_data.shape[0], 1)), x_data]
        return x_data.dot(self.theta)

    def fit(self):
        """Run ``num_epochs`` gradient-descent steps on the stored training data.

        The loss is printed every 50 epochs and appended to ``self.losses``
        on every epoch.

        Returns:
            dict with
              * ``'loss'``: average of all values in ``self.losses``
                (``nan`` when no loss has been recorded),
              * ``'weights'``: the final ``theta``.
        """
        for epoch in range(self.num_epochs):

            # Predict on the training matrix (bias column already present).
            y_pred = self.predict(self.x_data)

            # Compute and record the scalar loss for this epoch.
            loss = self.compute_loss(y_pred, self.y_target)
            self.losses.append(loss)

            # Gradient of the mean squared error with respect to theta.
            loss_grd = 2 * (y_pred - self.y_target) / self.num_samples
            gradients = self.x_data.T.dot(loss_grd)

            # Gradient-descent update.
            self.theta = self.theta - self.learning_rate * gradients

            if (epoch % 50) == 0:
                print(f'Epoch: {epoch} - Loss: {loss}')

        # Guard against an empty history (e.g. num_epochs == 0), which would
        # otherwise raise ZeroDivisionError.
        if self.losses:
            mean_loss = sum(self.losses) / len(self.losses)
        else:
            mean_loss = float('nan')

        return {
            'loss': mean_loss,
            'weights': self.theta
        }

$R^2 = 1 - \frac{\text{RSS}}{\text{TSS}} = 1 - \frac{\sum_{i=1}^{n}(y_i - \hat{y}_i)^2}{\sum_{i=1}^{n}(y_i - \bar{y})^2}$

- $\text{RSS}$ là Residual Sum of Squares (tổng bình phương sai số dự đoán): $\sum (y_{\text{pred}} - y)^2$
- $\text{TSS}$ là Total Sum of Squares (tổng bình phương của độ lệch giữa các giá trị thực tế và trung bình): $\sum (y - \bar{y})^2$

In [4]:
def r2score(y_pred, y):
    """Coefficient of determination: ``R^2 = 1 - RSS / TSS``.

    Args:
        y_pred: Predicted values (array-like).
        y: Ground-truth values (array-like).

    Returns:
        ``1 - RSS / TSS`` where RSS is the sum of squared prediction errors
        and TSS is the sum of squared deviations of ``y`` from its mean.
        When TSS is zero (``y`` is constant) the ratio is undefined; in that
        case 1.0 is returned for a perfect prediction and 0.0 otherwise,
        the same convention scikit-learn's ``r2_score`` uses by default.
    """
    y_pred = np.asarray(y_pred)
    y = np.asarray(y)

    # Same number of elements but different layout, e.g. (n, 1) predictions
    # against (n,) targets: align them so the subtraction stays element-wise
    # instead of broadcasting to an (n, n) matrix.
    if y_pred.shape != y.shape and y_pred.size == y.size:
        y_pred = y_pred.reshape(y.shape)

    rss = np.sum((y_pred - y) ** 2)
    tss = np.sum((y - y.mean()) ** 2)

    if tss == 0:
        # Constant target: avoid dividing by zero (nan / -inf plus a warning).
        return 1.0 if rss == 0 else 0.0

    r2 = 1 - (rss / tss)
    return r2

In [5]:
# Each entry is a (predictions, ground truth) pair fed to r2score.
cases = (
    # Case 1: predictions identical to the targets
    (np.array([1, 2, 3, 4, 5]), np.array([1, 2, 3, 4, 5])),
    # Case 2: predictions that differ from the targets
    (np.array([1, 2, 3, 4, 5]), np.array([3, 5, 5, 2, 4])),
)

# Print one score per case, in order.
for y_pred, y in cases:
    print(r2score(y_pred, y))

1.0
-2.235294117647059
