Regresja liniowa wieloczynnikowa

In [9]:
import numpy as np
import copy
import pandas as pd


class MultipleLinearRegression:
    """Ordinary least-squares linear regression on several features.

    The fitted model is ``y = intercept + x @ coefficients`` for a single
    target variable. Call :meth:`fit` before :meth:`predict`.

    Attributes:
        coefficients: 1-D array with one slope per feature, in the column
            order of the ``x`` given to :meth:`fit`; ``None`` before fitting.
        intercept: Constant term of the model; ``None`` before fitting.
    """

    def __init__(self):
        self.coefficients = None
        self.intercept = None

    def fit(self, x, y):
        """Estimate the intercept and the per-feature coefficients.

        Args:
            x: Feature table (DataFrame or 2-D array-like), one row per
                sample. A 1-D input is treated as a single feature column.
                The input is not modified.
            y: Target values (Series, 1-D array-like, or a single-column
                DataFrame / 2-D array).

        Raises:
            ValueError: If ``x`` and ``y`` hold a different number of samples
                or ``x`` is not at most 2-D.
        """
        # Prepare the x and y values for the coefficient estimation.
        design = self._transform_x(x)
        targets = self._transform_y(y)

        if design.shape[0] != targets.shape[0]:
            raise ValueError(
                f"x has {design.shape[0]} samples but y has {targets.shape[0]}"
            )

        betas = self._estimate_coefficients(design, targets)

        # The first design-matrix column is the constant 1, so its weight is
        # the intercept; the remaining weights belong to the features.
        self.intercept = betas[0]
        self.coefficients = betas[1:]

    def predict(self, x):
        """Predict the target for every row of ``x``.

        Args:
            x: Feature table with the same columns (and column order) that
                was used in :meth:`fit`.

        Returns:
            A list with one prediction per row of ``x``.

        Raises:
            RuntimeError: If the model has not been fitted yet.
        """
        if self.coefficients is None or self.intercept is None:
            raise RuntimeError("fit() must be called before predict()")

        features = self._as_feature_matrix(x)
        # One matrix product replaces the former per-row Python loop.
        return list(features @ self.coefficients + self.intercept)

    def r2_score(self, y_true, y_pred):
        """Return the coefficient of determination ``1 - RSS / TSS``.

        Args:
            y_true: Observed target values (Series or array-like).
            y_pred: Predicted values, positionally aligned with ``y_true``.

        Returns:
            The R^2 score. When ``y_true`` is constant the total sum of
            squares is zero and the result is not finite.

        Raises:
            ValueError: If the inputs are empty or differ in length.
        """
        actual = np.asarray(y_true, dtype=float).ravel()
        predicted = np.asarray(y_pred, dtype=float).ravel()

        if actual.size == 0:
            raise ValueError("r2_score requires at least one sample")
        if actual.shape != predicted.shape:
            raise ValueError(
                f"y_true has {actual.size} values but y_pred has {predicted.size}"
            )

        residual_sum_of_squares = np.sum((actual - predicted) ** 2)
        total_sum_of_squares = np.sum((actual - actual.mean()) ** 2)

        return 1 - (residual_sum_of_squares / total_sum_of_squares)

    @staticmethod
    def _as_feature_matrix(x):
        """Convert ``x`` to a 2-D float array (samples x features)."""
        features = np.asarray(x, dtype=float)
        if features.ndim == 1:
            # A single feature given as a flat sequence becomes one column.
            features = features.reshape(-1, 1)
        if features.ndim != 2:
            raise ValueError(
                f"x must be 1-D or 2-D, got {features.ndim} dimensions"
            )
        return features

    def _transform_x(self, x):
        """Build the design matrix: a leading column of ones plus ``x``.

        The ones column is prepended on a new array, so the caller's data is
        left untouched and a feature that happens to be named ``'ones'``
        cannot collide with it.
        """
        features = self._as_feature_matrix(x)
        ones = np.ones((features.shape[0], 1))
        return np.hstack((ones, features))

    def _transform_y(self, y):
        """Return the targets as a float array, flattening a single column."""
        targets = np.asarray(y, dtype=float)
        if targets.ndim == 2 and targets.shape[1] == 1:
            # A one-column frame must not leak a second axis into the fitted
            # intercept / coefficients.
            targets = targets.ravel()
        return targets

    def _estimate_coefficients(self, x, y):
        """Solve the least-squares problem ``min ||x @ b - y||`` for ``b``.

        ``np.linalg.lstsq`` is used instead of explicitly inverting
        ``x.T @ x``: it is numerically more stable and still returns a
        (minimum-norm) solution when features are collinear.
        """
        coefficients, _residuals, _rank, _singular_values = np.linalg.lstsq(
            x, y, rcond=None
        )
        return coefficients

from load_dataset import x_train, x_test, y_train, y_test

# Driver: fit the model on x_train / y_train and score it on x_test / y_test
# (all four are imported from load_dataset).
mlr = MultipleLinearRegression()

# Estimates mlr.intercept and mlr.coefficients from the training data.
mlr.fit(x_train, y_train)

# One prediction per row of x_test, returned as a list.
pred = mlr.predict(x_test)
# R^2 of the predictions against the observed test targets.
score = mlr.r2_score(y_test, pred)
print(f'R^2: {score}')


R^2: 0.6687594935356445
