# In [1]:
#si-exercise
import pandas as pd
import numpy as np
from scipy.stats import t

class RegressionModel:
    """Ordinary least squares regression on a pandas design matrix.

    The model keeps its own copy of the regressors, optionally appends a
    constant column named ``'intercept'``, and fills ``self.results`` once
    ``ols_regression`` (or ``summary``, which calls it) has run.

    Attributes:
        x: Copy of the regressors passed in, plus an ``'intercept'`` column
            when ``create_intercept`` is true.
        y: The dependent variable, stored as given (not copied).
        create_intercept: Whether the intercept column was requested.
        regression_type: Label stored on the instance; none of the methods
            in this class branch on it.
        results: Maps each column name of ``x`` to a dict with the keys
            ``'coefficient'``, ``'standard_error'``, ``'t_stat'`` and
            ``'p_value'``.
    """

    def __init__(self, x, y, create_intercept=True, regression_type='ols'):
        """Store the data and optionally add the intercept column.

        Args:
            x: DataFrame of regressors; it is copied so the caller's frame
                is never modified.
            y: Dependent variable with one value per row of ``x``.
            create_intercept: If true, append a constant column of ones.
            regression_type: Stored as-is on the instance.
        """
        self.x = x.copy()
        self.y = y
        self.create_intercept = create_intercept
        self.regression_type = regression_type
        self.results = {}
        if self.create_intercept:
            self.add_intercept()

    def add_intercept(self):
        """Add (or overwrite) a column of ones named ``'intercept'`` in ``self.x``."""
        self.x['intercept'] = 1

    def ols_regression(self):
        """Estimate the OLS coefficients and store them in ``self.results``.

        Coefficients come from the normal equations ``(X'X)^-1 X'y``.
        Standard errors use the residual variance with ``n - k`` degrees of
        freedom, and p-values are two-sided Student-t tail probabilities.
        With ``n <= k`` the variance is not finite, so the standard errors
        and derived statistics are not meaningful.

        Raises:
            numpy.linalg.LinAlgError: If ``X'X`` is singular.
        """
        # Cast to float explicitly: a frame mixing float and bool columns
        # would otherwise yield an object array that linalg cannot invert.
        X = self.x.to_numpy(dtype=float)
        Y = np.asarray(self.y, dtype=float).reshape(-1, 1)

        XtX_inv = np.linalg.inv(X.T @ X)
        coefficients = XtX_inv @ (X.T @ Y)

        residuals = Y - X @ coefficients

        n, k = X.shape
        dof = n - k
        variance = (residuals.T @ residuals)[0, 0] / dof
        standard_errors = np.sqrt(variance * np.diag(XtX_inv))

        t_stats = coefficients.ravel() / standard_errors
        # Use the survival function rather than 1 - cdf: for large |t| the
        # cdf rounds to exactly 1.0 and the p-value would collapse to 0.
        p_values = 2 * t.sf(np.abs(t_stats), df=dof)

        for i, col in enumerate(self.x.columns):
            self.results[col] = {
                'coefficient': coefficients[i][0],
                'standard_error': standard_errors[i],
                't_stat': t_stats[i],
                'p_value': p_values[i],
            }

    def summary(self):
        """Fit the model, print the coefficient table and return it.

        Returns:
            DataFrame with one row per regressor and the columns
            ``"Variable name"``, ``"coefficient value"``,
            ``"standard error"``, ``"t-statistic"`` and ``"p-value"``.
        """
        self.ols_regression()

        estimates = list(self.results.values())
        summary_df = pd.DataFrame({
            "Variable name": list(self.results),
            "coefficient value": [est['coefficient'] for est in estimates],
            "standard error": [est['standard_error'] for est in estimates],
            "t-statistic": [est['t_stat'] for est in estimates],
            "p-value": [est['p_value'] for est in estimates],
        })

        print(summary_df)
        return summary_df