# In [11]:
# Week 8
import numpy as np
import pandas as pd
import scipy as sp

class RegressionModel:
    """Ordinary least squares (OLS) regression on pandas data.

    The model keeps private copies of the regressors ``x`` and the response
    ``y``, optionally appends a constant column named ``'intercept'``, and
    exposes :meth:`ols_regression` to estimate the coefficients and
    :meth:`summary` to print them as a table.

    Attributes are documented inline in ``__init__``.
    """

    def __init__(self, x: pd.DataFrame, y: pd.DataFrame, create_intercept: bool, regression_type="ols"):
        """Store copies of the data and optionally add an intercept column.

        Args:
            x: Regressors, one column per variable.
            y: Response. A single-column DataFrame or a Series is accepted by
                :meth:`ols_regression`.
            create_intercept: When true, a constant column ``'intercept'`` is
                appended to the stored copy of ``x``.
            regression_type: Label for the estimator. It is stored on the
                instance but no method in this class reads it.
        """
        # Copies, so that adding the intercept never mutates the caller's data.
        self.x = x.copy()
        self.y = y.copy()
        self.create_intercept = create_intercept
        self.regression_type = regression_type
        # Maps variable name -> dict of coefficient / standard_error /
        # t_stat / p_value; empty until ols_regression() has run.
        self.results = {}

        if self.create_intercept:
            self.add_intercept()

    def add_intercept(self):
        """Append (or overwrite) a constant column ``'intercept'`` on ``self.x``."""
        self.x['intercept'] = 1

    def ols_regression(self):
        """Estimate the OLS coefficients and their inference statistics.

        Solves the normal equations ``X'X b = X'y`` and fills ``self.results``
        with one entry per column of ``self.x``, each holding the keys
        ``'coefficient'``, ``'standard_error'``, ``'t_stat'`` and
        ``'p_value'`` (two-sided, Student t with ``n - k`` degrees of
        freedom).

        Raises:
            ValueError: If ``y`` is not a single column, if ``x`` and ``y``
                have different numbers of rows, or if there are not more
                observations than regressors.
        """
        # Import the submodules explicitly: ``import scipy as sp`` alone does
        # not guarantee ``sp.linalg`` / ``sp.stats`` exist on older SciPy.
        from scipy import linalg, stats

        X = np.asarray(self.x, dtype=float)

        # Flatten y to 1-D. A (n, 1) response would make beta_hat (k, 1) and
        # the later division by the (k,) standard errors would broadcast to a
        # (k, k) matrix instead of one t-statistic per coefficient.
        y = np.asarray(self.y, dtype=float)
        if y.ndim == 2 and y.shape[1] == 1:
            y = y[:, 0]
        if y.ndim != 1:
            raise ValueError("y must contain exactly one column.")

        n, k = X.shape  # observations, regressors
        if y.shape[0] != n:
            raise ValueError("x and y must have the same number of rows.")

        df = n - k
        if df <= 0:
            raise ValueError(
                "Need more observations than regressors to estimate the "
                "residual variance."
            )

        # OLS estimation via the normal equations
        XtX = X.T @ X  # X'X
        XtY = X.T @ y  # X'y
        beta_hat = linalg.solve(XtX, XtY, assume_a='pos')

        # Predicted values and residuals
        residuals = y - X @ beta_hat
        residual_variance = float(residuals @ residuals) / df

        # Variance-covariance matrix of beta_hat and its standard errors
        var_beta_hat = residual_variance * linalg.inv(XtX)
        standard_errors = np.sqrt(np.diag(var_beta_hat))

        t_stats = beta_hat / standard_errors
        # Two-sided p-values from the t-distribution survival function
        p_values = 2 * stats.t.sf(np.abs(t_stats), df)

        # Store results in the required dictionary format; rebuilding the
        # dict drops entries left over from an earlier run.
        self.results = {
            var: {
                'coefficient': coef,
                'standard_error': se,
                't_stat': t_stat,
                'p_value': p_value,
            }
            for var, coef, se, t_stat, p_value in zip(
                self.x.columns, beta_hat, standard_errors, t_stats, p_values
            )
        }

    def summary(self):
        """Print the regression results as a table, one row per variable.

        Prints a hint and returns if :meth:`ols_regression` has not been run.
        """
        if not self.results:
            print("No results available. Please run ols_regression first.")
            return

        # pd.DataFrame(dict-of-dicts) puts variables in columns, so transpose
        # to get one row per variable; reindex pins the column order.
        summary_table = pd.DataFrame(self.results).T.reindex(
            columns=['coefficient', 'standard_error', 't_stat', 'p_value']
        )
        summary_table.index.name = 'Variable name'

        # Rename columns for the summary output
        summary_table = summary_table.rename(columns={
            'coefficient': 'coefficient value',
            'standard_error': 'standard error',
            't_stat': 't-statistic',
            'p_value': 'p-value',
        })

        print(summary_table)