In [None]:
import numpy as np
import pandas as pd

class RegressionModel:
    """Ordinary least squares (OLS) regression computed directly with NumPy.

    The model keeps a private copy of the regressors, optionally appends an
    ``intercept`` column of ones to that copy, and stores the fitted
    statistics in ``self.results`` once :meth:`ols_regression` has run.

    Attributes:
        x: Private copy of the regressor DataFrame (including the
            ``intercept`` column when one was requested).
        y: Response values flattened to a 1-D NumPy array.
        create_intercept: Whether an intercept column was requested.
        regression_type: Stored as given; no method of this class reads it.
        results: ``None`` until the model is fitted, afterwards a dict that
            maps each column name of ``x`` to a dict with the keys
            ``coefficient``, ``standard_error``, ``t_stat`` and ``p_value``.
    """

    def __init__(self, x: pd.DataFrame, y, create_intercept: bool = True,
                 regression_type: str = "ols") -> None:
        """Store the data and optionally add the intercept column.

        Args:
            x: Regressors, one column per explanatory variable.
            y: Response values; a DataFrame, Series or any array-like. The
                values are flattened to one dimension.
            create_intercept: When true, a column named ``intercept`` filled
                with ones is added to the model's copy of ``x``.
            regression_type: Label stored on the instance.
        """
        # Copy so that adding the intercept column never alters the
        # DataFrame owned by the caller.
        self.x = x.copy()
        # np.asarray accepts DataFrame/Series as well as plain sequences.
        self.y = np.asarray(y).flatten()
        self.create_intercept = create_intercept
        self.regression_type = regression_type
        self.results = None

        if self.create_intercept:
            self.add_intercept()

    def add_intercept(self) -> None:
        """Add (or overwrite) an ``intercept`` column of ones in ``self.x``."""
        self.x['intercept'] = 1

    def ols_regression(self) -> None:
        """Fit the model by OLS and store the statistics in ``self.results``.

        Coefficients are ``(X'X)^(-1) X'y``. Standard errors come from the
        residual variance ``RSS / (n - p)`` times the diagonal of
        ``(X'X)^(-1)``. p-values are two-sided and use the Student t
        distribution with ``n - p`` degrees of freedom.

        Raises:
            numpy.linalg.LinAlgError: If ``X'X`` cannot be inverted.
            ValueError: If there are not more observations than parameters,
                so the residual variance is undefined.
        """
        # Local import: scipy is not among the file-level imports.
        from scipy import stats

        # Cast to float so bool/mixed columns do not produce an object array.
        design = self.x.to_numpy(dtype=float)
        n_obs, n_params = design.shape

        # OLS estimate: (X'X)^(-1) X'y
        gram_inv = np.linalg.inv(design.T @ design)
        beta_hat = gram_inv @ (design.T @ self.y)

        residuals = self.y - design @ beta_hat

        dof = n_obs - n_params
        if dof <= 0:
            raise ValueError(
                "OLS needs more observations than parameters "
                f"(got {n_obs} observations and {n_params} parameters)."
            )
        residual_variance = np.sum(residuals ** 2) / dof

        # Diagonal of the coefficient variance-covariance matrix.
        standard_errors = np.sqrt(np.diag(residual_variance * gram_inv))

        t_stats = beta_hat / standard_errors
        # The survival function avoids the cancellation in 1 - cdf, which
        # rounds to exactly 0.0 for large |t|.
        p_values = 2 * stats.t.sf(np.abs(t_stats), dof)

        self.results = {
            name: {
                'coefficient': coef,
                'standard_error': std_err,
                't_stat': t_stat,
                'p_value': p_value,
            }
            for name, coef, std_err, t_stat, p_value in zip(
                self.x.columns, beta_hat, standard_errors, t_stats, p_values
            )
        }

    def _t_distribution_cdf(self, t_value, df):
        """Return the Student t CDF at ``t_value`` for ``df`` degrees of freedom.

        Delegates to ``scipy.stats.t.cdf``. ``ols_regression`` no longer
        calls this helper; it is kept so existing callers keep working.
        """
        from scipy import stats
        return stats.t.cdf(t_value, df)

    def summary(self) -> None:
        """Print a table of the regression results, one row per variable.

        Prints a hint instead when the model has not been fitted yet.
        """
        if self.results is None:
            print("No results available. Please run ols_regression first.")
            return

        summary_table = pd.DataFrame(self.results).T
        # Select by label so a column can never be silently mislabelled.
        summary_table = summary_table[
            ['coefficient', 'standard_error', 't_stat', 'p_value']
        ]
        summary_table.index.name = 'Variable name'

        print(summary_table)

# Example usage:
# x = pd.DataFrame({...})  # your x values
# y = pd.DataFrame({...})  # your y values
# model = RegressionModel(x, y, create_intercept=True)
# model.ols_regression()
# model.summary()
