In [7]:
#@title (Hidden) Diagnostic Check
import os
import sys
import pandas as pd
import numpy as np
import statsmodels
import statsmodels.api as sm
import statsmodels.formula.api as smf
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as stats

In [79]:
class OLS:
    """Fit ``yname ~ xname`` by ordinary least squares and plot diagnostics.

    The model is built with the statsmodels formula API (``smf.ols``) and
    fitted immediately in the constructor.

    Attributes:
        data: The data object passed to the constructor.
        yname, xname: Names used on the left/right side of the formula.
        y, x: ``data[yname]`` and ``data[xname]`` exactly as given
            (rows with missing values are NOT removed from these).
        model: The unfitted ``smf.ols`` model.
        results: The fitted results object returned by ``model.fit()``.
        beta_hat: ``results.params`` (estimated coefficients).
        y_hat: ``results.predict()`` (in-sample fitted values).
        residuals: ``results.resid``.

    The ``summary`` and ``anova`` methods call ``display``, which is not
    imported here; it is expected to be available in the notebook namespace.
    """

    def __init__(self, data, yname, xname):
        """Fit the regression of ``data[yname]`` on ``data[xname]``.

        Args:
            data: Table indexable by column name (a pandas DataFrame in this
                notebook).
            yname: Name of the response column.
            xname: Name of the covariate column.
        """
        self.data = data
        self.yname = yname
        self.xname = xname
        self.y = data[yname]
        self.x = data[xname]

        model_description = yname + '~' + xname
        self.model = smf.ols(model_description, data=data)
        self.results = self.model.fit()
        self.beta_hat = self.results.params
        self.y_hat = self.results.predict()
        self.residuals = self.results.resid

        # The formula interface drops rows with missing values by default, so
        # y_hat / residuals can be shorter than self.x / self.y.  Keep
        # complete-case copies so the plots pair each fitted value with the
        # observation it belongs to.
        complete = self.x.notna() & self.y.notna()
        self._x_fit = self.x[complete]
        self._y_fit = self.y[complete]

    def scatter(self):
        """Draw a seaborn scatter plot of the response against the covariate."""
        plt.clf()
        sns.scatterplot(data=self.data, x=self.xname, y=self.yname)

    def summary(self):
        """Display the statsmodels summary table of the fitted model."""
        display(self.results.summary())

    def anova(self):
        """Display the ANOVA table of the fitted model."""
        # sm.stats.anova_lm is the documented entry point; the bare
        # ``statsmodels.stats.anova`` attribute path only resolves when some
        # other import has already loaded that submodule.
        display(sm.stats.anova_lm(self.results))

    def plot_fitted(self, x=None, y=None):
        """Plot the observations together with the fitted regression line.

        Args:
            x: Values for the horizontal axis.  Defaults to the covariate
                values of the rows used in the fit.  If supplied, it must
                have one entry per fitted value, because the line is drawn
                as ``self.y_hat`` against ``x``.
            y: Values for the scatter points.  Defaults to the response
                values of the rows used in the fit.
        """
        if x is None:
            x = self._x_fit
        if y is None:
            y = self._y_fit
        # Positional access via .iloc: integer keys on a label-indexed Series
        # are deprecated in recent pandas.
        intercept = self.beta_hat.iloc[0]
        slope = self.beta_hat.iloc[1]

        plt.clf()
        plt.scatter(x, y, color='black', zorder=2)
        plt.plot(x, self.y_hat, color='blue', linewidth=3, zorder=3)
        plt.xlabel(r'$x$')
        plt.ylabel(r'$y$')
        plt.title(rf"Estimated values for $\beta_0 = {intercept:.04f}, \beta_1 = {slope:.04f}$ ")
        plt.suptitle('Linear Regression')
        plt.show()

    def plot_hist(self):
        """Draw a histogram of the residuals."""
        plt.clf()
        sns.histplot(self.residuals)

    def plot_x_residual(self):
        """Scatter the residuals against the covariate, with a zero line."""
        plt.clf()
        # Use the complete-case covariate so it lines up with the residuals.
        plt.scatter(self._x_fit, self.residuals)
        plt.axhline(y=0, color='r', linestyle='-')
        plt.xlabel(r'$x$')
        plt.ylabel('Residuals')
        plt.title("Comparing residuals against covariate")
        plt.show()

    def plot_yhat_residual(self):
        """Scatter the residuals against the fitted values, with a zero line."""
        plt.clf()
        plt.scatter(self.y_hat, self.residuals)
        plt.axhline(y=0, color='r', linestyle='-')
        # Plain text label: inside $...$ mathtext drops the space and
        # italicises the words ("fittedvalues").
        plt.xlabel('Fitted values')
        plt.ylabel('Residuals')
        plt.title("Comparing residuals against fitted values")
        plt.show()

In [80]:
# Source of the bryozoan data set (CSV hosted on GitHub).
DATA_URL = 'https://raw.githubusercontent.com/ciaran-evans/bryozoan-data-paper/master/bryozoan_data_fixed.csv'
data = pd.read_csv(DATA_URL)

# Regress the "Metabolic" column on the "Mass" column.
yname = "Metabolic"
xname = "Mass"
olsmod = OLS(data=data, yname=yname, xname=xname)

In [78]:
# Each line below calls one method of the fitted `olsmod` object; all are
# currently disabled. Uncomment a line to produce that table or figure.
#olsmod.scatter()
#olsmod.summary()
#olsmod.anova()
#olsmod.plot_fitted()
#olsmod.plot_hist()
#olsmod.plot_x_residual()
#olsmod.plot_yhat_residual()