In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats
import matplotlib.pyplot as plt
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [None]:
class CorralationAnalysis:
    """Regression- and VIF-based collinearity checks over DataFrame columns.

    Each column of a group can be regressed (OLS) on the remaining columns of
    the group, and the variance inflation factor (VIF) of the predictors can
    be computed, to see how strongly the columns explain one another.
    """

    # Column layout of the table returned by generate_summary_table; fixed
    # here so that an empty result still carries the expected columns.
    _SUMMARY_COLUMNS = ["Target", "Predictor", "P_value", "VIF"]

    def __init__(self, dataframe: pd.DataFrame) -> None:
        """Store the DataFrame that every analysis method reads from.

        Raises:
            ValueError: if ``dataframe`` is not a pandas DataFrame.
        """
        if not isinstance(dataframe, pd.DataFrame):
            raise ValueError("Input must be a Pandas DataFrame.")
        self.dataframe = dataframe

    def run_regression(self, target: str, predictors: list) -> dict:
        """Fit an OLS model of ``target`` on ``predictors`` plus an intercept.

        Args:
            target: name of the column used as the dependent variable.
            predictors: names of the columns used as independent variables.

        Returns:
            A dict with keys ``"Coefficients"`` and ``"P_values"`` (dicts
            keyed by term name), ``"R_squared"``, ``"Adjusted_R_squared"``
            and ``"Summary"`` (the statsmodels summary object).
        """
        X = self.dataframe[predictors]
        y = self.dataframe[target]
        # OLS has no implicit intercept; the constant column supplies it.
        X_const = sm.add_constant(X)
        model = sm.OLS(y, X_const).fit()
        return {
            "Coefficients": model.params.to_dict(),
            "P_values": model.pvalues.to_dict(),
            "R_squared": model.rsquared,
            "Adjusted_R_squared": model.rsquared_adj,
            "Summary": model.summary(),
        }

    def calculate_vif(self, predictors: list) -> pd.DataFrame:
        """Compute the variance inflation factor of each predictor.

        Args:
            predictors: names of the columns to evaluate.

        Returns:
            A DataFrame with one row per predictor and the columns
            ``"Variable"`` and ``"VIF"``.
        """
        X = self.dataframe[predictors]
        X_const = sm.add_constant(X)
        # add_constant prepends the intercept column, but by default it adds
        # nothing when the data already contains a constant column. Derive
        # the shift from the actual shapes instead of assuming it is 1, so
        # each VIF is always computed for the column it is labelled with.
        offset = X_const.shape[1] - X.shape[1]
        design = X_const.values
        return pd.DataFrame({
            "Variable": list(predictors),
            "VIF": [
                variance_inflation_factor(design, position + offset)
                for position in range(X.shape[1])
            ],
        })

    def rotational_analysis(self, group_columns: list) -> list:
        """Use every column in turn as target, with the others as predictors.

        Args:
            group_columns: names of the columns forming the group.

        Returns:
            A list with one dict per target column, holding ``"Target"``,
            ``"Predictors"``, ``"Regression"`` (result of run_regression)
            and ``"VIF"`` (result of calculate_vif). Empty if
            ``group_columns`` is empty.
        """
        results = []
        for target in group_columns:
            # Every other column of the group predicts the current target.
            predictors = [col for col in group_columns if col != target]
            results.append({
                "Target": target,
                "Predictors": predictors,
                "Regression": self.run_regression(target, predictors),
                "VIF": self.calculate_vif(predictors),
            })
        return results

    def generate_summary_table(self, group_columns: list) -> pd.DataFrame:
        """Flatten the rotational analysis into one table of p-values and VIFs.

        For every target there is one row per predictor, followed by a row
        labelled ``"Mean (All Predictors)"`` that holds the mean p-value and
        mean VIF over that target's predictors.

        Args:
            group_columns: names of the columns forming the group.

        Returns:
            A DataFrame with the columns ``"Target"``, ``"Predictor"``,
            ``"P_value"`` and ``"VIF"``.
        """
        summary_data = []

        for result in self.rotational_analysis(group_columns):
            target = result["Target"]
            p_values = result["Regression"]["P_values"]
            # The VIF table was already computed by rotational_analysis for
            # exactly these predictors, so it is reused rather than being
            # recomputed once per predictor.
            vif_table = result["VIF"]

            target_vif_values = []
            target_p_values = []
            for predictor, vif_value in zip(vif_table["Variable"], vif_table["VIF"]):
                p_value = p_values[predictor]
                target_vif_values.append(vif_value)
                target_p_values.append(p_value)
                summary_data.append({
                    "Target": target,
                    "Predictor": predictor,
                    "P_value": p_value,
                    "VIF": vif_value,
                })

            # A target without predictors has no mean; use NaN explicitly
            # rather than letting np.mean warn about an empty list.
            has_predictors = bool(target_vif_values)
            summary_data.append({
                "Target": target,
                "Predictor": "Mean (All Predictors)",
                "P_value": np.mean(target_p_values) if has_predictors else float("nan"),
                "VIF": np.mean(target_vif_values) if has_predictors else float("nan"),
            })

        return pd.DataFrame(summary_data, columns=self._SUMMARY_COLUMNS)
