In [2]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression

def load(path: str) -> "pd.DataFrame | None":
    """Load a CSV file into a pandas DataFrame.

    The path is resolved against the current working directory; an absolute
    path is used as-is. Failures are reported on stdout instead of being
    raised, so callers must check the return value for ``None``.

    Args:
        path (str): path to the CSV file; must end with ``.csv``
            (case-insensitive).

    Returns:
        pd.DataFrame | None: the parsed file, or ``None`` when the extension
        is not ``.csv`` or the path does not point to an existing file.
    """
    try:
        # Test the real ".csv" suffix: the bare string "csv" would also
        # accept names such as "datacsv" that have no extension at all.
        if not path.lower().endswith(".csv"):
            raise AssertionError("Only csv formats are supported.")
        local_dir = os.path.abspath('')
        file_path = os.path.join(local_dir, path)
        if not os.path.isfile(file_path):
            # A single formatted message; passing the path as a second
            # exception argument would print a tuple repr instead.
            raise AssertionError(f"File not found: {file_path}")
        return pd.read_csv(file_path)
    except AssertionError as error:
        print(f"{AssertionError.__name__}: {error}")
        return None

In [3]:
# Load the test set. `load` prints an error and returns None when the path is
# not a CSV or the file does not exist, so `test_knight` may be None here.
test_knight = load("../Test_knight.csv")

In [4]:
def sklearn_vif(exogs, data):
    """Compute the variance inflation factor (VIF) and tolerance per variable.

    Each variable in ``exogs`` is regressed on all the other variables in
    ``exogs`` with a default ``LinearRegression``. With R² the score of that
    fit on the same data, ``Tolerance = 1 - R²`` and ``VIF = 1 / Tolerance``.

    Args:
        exogs: iterable of column labels of ``data`` to analyse (a list, a
            pandas Index or any one-shot iterable).
        data: DataFrame holding at least the columns named in ``exogs``.

    Returns:
        pd.DataFrame: indexed by variable name, with columns ``VIF`` and
        ``Tolerance``. A variable that is perfectly explained by the others
        gets ``Tolerance == 0`` and ``VIF == inf``. When ``exogs`` holds a
        single variable there is nothing to regress on, so it gets
        ``VIF == 1`` and ``Tolerance == 1``.
    """
    # Materialise once: the labels are iterated once per variable below.
    exogs = list(exogs)

    vif_dict, tolerance_dict = {}, {}

    for exog in exogs:
        not_exog = [name for name in exogs if name != exog]

        if not not_exog:
            # No other regressor: R² is 0 by definition, and fitting on an
            # empty feature matrix would fail.
            tolerance = 1.0
        else:
            X, y = data[not_exog], data[exog]
            r_squared = LinearRegression().fit(X, y).score(X, y)
            tolerance = 1.0 - r_squared
            # Rounding can push R² marginally above 1; treat that as exact
            # collinearity rather than reporting a negative tolerance.
            if tolerance <= 0:
                tolerance = 0.0

        # Perfect collinearity: report an infinite VIF instead of dividing
        # by zero (which raises ZeroDivisionError for a plain Python float).
        vif = float("inf") if tolerance == 0 else 1.0 / tolerance

        vif_dict[exog] = vif
        tolerance_dict[exog] = tolerance

    return pd.DataFrame({'VIF': vif_dict, 'Tolerance': tolerance_dict})

In [5]:
# VIF / tolerance for every column of the test set, each one regressed on all
# the other columns of the frame.
vif_data = sklearn_vif(test_knight.columns, test_knight)
print(vif_data)

                       VIF  Tolerance
Sensitivity    4282.499725   0.000234
Hability         18.559728   0.053880
Strength       4003.652047   0.000250
Power           477.730519   0.002093
Agility          13.713180   0.072923
Dexterity        57.554993   0.017375
Awareness        96.794082   0.010331
Prescience       95.728833   0.010446
Reactivity        5.860520   0.170633
Midi-chlorien    25.555332   0.039131
Slash           160.894576   0.006215
Push              8.206184   0.121859
Pull            133.100410   0.007513
Lightsaber       94.559187   0.010575
Survival          9.293929   0.107597
Repulse          41.250309   0.024242
Friendship       33.496386   0.029854
Blocking         12.589272   0.079433
Deflection        8.977914   0.111384
Mass             15.562145   0.064258
Recovery       1367.001571   0.000732
Evade            31.477268   0.031769
Stims          1047.514892   0.000955
Sprint          605.793908   0.001651
Combo            20.478328   0.048832
Delay       

In [26]:
# Remove a fixed list of columns from the test set, then recompute the
# VIF / tolerance table on the columns that remain.
df_filtered = test_knight.drop(
    columns=[
        'Grasping', 'Agility', 'Survival', 'Burst', 'Mass', 'Prescience',
        'Awareness', 'Recovery', 'Strength', 'Sensitivity', 'Slash',
        'Hability', 'Stims', 'Delay', 'Attunement', 'Dexterity', 'Empowered',
        'Sprint', 'Friendship', 'Pull', 'Midi-chlorien',
    ],
)
df_filtered_vif = sklearn_vif(df_filtered.columns, df_filtered)
print(df_filtered_vif)

                 VIF  Tolerance
Power       5.657272   0.176764
Reactivity  2.213826   0.451707
Push        2.130382   0.469399
Lightsaber  5.238339   0.190900
Repulse     2.492187   0.401254
Blocking    2.520672   0.396720
Deflection  2.323479   0.430389
Evade       1.709947   0.584813
Combo       1.395325   0.716679


In [39]:
# Alternative selection: drop a different fixed list of columns from the test
# set and recompute VIF / tolerance on the rest.
# NOTE: `df_filtered_vif` is reassigned here, so after this cell it holds the
# table for `df_filtered_bis`, not the one computed from `df_filtered`.
df_filtered_bis = test_knight.drop(
    columns=[
        'Sensitivity', 'Strength', 'Recovery', 'Stims', 'Power', 'Awareness',
        'Prescience', 'Slash', 'Pull', 'Lightsaber', 'Repulse', 'Friendship',
        'Dexterity', 'Evade', 'Empowered', 'Midi-chlorien', 'Combo', 'Delay',
        'Burst', 'Grasping',
    ],
)
df_filtered_vif = sklearn_vif(df_filtered_bis.columns, df_filtered_bis)
print(df_filtered_vif)

                 VIF  Tolerance
Hability    1.727515   0.578866
Agility     2.058289   0.485840
Reactivity  2.380483   0.420083
Push        2.329757   0.429229
Survival    2.697287   0.370743
Blocking    2.197711   0.455019
Deflection  2.102137   0.475706
Mass        2.345453   0.426357
Sprint      1.969148   0.507834
Attunement  3.107786   0.321772
