In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression

def load(path: str) -> "pd.DataFrame | None":
    """Load a CSV file into a pandas DataFrame.

    Problems are reported by printing a message of the form
    ``AssertionError: <reason>`` and returning ``None``; nothing is raised
    for an unsupported extension or a missing file.

    Args:
        path (str): path to the CSV file. A relative path is resolved against
            the current working directory; an absolute path is used as-is.
            ``os.PathLike`` objects are accepted as well.

    Returns:
        pd.DataFrame | None: the parsed data, or ``None`` when the file does
        not have a ``.csv`` extension, does not exist, or is a directory.
    """
    try:
        # Accept pathlib.Path and friends; a plain str passes through unchanged.
        path = os.fspath(path)
        # Require the dot: a bare "csv" suffix would also match e.g. "datacsv".
        if not path.lower().endswith(".csv"):
            raise AssertionError("Only csv formats are supported.")
        # os.path.join discards the first component when `path` is absolute.
        file_path = os.path.join(os.path.abspath(''), path)
        if not os.path.exists(file_path) or os.path.isdir(file_path):
            # One formatted string: passing two args to the exception would
            # make str(error) render as a tuple.
            raise AssertionError(f"File not found: {file_path}")
        return pd.read_csv(file_path)
    except AssertionError as error:
        print(f"{AssertionError.__name__}: {error}")
        return None

In [2]:
# `load` prints a message and returns None when the file cannot be read.
# Stop here in that case rather than failing in a later cell with an
# AttributeError on `None.columns`.
test_knight = load("../Test_knight.csv")
if test_knight is None:
    raise FileNotFoundError("Could not load ../Test_knight.csv")

In [15]:
def sklearn_vif(exogs, data):
    """Compute the variance inflation factor (VIF) and tolerance per variable.

    Each variable in ``exogs`` is regressed on all the other variables in
    ``exogs`` with scikit-learn's ``LinearRegression`` (default settings).
    With R² the score of that fit on the same data:

        tolerance = 1 - R²
        VIF       = 1 / tolerance

    Args:
        exogs: iterable of column labels of ``data`` to evaluate (for example
            ``data.columns``). It is materialised into a list once, so
            one-shot iterators are fine.
        data (pd.DataFrame): frame containing every column named in ``exogs``.
            The columns are handed to ``LinearRegression`` unchanged.

    Returns:
        pd.DataFrame: one row per variable, with columns ``'VIF'`` and
        ``'Tolerance'``. A variable that is perfectly explained by the others
        (R² == 1) gets ``VIF = inf`` and ``Tolerance = 0``. When ``exogs``
        holds a single variable there is nothing to regress on, so it gets
        ``VIF = 1`` and ``Tolerance = 1``.
    """
    exogs = list(exogs)
    vif_dict, tolerance_dict = {}, {}

    for exog in exogs:
        not_exog = [i for i in exogs if i != exog]

        if not_exog:
            X, y = data[not_exog], data[exog]
            # R² of regressing this variable on all the others.
            r_squared = LinearRegression().fit(X, y).score(X, y)
        else:
            # No other predictors: nothing can be explained, so R² is 0.
            r_squared = 0.0

        tolerance = 1 - r_squared
        # Guard the reciprocal: perfect collinearity gives tolerance == 0.
        vif = 1 / tolerance if tolerance > 0 else float("inf")

        vif_dict[exog] = vif
        tolerance_dict[exog] = tolerance

    return pd.DataFrame({'VIF': vif_dict, 'Tolerance': tolerance_dict})

In [16]:
# Baseline: VIF and tolerance for every column of the raw dataset.
vif_data = sklearn_vif(exogs=test_knight.columns, data=test_knight)
print(vif_data)

                       VIF  Tolerance
Sensitivity    4282.499725   0.000234
Hability         18.559728   0.053880
Strength       4003.652047   0.000250
Power           477.730519   0.002093
Agility          13.713180   0.072923
Dexterity        57.554993   0.017375
Awareness        96.794082   0.010331
Prescience       95.728833   0.010446
Reactivity        5.860520   0.170633
Midi-chlorien    25.555332   0.039131
Slash           160.894576   0.006215
Push              8.206184   0.121859
Pull            133.100410   0.007513
Lightsaber       94.559187   0.010575
Survival          9.293929   0.107597
Repulse          41.250309   0.024242
Friendship       33.496386   0.029854
Blocking         12.589272   0.079433
Deflection        8.977914   0.111384
Mass             15.562145   0.064258
Recovery       1367.001571   0.000732
Evade            31.477268   0.031769
Stims          1047.514892   0.000955
Sprint          605.793908   0.001651
Combo            20.478328   0.048832
Delay       

In [4]:
# Round 1: drop three features whose VIF exceeded 1000 in the baseline table
# (Sensitivity 4282.5, Recovery 1367.0, Stims 1047.5).
df_filtered = test_knight.drop(
    columns=['Sensitivity', 'Recovery', 'Stims'],
)

In [5]:
# Recompute VIF and tolerance on the columns left after round 1.
df_filtered_vif = sklearn_vif(exogs=df_filtered.columns, data=df_filtered)
print(df_filtered_vif)

                      VIF  Tolerance
Hability        17.199580   0.058141
Strength       131.986245   0.007577
Power          198.044141   0.005049
Agility         13.146475   0.076066
Dexterity       43.580727   0.022946
Awareness       85.340525   0.011718
Prescience      91.947780   0.010876
Reactivity       5.484956   0.182317
Midi-chlorien   23.034040   0.043414
Slash           56.974056   0.017552
Push             7.504639   0.133251
Pull            37.155130   0.026914
Lightsaber      51.037750   0.019593
Survival         8.802721   0.113601
Repulse         40.073538   0.024954
Friendship      30.934352   0.032327
Blocking        11.182004   0.089429
Deflection       8.361704   0.119593
Mass            14.990690   0.066708
Evade           29.235687   0.034205
Sprint          65.076352   0.015367
Combo           19.633832   0.050932
Delay           66.971896   0.014932
Attunement      48.756242   0.020510
Empowered       45.229380   0.022110
Burst           14.959962   0.066845
G

In [6]:
# Round 2: drop three more collinear features
# (previous table: Power 198.0, Delay 67.0, Lightsaber 51.0).
df_filtered_bis = df_filtered.drop(
    columns=['Power', 'Lightsaber', 'Delay'],
)

In [7]:
# Recompute VIF and tolerance on the columns left after round 2.
df_filtered_vif_bis = sklearn_vif(exogs=df_filtered_bis.columns, data=df_filtered_bis)
print(df_filtered_vif_bis)

                     VIF  Tolerance
Hability       16.356239   0.061139
Strength       28.629658   0.034929
Agility        12.447979   0.080334
Dexterity      31.353475   0.031894
Awareness      83.336210   0.012000
Prescience     88.158844   0.011343
Reactivity      5.071919   0.197164
Midi-chlorien  20.697244   0.048316
Slash          35.301499   0.028327
Push            6.797541   0.147112
Pull           34.147277   0.029285
Survival        8.543165   0.117053
Repulse        17.169467   0.058243
Friendship     21.142101   0.047299
Blocking        9.154886   0.109231
Deflection      8.300514   0.120474
Mass           11.264950   0.088771
Evade          27.219278   0.036739
Sprint         21.092790   0.047410
Combo          19.065619   0.052450
Attunement     30.824034   0.032442
Empowered      44.066974   0.022693
Burst          14.214436   0.070351
Grasping       16.928737   0.059071


In [8]:
# Round 3: drop three more collinear features
# (previous table: Prescience 88.2, Empowered 44.1, Attunement 30.8).
df_filtered_ter = df_filtered_bis.drop(
    columns=['Prescience', 'Empowered', 'Attunement'],
)

In [9]:
# Recompute VIF and tolerance on the columns left after round 3.
df_filtered_vif_ter = sklearn_vif(exogs=df_filtered_ter.columns, data=df_filtered_ter)
print(df_filtered_vif_ter)

                     VIF  Tolerance
Hability       16.283643   0.061411
Strength       24.492265   0.040829
Agility        11.749360   0.085111
Dexterity      30.397922   0.032897
Awareness      19.458128   0.051392
Reactivity      4.777634   0.209309
Midi-chlorien  17.313470   0.057758
Slash          34.958550   0.028605
Push            6.744233   0.148275
Pull           33.439072   0.029905
Survival        7.886164   0.126804
Repulse        16.872529   0.059268
Friendship     12.185051   0.082068
Blocking        3.690285   0.270982
Deflection      7.643153   0.130836
Mass           10.890157   0.091826
Evade          27.077850   0.036931
Sprint         20.825003   0.048019
Combo          18.022957   0.055485
Burst          12.003183   0.083311
Grasping       12.727508   0.078570


In [10]:
# Round 4: drop three more collinear features
# (previous table: Slash 35.0, Pull 33.4, Evade 27.1).
df_filtered_quart = df_filtered_ter.drop(
    columns=['Slash', 'Evade', 'Pull'],
)

In [11]:
# Recompute VIF and tolerance on the columns left after round 4.
df_filtered_vif_quart = sklearn_vif(exogs=df_filtered_quart.columns, data=df_filtered_quart)
print(df_filtered_vif_quart)

                     VIF  Tolerance
Hability        1.929257   0.518334
Strength       23.370503   0.042789
Agility        10.510475   0.095143
Dexterity      28.565108   0.035008
Awareness      19.106904   0.052337
Reactivity      4.574059   0.218624
Midi-chlorien  17.291073   0.057833
Push            2.955411   0.338362
Survival        6.361075   0.157206
Repulse        16.157161   0.061892
Friendship     11.390170   0.087795
Blocking        3.634953   0.275107
Deflection      6.269639   0.159499
Mass           10.659775   0.093811
Sprint         10.800250   0.092590
Combo          14.938275   0.066942
Burst          10.558670   0.094709
Grasping       12.505744   0.079963


In [12]:
# Round 5: drop the three highest-VIF features of the previous table
# (Dexterity 28.6, Strength 23.4, Awareness 19.1), then recompute.
df_filtered_quint = df_filtered_quart.drop(
    columns=['Strength', 'Dexterity', 'Awareness'],
)
df_filtered_vif_quint = sklearn_vif(exogs=df_filtered_quint.columns, data=df_filtered_quint)
print(df_filtered_vif_quint)

                     VIF  Tolerance
Hability        1.898638   0.526693
Agility         9.315447   0.107349
Reactivity      4.508369   0.221810
Midi-chlorien   9.582496   0.104357
Push            2.757160   0.362692
Survival        6.125782   0.163244
Repulse         9.327463   0.107210
Friendship      6.282198   0.159180
Blocking        3.521637   0.283959
Deflection      6.084139   0.164362
Mass            8.345522   0.119825
Sprint          1.976215   0.506018
Combo          14.293303   0.069963
Burst          10.061622   0.099388
Grasping       12.149290   0.082309


In [13]:
# Round 6: drop the three highest-VIF features of the previous table
# (Combo 14.3, Grasping 12.1, Burst 10.1), then recompute.
df_filtered_sext = df_filtered_quint.drop(
    columns=['Combo', 'Burst', 'Grasping'],
)
df_filtered_vif_sext = sklearn_vif(exogs=df_filtered_sext.columns, data=df_filtered_sext)
print(df_filtered_vif_sext)

                    VIF  Tolerance
Hability       1.716942   0.582431
Agility        2.968560   0.336864
Reactivity     2.396603   0.417257
Midi-chlorien  5.595413   0.178718
Push           2.315698   0.431835
Survival       2.454927   0.407344
Repulse        8.588565   0.116434
Friendship     6.275007   0.159362
Blocking       3.123435   0.320160
Deflection     2.365746   0.422700
Mass           6.929641   0.144308
Sprint         1.907905   0.524135


In [14]:
# Round 7: drop the single highest-VIF feature of the previous table
# (Repulse 8.6), then recompute.
df_filtered_sept = df_filtered_sext.drop(
    columns=['Repulse'],
)
df_filtered_vif_sept = sklearn_vif(exogs=df_filtered_sept.columns, data=df_filtered_sept)
print(df_filtered_vif_sept)

                    VIF  Tolerance
Hability       1.688203   0.592346
Agility        2.939538   0.340189
Reactivity     2.378126   0.420499
Midi-chlorien  5.490502   0.182133
Push           2.296834   0.435382
Survival       2.450243   0.408123
Friendship     3.871245   0.258315
Blocking       3.121487   0.320360
Deflection     2.163242   0.462269
Mass           4.907832   0.203756
Sprint         1.904134   0.525173
