In [None]:
import numpy as np
import pandas as pd

def calculate_vif(df):
    X = df.values
    n, k = X.shape
    vif_data = []

    for i in range(k):
        y = X[:, i]
        X_other = np.delete(X, i, axis=1)
        
        # Enkel regression (minsta kvadrat) för att beräkna R²
        beta, *_ = np.linalg.lstsq(X_other, y, rcond=None)
        y_pred = X_other @ beta
        r2 = 1 - np.sum((y - y_pred)**2) / np.sum((y - y.mean())**2)
        
        vif = 1 / (1 - r2) if r2 < 1 else float("inf")
        vif_data.append({"Feature": df.columns[i], "VIF": vif})
    
    return pd.DataFrame(vif_data)


In [None]:
import pandas as pd

df = pd.read_csv("synthetic_credit_data.csv")

features = ["age", "years_with_bank", "income", "credit_score", 
            "has_loan", "loan_amount", "dti", "missed_payments", "prob_default"]

X = df[features].dropna()
vif_df = calculate_vif(X)
print(vif_df)


           Feature        VIF
0              age   1.534793
1  years_with_bank   1.593626
2           income   1.503262
3     credit_score   0.113092
4         has_loan   4.987881
5      loan_amount   9.744918
6              dti  23.093340
7  missed_payments   9.485430
8     prob_default  26.988956


In [None]:
bor = df[df["has_loan"] == 1].copy()

features = ["age", "years_with_bank", "income", "credit_score", 
            "loan_amount", "dti", "missed_payments", "prob_default"]

X = bor[features].dropna()
vif_df = calculate_vif(X)
print(vif_df)

           Feature        VIF
0              age   1.571867
1  years_with_bank   3.097526
2           income   4.852512
3     credit_score   0.269638
4      loan_amount  12.691806
5              dti  14.441339
6  missed_payments  10.391560
7     prob_default  15.187819
