In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.api as sm

# Create a dataset with 10 rows
data = {
    'Income': [40, 45, 50, 55, 60, 65, 70, 75, 80, 85],
    'Age': [25, 27, 30, 32, 35, 37, 40, 42, 45, 47],
    'SpendingScore': [30, 35, 40, 45, 50, 55, 60, 65, 70, 75],
    'PurchaseAmount': [200, 210, 240, 260, 290, 310, 340, 360, 390, 410]
}
df = pd.DataFrame(data)
print(df)

   Income  Age  SpendingScore  PurchaseAmount
0      40   25             30             200
1      45   27             35             210
2      50   30             40             240
3      55   32             45             260
4      60   35             50             290
5      65   37             55             310
6      70   40             60             340
7      75   42             65             360
8      80   45             70             390
9      85   47             75             410


In [2]:
# Check Multicollinearity (VIF)
X = df[['Income', 'Age', 'SpendingScore']]
vif_data = pd.DataFrame()
vif_data["feature"] = X.columns
vif_data["VIF"] = [variance_inflation_factor(X.values, i) for i in range(len(X.columns))]
print("VIF Scores:")
print(vif_data)

VIF Scores:
         feature         VIF
0         Income  67218.8125
1            Age    841.5000
2  SpendingScore  12516.5625


In [3]:
# Model Building
X = sm.add_constant(X) # Add constant for statsmodels
y = df['PurchaseAmount']

model = sm.OLS(y, X).fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:         PurchaseAmount   R-squared:                       0.999
Model:                            OLS   Adj. R-squared:                  0.998
Method:                 Least Squares   F-statistic:                     2848.
Date:                Wed, 26 Nov 2025   Prob (F-statistic):           6.48e-11
Time:                        03:42:51   Log-Likelihood:                -23.148
No. Observations:                  10   AIC:                             52.30
Df Residuals:                       7   BIC:                             53.20
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                    coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------
const            -1.4706      0.553     -2.661