In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from statsmodels.stats.outliers_influence import variance_inflation_factor
import statsmodels.api as sm

# Load the dataset; every model below is fit and scored on this same frame
# (there is no train/test split, so all R-squared values are in-sample).
df = pd.read_csv("Civil_Engineering_Regression_Dataset.csv")

# Column names for the response and for the two predictor sets.
target_column = 'Construction_Cost'
simple_features = ['Building_Height']
multiple_features = [
    'Building_Height',
    'Material_Quality_Index',
    'Labor_Cost',
    'Concrete_Strength',
    'Foundation_Depth',
]

y = df[target_column]

# Simple linear regression: a single predictor.
X_simple = df[simple_features]
model_simple = LinearRegression().fit(X_simple, y)  # fit() returns the estimator
r2_simple = r2_score(y, model_simple.predict(X_simple))

# Multiple linear regression: all five predictors.
X_multiple = df[multiple_features]
model_multiple = LinearRegression().fit(X_multiple, y)
r2_multiple = r2_score(y, model_multiple.predict(X_multiple))

# Adjusted R-squared: 1 - (1 - R^2) * (n - 1) / (n - p - 1),
# where n is the number of rows and p the number of predictors.
n, p = X_multiple.shape
unexplained_share = 1 - r2_multiple
adjusted_r2 = 1 - unexplained_share * (n - 1) / (n - p - 1)

# Variance inflation factors, computed on the predictors plus a constant
# column. The constant gets its own row in the table as well.
X_with_const = sm.add_constant(X_multiple)
design_matrix = X_with_const.values
vif_data = pd.DataFrame({
    "Feature": X_with_const.columns,
    "VIF": [
        variance_inflation_factor(design_matrix, column_index)
        for column_index in range(design_matrix.shape[1])
    ],
})

# Report the fit statistics and the VIF table.
print(f"Simple Linear Regression R-squared: {r2_simple:.4f}")
print(f"Multiple Linear Regression R-squared: {r2_multiple:.4f}")
print(f"Adjusted R-squared for Multiple Regression: {adjusted_r2:.4f}")
print("\nVariance Inflation Factor (VIF) values:")
print(vif_data)


Simple Linear Regression R-squared: 0.9154
Multiple Linear Regression R-squared: 0.9998
Adjusted R-squared for Multiple Regression: 0.9998

Variance Inflation Factor (VIF) values:
                  Feature        VIF
0                   const  36.217244
1         Building_Height   1.047164
2  Material_Quality_Index   1.048067
3              Labor_Cost   1.054086
4       Concrete_Strength   1.019701
5        Foundation_Depth   1.040594
