In [17]:
import pandas as pd
import numpy as np

from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

In [18]:
# Load the prepared, merged dataset.
data = pd.read_csv("Merged_Data.csv")

# Basic cleaning: drop every row that is missing a value in any of the columns
# used by the models, so that no NaN reaches the regressions.
required_columns = [
    "Median_Home_Value",
    "Median_Household_Income",
    "Vacancy_Rate",
]
data = data.dropna(subset=required_columns)

# Report the size of the cleaned frame itself. The count must come from `data`:
# no other frame name is defined by the cells shown here, so referring to one
# would fail on a fresh kernel or report a stale, unrelated frame.
print("Rows remaining after NA removal:", len(data))

# Target variable
y = data["Median_Home_Value"]

Rows remaining after NA removal: 131


In [19]:
# Model 1: simple linear regression of median home value on median household income.
income_features = ["Median_Household_Income"]
X1 = data[income_features]

# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
X1_train, X1_test, y1_train, y1_test = train_test_split(
    X1,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator, so construction and fitting are chained.
model1 = LinearRegression().fit(X1_train, y1_train)
y1_pred = model1.predict(X1_test)

# Fitted parameters first, then the hold-out metrics, in display order.
model1_report = [
    ("Coefficient (Income):", model1.coef_[0]),
    ("Intercept:", model1.intercept_),
    ("R²:", r2_score(y1_test, y1_pred)),
    ("MAE:", mean_absolute_error(y1_test, y1_pred)),
    ("RMSE:", np.sqrt(mean_squared_error(y1_test, y1_pred))),
]

print("MODEL 1: Median_Home_Value ~ Median_Household_Income")
for metric_label, metric_value in model1_report:
    print(metric_label, metric_value)

MODEL 1: Median_Home_Value ~ Median_Household_Income
Coefficient (Income): 5.6282654105325
Intercept: -48986.898025080794
R²: 0.49138558520588493
MAE: 74390.36590560111
RMSE: 89678.69811318273


In [20]:
# Model 2: simple linear regression of median home value on vacancy rate.
vacancy_features = ["Vacancy_Rate"]
X2 = data[vacancy_features]

# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
X2_train, X2_test, y2_train, y2_test = train_test_split(
    X2,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator, so construction and fitting are chained.
model2 = LinearRegression().fit(X2_train, y2_train)
y2_pred = model2.predict(X2_test)

# Fitted parameters first, then the hold-out metrics, in display order.
model2_report = [
    ("Coefficient (Vacancy Rate):", model2.coef_[0]),
    ("Intercept:", model2.intercept_),
    ("R²:", r2_score(y2_test, y2_pred)),
    ("MAE:", mean_absolute_error(y2_test, y2_pred)),
    ("RMSE:", np.sqrt(mean_squared_error(y2_test, y2_pred))),
]

print("MODEL 2: Median_Home_Value ~ Vacancy_Rate")
for metric_label, metric_value in model2_report:
    print(metric_label, metric_value)

MODEL 2: Median_Home_Value ~ Vacancy_Rate
Coefficient (Vacancy Rate): 8708.001665666156
Intercept: 381667.48650836834
R²: 0.004323011242629238
MAE: 104060.13827064655
RMSE: 125474.1313397697


In [21]:
# Model 3: multiple linear regression on income, vacancy rate, and their
# interaction term (median household income * vacancy rate).
income_times_vacancy = data["Median_Household_Income"] * data["Vacancy_Rate"]
data["Income_Vacancy_Interaction"] = income_times_vacancy

interaction_features = [
    "Median_Household_Income",
    "Vacancy_Rate",
    "Income_Vacancy_Interaction",
]
X3 = data[interaction_features]

# Hold out 20% of the rows for evaluation; random_state pins the shuffle.
X3_train, X3_test, y3_train, y3_test = train_test_split(
    X3,
    y,
    test_size=0.2,
    random_state=42,
)

# fit() returns the estimator, so construction and fitting are chained.
model3 = LinearRegression().fit(X3_train, y3_train)
y3_pred = model3.predict(X3_test)

print("MODEL 3: Income, Vacancy, and Interaction")
print("Coefficients:")
# Pair each feature column with its fitted weight, in column order.
for feature_name, feature_weight in zip(X3.columns, model3.coef_):
    print(f"  {feature_name}: {feature_weight}")

# Intercept first, then the hold-out metrics, in display order.
model3_report = [
    ("Intercept:", model3.intercept_),
    ("R²:", r2_score(y3_test, y3_pred)),
    ("MAE:", mean_absolute_error(y3_test, y3_pred)),
    ("RMSE:", np.sqrt(mean_squared_error(y3_test, y3_pred))),
]
for metric_label, metric_value in model3_report:
    print(metric_label, metric_value)

MODEL 3: Income, Vacancy, and Interaction
Coefficients:
  Median_Household_Income: 4.564988179686053
  Vacancy_Rate: 302.2015687739115
  Income_Vacancy_Interaction: 0.0886074414850343
Intercept: -31444.157420708565
R²: 0.7090412654115066
MAE: 53412.07741832887
RMSE: 67828.25781691709
