In [3]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import math

# -----------------------------------------------------
# 1. Load the housing dataset and preview it
# -----------------------------------------------------
# Path is resolved relative to the notebook's working directory; point it at
# your own CSV to reuse the cell with different data.
DATA_PATH = "housing_dataset_1000.csv"
df = pd.read_csv(DATA_PATH)

# A single print call with a newline separator writes the heading and the
# first rows of the frame on consecutive lines.
print("Dataset Preview:", df.head(), sep="\n")

# -----------------------------------------------------
# 2. Select Features and Target
# -----------------------------------------------------
# The order of FEATURE_COLUMNS fixes the column order of X.
FEATURE_COLUMNS = ["RM", "LSTAT", "PTRATIO", "TAX", "AGE", "CRIM"]
TARGET_COLUMN = "PRICE"

X = df[FEATURE_COLUMNS]   # predictors
y = df[TARGET_COLUMN]     # value to be predicted

# -----------------------------------------------------
# 3. Train-Test Split
# -----------------------------------------------------
# A fifth of the rows is held out for evaluation; the fixed seed makes the
# shuffle (and therefore the reported metrics) repeatable across runs.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

# -----------------------------------------------------
# 4. Train Linear Regression Model
# -----------------------------------------------------
# fit() hands back the estimator it was called on, so building and training
# can be written as one chained expression.
model = LinearRegression().fit(X_train, y_train)

# -----------------------------------------------------
# 5. Predictions
# -----------------------------------------------------
# Predict on the test features only; these rows were not passed to fit().
y_pred = model.predict(X_test)

# -----------------------------------------------------
# 6. Performance Evaluation
# -----------------------------------------------------
# RMSE is the square root of the mean squared error between the test targets
# and the predictions.
mse = mean_squared_error(y_test, y_pred)
rmse = math.sqrt(mse)
r2 = r2_score(y_test, y_pred)

print("\nModel Evaluation:")
# Each metric is printed as "<label> <value>" on its own line.
for label, value in (("RMSE:", rmse), ("R² Score:", r2)):
    print(label, value)

# -----------------------------------------------------
# 7. Coefficients
# -----------------------------------------------------
print("\nModel Coefficients:")

intercept = model.intercept_
print("Intercept:", intercept)

# Pair each feature name with its fitted weight by position: the model was
# trained on a split of X, so coef_ follows X's column order.
feature_names = list(X.columns)
weights = list(model.coef_)
for feature, coef in zip(feature_names, weights):
    print(f"{feature}: {coef}")


Dataset Preview:
        CRIM        RM      LSTAT    PTRATIO         TAX        AGE      PRICE
0   7.490802  6.142161  29.174448  15.878261  635.956531  98.534042  17.028372
1  19.014286  4.931725  10.609063  20.035377  488.490806  14.515782  18.621205
2  14.639879  6.304158  14.979191  21.017740  372.740427  18.702510  23.277837
3  11.973170  6.488469   6.686834  14.034727  601.505941  74.841107  26.684773
4   3.120373  6.447832  18.712622  12.669736  470.139447  35.364250  21.931362

Model Evaluation:
RMSE: 1.9729691077836782
R² Score: 0.8442558941897886

Model Coefficients:
Intercept: 0.5737677837969422
RM: 4.903347855416228
LSTAT: -0.31249484116695414
PTRATIO: 0.020027810283104148
TAX: -0.01007402889735931
AGE: 0.01843401957991573
CRIM: -0.03342700210237767
