In [6]:
import pandas as pd
import numpy as np

from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

# --------------------------------------------
# PART 1: HOUSE PRICE PREDICTION (CALIFORNIA)
# --------------------------------------------
# Fits a LinearRegression on the California housing data twice -- once on
# the raw features and once on standardized features -- and prints MAE,
# MSE and R² on the held-out split for each fit, followed by a side-by-side
# comparison rounded to four decimals.


def _regression_metrics(actual, predicted):
    """Return the (MAE, MSE, R²) triple for *predicted* against *actual*."""
    return (
        mean_absolute_error(actual, predicted),
        mean_squared_error(actual, predicted),
        r2_score(actual, predicted),
    )


def _print_metrics(heading, tag, mae, mse, r2):
    """Print one metrics section; *tag* labels each line, e.g. 'raw'."""
    print(heading)
    print(f"MAE ({tag}):", mae)
    print(f"MSE ({tag}):", mse)
    # The trailing "\n" argument leaves a blank line after the section.
    print(f"R² ({tag}):", r2, "\n")


print("==== PART 1: California Housing – Linear Regression ====\n")

# Load the dataset as pandas objects and separate features from target.
data = fetch_california_housing(as_frame=True)
X, y = data.data, data.target

print("Feature columns:", X.columns.tolist())
print("Target name:", data.target_names, "\n")

# Hold out 20% of the rows; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
print("Train shape:", X_train.shape)
print("Test shape:", X_test.shape, "\n")

# Baseline: fit directly on the unscaled features.
model_raw = LinearRegression().fit(X_train, y_train)
y_pred_raw = model_raw.predict(X_test)
mae_raw, mse_raw, r2_raw = _regression_metrics(y_test, y_pred_raw)
_print_metrics("---- Without Scaling ----", "raw", mae_raw, mse_raw, r2_raw)

# Standardize the features. The scaler is fitted on the training split only
# and then applied to both splits, so no test-set statistics are used.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Same model, this time on the standardized features.
model_scaled = LinearRegression().fit(X_train_scaled, y_train)
y_pred_scaled = model_scaled.predict(X_test_scaled)
mae_scaled, mse_scaled, r2_scaled = _regression_metrics(y_test, y_pred_scaled)
_print_metrics(
    "---- With Scaling ----", "scaled", mae_scaled, mse_scaled, r2_scaled
)

# Side-by-side summary of both fits.
print("==== Comparison (Raw vs Scaled) ====")
print(f"MAE  - Raw: {mae_raw:.4f} | Scaled: {mae_scaled:.4f}")
print(f"MSE  - Raw: {mse_raw:.4f} | Scaled: {mse_scaled:.4f}")
print(f"R²   - Raw: {r2_raw:.4f} | Scaled: {r2_scaled:.4f}")
print("\n")


# -------------------------------------------------
# PART 2: CLASSROOM ELECTRICITY BILL PREDICTION
# -------------------------------------------------
# Fits a LinearRegression on a tiny hand-written table of classrooms
# (students, AC hours, computers -> bill), reports MAE / MSE / R² on a
# two-row hold-out set, and predicts the bill for one new classroom.

print("==== PART 2: Classroom Electricity Bill Prediction ====\n")

# Eight rows, so that a 25% hold-out leaves two test samples.
bill_data = pd.DataFrame({
    "students":  [30, 32, 28, 35, 33, 36, 29, 31],
    "ac_hours":  [5,  6,  4,  7,  6,  8,  5,  6],
    "computers": [10, 12, 9, 11, 13, 14, 10, 12],
    "bill":      [2500, 2700, 2300, 3000, 2800, 3200, 2400, 2650],
})

print("Bill dataset:\n", bill_data, "\n")

Xb = bill_data[["students", "ac_hours", "computers"]]
yb = bill_data["bill"]

# 25% of 8 rows -> 2 test samples, 6 training samples.
Xb_train, Xb_test, yb_train, yb_test = train_test_split(
    Xb, yb, test_size=0.25, random_state=42
)

bill_model = LinearRegression()
bill_model.fit(Xb_train, yb_train)

yb_pred = bill_model.predict(Xb_test)

print("Test features (Xb_test):\n", Xb_test, "\n")
# .tolist() converts to native Python numbers. Wrapping in list() keeps
# NumPy scalars, which NumPy >= 2.0 prints as "np.int64(2700)" /
# "np.float64(...)" inside a list instead of the plain number.
print("Actual bill (y_test):", yb_test.tolist())
print("Predicted bill:", np.round(yb_pred, 2).tolist(), "\n")

# With two test samples R² can be computed, but a score based on two points
# is very noisy and should not be read as a reliable quality estimate.
mae_bill = mean_absolute_error(yb_test, yb_pred)
mse_bill = mean_squared_error(yb_test, yb_pred)
r2_bill = r2_score(yb_test, yb_pred)

print("Bill Model Performance:")
print("MAE:", mae_bill)
print("MSE:", mse_bill)
print("R²:", r2_bill, "\n")

# Predict for a NEW classroom. The frame is built with the training columns
# in the training order so the feature names match what the model was fitted on.
new_classroom = pd.DataFrame(
    [[34, 6, 12]],
    columns=list(Xb.columns),
)

new_bill_pred = bill_model.predict(new_classroom)
print("New classroom details:\n", new_classroom)
print("Predicted electricity bill for new classroom:",
      round(new_bill_pred[0], 2))


==== PART 1: California Housing – Linear Regression ====

Feature columns: ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude']
Target name: ['MedHouseVal'] 

Train shape: (16512, 8)
Test shape: (4128, 8) 

---- Without Scaling ----
MAE (raw): 0.5332001304956555
MSE (raw): 0.5558915986952442
R² (raw): 0.575787706032451 

---- With Scaling ----
MAE (scaled): 0.5332001304956565
MSE (scaled): 0.5558915986952444
R² (scaled): 0.5757877060324508 

==== Comparison (Raw vs Scaled) ====
MAE  - Raw: 0.5332 | Scaled: 0.5332
MSE  - Raw: 0.5559 | Scaled: 0.5559
R²   - Raw: 0.5758 | Scaled: 0.5758


==== PART 2: Classroom Electricity Bill Prediction ====

Bill dataset:
    students  ac_hours  computers  bill
0        30         5         10  2500
1        32         6         12  2700
2        28         4          9  2300
3        35         7         11  3000
4        33         6         13  2800
5        36         8         14  3200
6        29     