In [11]:
import pandas as pd
import joblib
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# 1. Load the California housing data as a pandas DataFrame
#    (scikit-learn downloads the dataset if it is not already cached locally;
#    as_frame=True makes the returned bunch expose the combined `.frame`)
print("📥 Downloading California housing dataset...")
data = fetch_california_housing(as_frame=True)
df = data.frame

# 2. Separate the value being predicted (y) from the predictor columns (X)
target_column = 'MedHouseVal'
y = df[target_column]
X = df.drop(columns=target_column)

# List the predictors in the column order the model will be trained on
print("\n📌 Model expects the following input features:")
for position, column_name in enumerate(X.columns, start=1):
    print(f"{position}. {column_name}")

# 3. Hold out 20% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# 4. Fit a 100-tree random forest on the training portion
#    (fit() returns the estimator itself, so `model` is the fitted forest)
print("\n⚙️ Training the Random Forest model...")
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# 5. Score the fitted model on the held-out test rows
y_pred = model.predict(X_test)

# Both metrics compare the true test targets against the predictions
r2 = r2_score(y_true=y_test, y_pred=y_pred)
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)

print("\n📊 Model Evaluation:")
print(f"Mean Squared Error: {mse:.4f}")
print(f"R² Score: {r2:.4f}")

# 6. Persist the fitted model to disk with joblib
#    NOTE: the file is pickle-based; only reload it (joblib.load) from a trusted source
model_filename = "house_price_model.pkl"
joblib.dump(value=model, filename=model_filename)
print(f"\n💾 Model saved as: {model_filename}")

# 7. Sanity-check the model on the first held-out row
#    Slicing (instead of X_test.iloc[0]) keeps a one-row DataFrame, so
#    predict() receives 2-D input with the original column names.
sample = X_test.iloc[:1]
(predicted_price,) = model.predict(sample)

print("\n🔮 Sample prediction:")
print(f"Input: {sample.to_dict('records')[0]}")
# The dataset's target is expressed in units of $100,000, hence the scaling
print(f"Predicted House Value: ${predicted_price * 100000:.2f}")


📥 Downloading California housing dataset...

📌 Model expects the following input features:
1. MedInc
2. HouseAge
3. AveRooms
4. AveBedrms
5. Population
6. AveOccup
7. Latitude
8. Longitude

⚙️ Training the Random Forest model...

📊 Model Evaluation:
Mean Squared Error: 0.2554
R² Score: 0.8051

💾 Model saved as: house_price_model.pkl

🔮 Sample prediction:
Input: {'MedInc': 1.6812, 'HouseAge': 25.0, 'AveRooms': 4.192200557103064, 'AveBedrms': 1.0222841225626742, 'Population': 1392.0, 'AveOccup': 3.8774373259052926, 'Latitude': 36.06, 'Longitude': -119.01}
Predicted House Value: $50950.00
