In [1]:
# This machine-learning notebook trains a model on 10K custom-generated (synthetic) records of house features and prices.
# Using the trained model, it then tries to predict the price of a real house.
#
# By Poorva Vakharia
# Aug 2025

In [2]:
import joblib
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder, StandardScaler

In [3]:
# 🔹 Step 1: Load training data
# The path is relative to the notebook's working directory.
train_csv = "synthetic_housing_data.csv"
try:
    df_train = pd.read_csv(train_csv)
except FileNotFoundError as exc:
    # Every later cell depends on df_train, so stop here with an actionable
    # message instead of the bare "[Errno 2]" traceback.
    raise FileNotFoundError(
        f"Training data '{train_csv}' was not found relative to the notebook's "
        "working directory. Generate or copy the CSV there, or point train_csv "
        "at its location, then re-run this cell."
    ) from exc

FileNotFoundError: [Errno 2] No such file or directory: 'synthetic_housing_data.csv'

In [None]:
# 🔹 Step 2: Separate features and target
# "price" is the regression target; every remaining column is treated as a feature.
y_train = df_train["price"]
X_train_raw = df_train.drop(columns="price")

In [None]:
# 🔹 Step 3: Identify feature types
# Columns listed here are one-hot encoded; adjust the list to match the dataset.
categorical_features = ["zip_code", "home_type"]  # Adjust as needed

# Everything not declared categorical is treated as numerical, in original column order.
numerical_features = []
for column_name in X_train_raw.columns:
    if column_name in categorical_features:
        continue
    numerical_features.append(column_name)
print(numerical_features)

In [None]:
# 🔹 Step 4: Fit transformers separately
# The scaler and encoder are kept as separate objects so that each can be saved
# and re-applied to new data later on.
scaler = StandardScaler()
encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False)

# Slice the training frame once per feature group, then fit and transform each group.
numeric_block = X_train_raw[numerical_features]
categorical_block = X_train_raw[categorical_features]
X_num_scaled = scaler.fit(numeric_block).transform(numeric_block)
X_cat_encoded = encoder.fit(categorical_block).transform(categorical_block)

In [None]:
# Sanity-check the one-hot encoding without dumping the whole matrix:
# the generated column names, the overall shape, and the first few rows.
print(encoder.get_feature_names_out())
print(X_cat_encoded.shape)
print(X_cat_encoded[:5])

In [None]:
# 🔹 Step 5: Combine transformed features
# Column order is: scaled numerical features first, then one-hot encoded categorical
# features. Any data passed to the model later must be assembled in this same order.
X_train_transformed = np.hstack([X_num_scaled, X_cat_encoded])

In [None]:
# 🔹 Step 6: Train model
# Hyper-parameters are gathered in one place; the fixed random_state makes the
# forest reproducible between runs.
forest_params = {"n_estimators": 100, "random_state": 42}
model = RandomForestRegressor(**forest_params)
model.fit(X_train_transformed, y_train)

In [None]:
# 🔹 Step 7: Save model and transformers
# The model, scaler, and encoder are each written to their own file, in that order.
artifacts = [
    (model, "house_price_model.pkl"),
    (scaler, "scaler.pkl"),
    (encoder, "encoder.pkl"),
]
for fitted_object, filename in artifacts:
    joblib.dump(fitted_object, filename)

In [None]:
# 🔹 Step 8: Load new data for prediction
# The path is relative to the notebook's working directory.
new_csv = "my_house_price.csv"
try:
    df_new = pd.read_csv(new_csv)
except FileNotFoundError as exc:
    raise FileNotFoundError(
        f"Prediction input '{new_csv}' was not found relative to the notebook's "
        "working directory. Copy the CSV there, or point new_csv at its location, "
        "then re-run this cell."
    ) from exc

# Fail fast if the new file lacks any feature column used during training, so a
# schema mismatch is reported here, by name, rather than deeper in the prediction step.
missing_columns = [
    col for col in numerical_features + categorical_features if col not in df_new.columns
]
if missing_columns:
    raise ValueError(f"'{new_csv}' is missing required feature columns: {missing_columns}")

In [None]:
# 🔹 Step 9: Transform the new data with the transformers fitted on the training set
# Use transform() only — re-fitting on prediction data would change the scaling and
# encoding the model was trained against. The column order mirrors training:
# scaled numerical features first, then one-hot encoded categorical features.
X_new_num_scaled = scaler.transform(df_new[numerical_features])
X_new_cat_encoded = encoder.transform(df_new[categorical_features])
X_new_transformed = np.hstack([X_new_num_scaled, X_new_cat_encoded])

# 🔹 Step 10: Predict house prices
# Predictions are attached to df_new as a new column so later cells can display/save them.
predicted_prices = model.predict(X_new_transformed)
df_new["predicted_price"] = predicted_prices

In [None]:
# 🔹 Step 11: Output predictions
# Show only the first rows of the prediction column as a quick preview.
print("\n🏠 Predicted Prices for New Data:")
prediction_preview = df_new.loc[:, ["predicted_price"]].head(5)
print(prediction_preview)

In [None]:
# 🔹 Step 12: Save predictions (optional)
# Write the new data together with its predicted_price column; the row index is omitted.
df_new.to_csv(path_or_buf="housing_predictions.csv", index=False)
print("\n✅ Predictions saved to 'housing_predictions.csv'")