<a href="https://colab.research.google.com/github/2302A52219/Generative-AI/blob/main/GAI_ASS6_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load dataset
file_path = "Housing.csv"
df = pd.read_csv(file_path)

# Identify numerical and categorical columns
num_features = ["area", "bedrooms", "bathrooms", "stories", "parking"]
cat_features = ["mainroad", "guestroom", "basement", "hotwaterheating", "airconditioning", "prefarea", "furnishingstatus"]

# Define preprocessors: standardize the numeric columns and one-hot encode the
# categorical ones (drop="first" removes one redundant indicator per category).
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_features),
        ("cat", OneHotEncoder(drop="first"), cat_features)
    ]
)

# Prepare data: "price" is the regression target, everything else is a feature.
X = df.drop(columns=["price"])
y = df["price"]

# Split the RAW rows before fitting any preprocessing. Fitting the scaler and
# encoder on the full dataset would leak test-set statistics into the
# transformation and make the test metrics optimistically biased.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the preprocessing parameters from the training rows only, then apply
# that same fitted transformation to the held-out rows.
# NOTE(review): OneHotEncoder uses its default handle_unknown="error", so a
# category that appears only in the test rows would raise here.
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Full feature matrix transformed with the train-fitted preprocessor; the name
# is retained in case other cells reference it.
X_transformed = preprocessor.transform(X)

# Train the Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions
y_train_pred = rf_model.predict(X_train)
y_test_pred = rf_model.predict(X_test)

# Calculate error metrics
train_mse = mean_squared_error(y_train, y_train_pred)
train_mae = mean_absolute_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)

# Save the trained model
# NOTE(review): only the regressor is persisted; the fitted preprocessor is
# not, so the saved model alone can only score already-transformed rows.
joblib.dump(rf_model, "housing_price_model.pkl")

# Print model performance
print(f"Training MSE: {train_mse:.2f}")
print(f"Training MAE: {train_mae:.2f}")
print(f"Testing MSE: {test_mse:.2f}")
print(f"Testing MAE: {test_mae:.2f}")

print("Model saved as 'housing_price_model.pkl'.")
import joblib
import numpy as np

# Restore the regressor that was written to disk earlier in this script
rf_model = joblib.load("housing_price_model.pkl")

# Build a one-row batch from the first preprocessed test row; predict()
# expects a 2-D (n_samples, n_features) input, so add a leading axis.
new_sample = np.expand_dims(X_test[0], axis=0)
predicted_price = rf_model.predict(new_sample)

print("Predicted Housing Price:", predicted_price[0])

Training MSE: 153669205868.61
Training MAE: 277348.26
Testing MSE: 1959406221695.99
Testing MAE: 1017470.62
Model saved as 'housing_price_model.pkl'.
Predicted Housing Price: 5344780.0
