<a href="https://colab.research.google.com/github/2303A52396/Generative-AI/blob/main/GAI_ass_6_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
# Import necessary libraries
import pandas as pd
import joblib
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load dataset
file_path = "/content/Housing (3).csv"
df = pd.read_csv(file_path)

# Identify numerical and categorical columns
num_features = ["area", "bedrooms", "bathrooms", "stories", "parking"]
cat_features = ["mainroad", "guestroom", "basement", "hotwaterheating", "airconditioning", "prefarea", "furnishingstatus"]

# Define preprocessors: standardise the numeric columns and one-hot encode the
# categorical ones (dropping the first level of each to avoid redundant columns).
# handle_unknown="ignore" keeps transform() from raising if the held-out split
# contains a category that was absent from the training split.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_features),
        ("cat", OneHotEncoder(drop="first", handle_unknown="ignore"), cat_features)
    ]
)

# Prepare data
X = df.drop(columns=["price"])
y = df["price"]

# Split the RAW rows first, then fit the preprocessor on the training rows only.
# Fitting the scaler/encoder on the full dataset would leak test-set statistics
# (means, variances, category lists) into training and bias the test metrics.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
X_train = preprocessor.fit_transform(X_train_raw)
X_test = preprocessor.transform(X_test_raw)

# Train the Random Forest Regressor model
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Make predictions
y_train_pred = rf_model.predict(X_train)
y_test_pred = rf_model.predict(X_test)

# Calculate error metrics
train_mse = mean_squared_error(y_train, y_train_pred)
train_mae = mean_absolute_error(y_train, y_train_pred)
test_mse = mean_squared_error(y_test, y_test_pred)
test_mae = mean_absolute_error(y_test, y_test_pred)

# Save the trained model.
# NOTE: only the forest is persisted here; it expects inputs that have already
# been passed through the fitted `preprocessor` above.
joblib.dump(rf_model, "housing_price_model.pkl")

# Print model performance
print(f"Training MSE: {train_mse:.2f}")
print(f"Training MAE: {train_mae:.2f}")
print(f"Testing MSE: {test_mse:.2f}")
print(f"Testing MAE: {test_mae:.2f}")

print("Model saved as 'housing_price_model.pkl'.")
import joblib
import numpy as np

# Load the saved model.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that were produced by this notebook or a trusted source.
rf_model = joblib.load("housing_price_model.pkl")

# Predict on a single, already-preprocessed row taken from X_test.
# Slicing with [:1] keeps the 2-D (1, n_features) shape that predict() expects
# and works whether the transformed matrix is a dense array or a sparse matrix.
new_sample = X_test[:1]
predicted_price = rf_model.predict(new_sample)

print("Predicted Housing Price:", predicted_price[0])

Training MSE: 153669205868.61
Training MAE: 277348.26
Testing MSE: 1959406221695.99
Testing MAE: 1017470.62
Model saved as 'housing_price_model.pkl'.
Predicted Housing Price: 5344780.0


In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from keras.models import Sequential
from keras.layers import Dense
from keras.models import load_model

# Step 1: Load the dataset and separate the features from the target
url = "/content/Housing (3).csv"
dataset = pd.read_csv(url)

# 'price' is the target column; every other column is used as a feature
X = dataset.drop('price', axis=1)  # Features
y = dataset['price']  # Target variable

# Columns holding string categories that must be one-hot encoded
categorical_cols = ['mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']

# Step 2: Split the data into training and testing sets.
# The split happens BEFORE any preprocessing is fitted so that the encoder and
# the scaler only ever learn from the training rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# handle_unknown='ignore' encodes categories unseen during fit as all zeros
# instead of raising, which matters now that the encoder is fitted on the
# training split only.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoder.fit(X_train[categorical_cols])


def encode_categoricals(frame):
    """Return `frame` with its categorical columns replaced by one-hot columns.

    Uses the already-fitted `encoder`. The original row index is preserved on
    the encoded columns so the column-wise concat lines rows up correctly even
    though train_test_split has shuffled them.
    """
    encoded_df = pd.DataFrame(
        encoder.transform(frame[categorical_cols]),
        columns=encoder.get_feature_names_out(categorical_cols),
        index=frame.index,
    )
    return pd.concat([frame.drop(categorical_cols, axis=1), encoded_df], axis=1)


X_train = encode_categoricals(X_train)
X_test = encode_categoricals(X_test)

# Step 3: Normalize the features (statistics come from the training set only)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Step 4: Build the ANN model
model = Sequential()

# Hidden layers based on the provided architecture
# NOTE(review): newer Keras versions warn about passing input_dim to a layer and
# recommend an explicit Input layer as the first layer instead.
model.add(Dense(15, input_dim=X_train.shape[1], activation='relu'))  # Hidden Layer 1
model.add(Dense(20, activation='relu'))  # Hidden Layer 2
model.add(Dense(25, activation='relu'))  # Hidden Layer 3
model.add(Dense(20, activation='relu'))  # Hidden Layer 4
model.add(Dense(15, activation='relu'))  # Hidden Layer 5

# Output layer (since we're predicting a continuous value, we use linear activation)
model.add(Dense(1, activation='linear'))

# Step 5: Compile the model
model.compile(optimizer='adam', loss='mean_squared_error', metrics=['mae'])

# Step 6: Train the model.
# The held-out test split is passed as validation data purely for monitoring;
# no callback uses it, so it does not influence the fitted weights.
history = model.fit(X_train, y_train, epochs=150, batch_size=32, validation_data=(X_test, y_test))

# Step 7: Evaluate the model (evaluate returns [loss, mae] per compile() above)
train_loss, train_mae = model.evaluate(X_train, y_train)
test_loss, test_mae = model.evaluate(X_test, y_test)

print(f"Training MAE: {train_mae}")
print(f"Testing MAE: {test_mae}")

# Step 8: Save the model in .h5 format
model.save('housing_price_predictor.h5')

# Step 9: Load the model for deployment
loaded_model = load_model('housing_price_predictor.h5')

# Step 10: Make predictions with the loaded model
predictions = loaded_model.predict(X_test)

# Display predictions vs actual values for the first 10 examples
# (bounded by the number of predictions so a small test set cannot raise IndexError)
for i in range(min(10, len(predictions))):
    print(f"Predicted: {predictions[i][0]}, Actual: {y_test.iloc[i]}")

Epoch 1/150


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 25ms/step - loss: 26684931702784.0000 - mae: 4818626.5000 - val_loss: 30129992499200.0000 - val_mae: 5007536.5000
Epoch 2/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 24863228035072.0000 - mae: 4687403.0000 - val_loss: 30129992499200.0000 - val_mae: 5007536.5000
Epoch 3/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 24341681012736.0000 - mae: 4635083.5000 - val_loss: 30129992499200.0000 - val_mae: 5007536.5000
Epoch 4/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 25115358134272.0000 - mae: 4688890.5000 - val_loss: 30129986207744.0000 - val_mae: 5007536.0000
Epoch 5/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 24635447967744.0000 - mae: 4631112.0000 - val_loss: 30129971527680.0000 - val_mae: 5007534.5000
Epoch 6/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 



Training MAE: 689075.4375
Testing MAE: 1067496.0




[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Predicted: 3242923.5, Actual: 4060000
Predicted: 7437067.5, Actual: 6650000
Predicted: 3904950.25, Actual: 3710000
Predicted: 4330994.5, Actual: 6440000
Predicted: 3237407.0, Actual: 2800000
Predicted: 3221660.5, Actual: 4900000
Predicted: 5130818.0, Actual: 5250000
Predicted: 6563584.5, Actual: 4543000
Predicted: 2220465.0, Actual: 2450000
Predicted: 3187403.0, Actual: 3353000
