<a href="https://colab.research.google.com/github/2303A52269/Generative-AI/blob/main/2303A52269_week6_ASS6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import numpy as np
from sklearn.preprocessing import OneHotEncoder

# Load dataset (Assuming CSV format after extracting from Google Drive)
url = '/content/Housing (2).csv'  # Replace this with the actual dataset path
df = pd.read_csv(url)

# Split the dataset into input features and target variable (housing prices)
X = df.drop('price', axis=1)  # Assuming 'price' is the target variable
y = df['price']

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Identify categorical features (columns with object dtype)
categorical_features = X_train.select_dtypes(include=['object']).columns

# Create and fit OneHotEncoder for categorical features
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore') # sparse=False for numpy array
encoded_features_train = encoder.fit_transform(X_train[categorical_features])
encoded_features_test = encoder.transform(X_test[categorical_features])

# Create DataFrames for encoded features
encoded_df_train = pd.DataFrame(encoded_features_train, columns=encoder.get_feature_names_out(categorical_features), index=X_train.index)
encoded_df_test = pd.DataFrame(encoded_features_test, columns=encoder.get_feature_names_out(categorical_features), index=X_test.index)

# Drop original categorical columns and concatenate encoded features
X_train = X_train.drop(categorical_features, axis=1)
X_test = X_test.drop(categorical_features, axis=1)
X_train = pd.concat([X_train, encoded_df_train], axis=1)
X_test = pd.concat([X_test, encoded_df_test], axis=1)


# Normalize the data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD
from tensorflow.keras.metrics import MeanSquaredError

# Define the ANN model
model = Sequential()

# Hidden Layer 1
model.add(Dense(15, input_dim=X_train.shape[1], activation='tanh'))

# Hidden Layer 2
model.add(Dense(20, activation='tanh'))

# Hidden Layer 3
model.add(Dense(15, activation='tanh'))

# Output layer (Assuming it's a regression problem, one neuron for price prediction)
model.add(Dense(1))

# Compile the model
model.compile(loss='mean_squared_error',
              optimizer=SGD(),
              metrics=[MeanSquaredError()])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [3]:
# Train the model
history = model.fit(X_train, y_train,
                    epochs=100,
                    batch_size=16,
                    validation_data=(X_test, y_test))

Epoch 1/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 10482531434496.0000 - mean_squared_error: 10482531434496.0000 - val_loss: 6078684200960.0000 - val_mean_squared_error: 6078684200960.0000
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 3335107641344.0000 - mean_squared_error: 3335107641344.0000 - val_loss: 5906368561152.0000 - val_mean_squared_error: 5906368561152.0000
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3112712011776.0000 - mean_squared_error: 3112712011776.0000 - val_loss: 6364250767360.0000 - val_mean_squared_error: 6364250767360.0000
Epoch 4/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 3105856684032.0000 - mean_squared_error: 3105856684032.0000 - val_loss: 5845630844928.0000 - val_mean_squared_error: 5845630844928.0000
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/s

In [5]:
# Calculate the training error
train_loss, train_mse = model.evaluate(X_train, y_train)
print(f"Training MSE: {train_mse}")

# Calculate the testing error
test_loss, test_mse = model.evaluate(X_test, y_test)
print(f"Testing MSE: {test_mse}")

[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 3121634344960.0000 - mean_squared_error: 3121634344960.0000  
Training MSE: 3083490033664.0
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 5194783391744.0000 - mean_squared_error: 5194783391744.0000
Testing MSE: 5062870433792.0


In [4]:
# Save the trained model
model.save('housing_price_model.h5')



In [6]:
from tensorflow.keras.models import load_model

# Load the saved model
loaded_model = load_model('housing_price_model.h5')

# Make predictions on the test data
predictions = loaded_model.predict(X_test)

# Example: Compare predictions with actual values
for actual, predicted in zip(y_test[:10], predictions[:10]):
    print(f"Actual: {actual}, Predicted: {predicted[0]}")



[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 20ms/step
Actual: 4060000, Predicted: 4824132.0
Actual: 6650000, Predicted: 4824132.0
Actual: 3710000, Predicted: 4824132.0
Actual: 6440000, Predicted: 4824132.0
Actual: 2800000, Predicted: 4824132.0
Actual: 4900000, Predicted: 4824132.0
Actual: 5250000, Predicted: 4824132.0
Actual: 4543000, Predicted: 4824132.0
Actual: 2450000, Predicted: 4824132.0
Actual: 3353000, Predicted: 4824132.0
