<a href="https://colab.research.google.com/github/2303a52192/GENERATIVE_AI_2025/blob/main/2303A52192_GEN_AI_ass_6_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

# Load the dataset from a local file
file_path = '/content/Housing.csv'  # Replace with the path to your CSV file
df = pd.read_csv(file_path)

# Check for missing values and data types
print(df.isnull().sum())  # Check for missing values
print(df.dtypes)  # Check data types of each column

# Convert categorical columns to numeric using LabelEncoder or One-Hot Encoding
# Example: If you have a column 'column_name' with values 'yes'/'no'
categorical_columns = ['guestroom', 'mainroad', 'basement', 'hotwaterheating', 'airconditioning', 'prefarea', 'furnishingstatus']  # List of categorical columns to encode

# Label Encoding (works for binary columns like 'yes'/'no')
le = LabelEncoder()
for col in categorical_columns:
    df[col] = le.fit_transform(df[col])

# If you have more than two categories, you can use One-Hot Encoding:
# df = pd.get_dummies(df, columns=categorical_columns, drop_first=True)

# Fill missing values if any (using median for numerical columns)
# Exclude non-numeric columns from median calculation
numeric_df = df.select_dtypes(include=np.number)
df[numeric_df.columns] = numeric_df.fillna(numeric_df.median())

# Now split the dataset into features (X) and target (y)
X = df.iloc[:, :-1].values  # Features (all columns except the last one)
y = df.iloc[:, -1].values   # Target variable (last column)

# Split the dataset into training and testing sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature values (mean=0, variance=1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Build the ANN model
model = Sequential([
    Dense(15, activation='tanh', input_shape=(X_train.shape[1],)),  # First hidden layer
    Dense(20, activation='tanh'),  # Second hidden layer
    Dense(15, activation='tanh'),  # Third hidden layer
    Dense(1, activation='linear')  # Output layer (for regression)
])

# Compile the model with SGD optimizer and MSE loss
model.compile(optimizer=SGD(), loss='mean_squared_error', metrics=['mse'])

# Train the model with 100 epochs and batch size 16
model.fit(X_train, y_train, epochs=100, batch_size=16, validation_data=(X_test, y_test))

# Evaluate the model on training and testing data
train_mse = model.evaluate(X_train, y_train, verbose=0)[1]
test_mse = model.evaluate(X_test, y_test, verbose=0)[1]

# Print the MSE for both training and testing data
print(f'Training MSE: {train_mse}')
print(f'Testing MSE: {test_mse}')

# Save the trained model to an .h5 file
model.save("housing_price_model.h5")

# Load the saved model for future predictions
from tensorflow.keras.models import load_model
loaded_model = load_model("housing_price_model.h5")

# Function to make predictions with the loaded model
def predict_price(new_data):
    new_data = scaler.transform([new_data])  # Scale the new data
    return loaded_model.predict(new_data)[0, 0]

# Example prediction: use one of the test samples
sample_input = X_test[0]  # You can choose any sample from the test data
predicted_price = predict_price(sample_input)
print(f'Predicted Price: {predicted_price}')

price               0
area                0
bedrooms            0
bathrooms           0
stories             0
mainroad            0
guestroom           0
basement            0
hotwaterheating     0
airconditioning     0
parking             0
prefarea            0
furnishingstatus    0
dtype: int64
price                int64
area                 int64
bedrooms             int64
bathrooms            int64
stories              int64
mainroad            object
guestroom           object
basement            object
hotwaterheating     object
airconditioning     object
parking              int64
prefarea            object
furnishingstatus    object
dtype: object


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 92ms/step - loss: 1.2573 - mse: 1.2573 - val_loss: 0.6653 - val_mse: 0.6653
Epoch 2/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 20ms/step - loss: 0.5477 - mse: 0.5477 - val_loss: 0.6262 - val_mse: 0.6262
Epoch 3/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 22ms/step - loss: 0.5123 - mse: 0.5123 - val_loss: 0.6157 - val_mse: 0.6157
Epoch 4/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - loss: 0.5018 - mse: 0.5018 - val_loss: 0.6114 - val_mse: 0.6114
Epoch 5/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 13ms/step - loss: 0.4880 - mse: 0.4880 - val_loss: 0.6142 - val_mse: 0.6142
Epoch 6/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - loss: 0.5061 - mse: 0.5061 - val_loss: 0.6083 - val_mse: 0.6083
Epoch 7/100
[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step - lo



Training MSE: 0.3704095184803009
Testing MSE: 0.7091282606124878
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 100ms/step
Predicted Price: 1.4182637929916382
