<a href="https://colab.research.google.com/github/OmerAbdel-aziz/jupyter-notebooks/blob/main/ANN_house_prices_prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import kagglehub
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.metrics import mean_absolute_error, mean_squared_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
import matplotlib.pyplot as plt
import os

# Download the dataset
# Fetch the "yasserh/housing-prices-dataset" files through kagglehub; the
# returned value is used below as the directory that holds them.
path = kagglehub.dataset_download("yasserh/housing-prices-dataset")
print("Path to dataset files:", path)

# Load the dataset
# Read Housing.csv from that directory into the DataFrame every later step uses.
csv_path = os.path.join(path, 'Housing.csv')
df = pd.read_csv(csv_path)

# Handle missing values
# Numeric columns: replace missing entries with the column mean.
# 'number' selects every numeric dtype, so int32/float32 columns are imputed
# too instead of being silently skipped by an explicit float64/int64 list.
numerical_cols = df.select_dtypes(include='number').columns
df[numerical_cols] = df[numerical_cols].fillna(df[numerical_cols].mean())

# Categorical (object-dtype) columns: replace missing entries with the most
# frequent value of the column.
categorical_cols = df.select_dtypes(include=['object']).columns
for col in categorical_cols:
    modes = df[col].mode()
    # mode() returns an empty Series when the column has no non-missing
    # values; there is nothing to impute with, so leave the column untouched
    # rather than failing with an IndexError.
    if modes.empty:
        continue
    # Assign back instead of using inplace=True on the column selection.
    df[col] = df[col].fillna(modes.iloc[0])

# Encode categorical variables
# Each categorical column gets its own LabelEncoder, stored by column name.
# Refitting one shared encoder would keep only the last column's classes,
# making it impossible to decode a column or to encode new rows consistently.
label_encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

# Columns fed to the network as inputs; 'price' is the regression target.
features = [
    'area',
    'bedrooms',
    'bathrooms',
    'mainroad',
    'prefarea',
    'furnishingstatus',
    'stories',
    'parking',
    'basement',
    'hotwaterheating',
    'airconditioning',
]
X = df.loc[:, features]
y = df.loc[:, 'price']

# Split the data (80% train, 20% test) BEFORE any scaling so that the test
# rows never contribute to the statistics used for preprocessing.
X_train_raw, X_test_raw, y_train_raw, y_test_raw = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Scale features: fit mean/std on the training rows only, then apply the same
# transform to the test rows.
scaler_X = StandardScaler()
X_train = scaler_X.fit_transform(X_train_raw)
X_test = scaler_X.transform(X_test_raw)

# Scale target variable (price) the same way. StandardScaler works on 2-D
# input, hence the reshape to a single column and the flatten back to 1-D.
scaler_y = StandardScaler()
y_train = scaler_y.fit_transform(y_train_raw.values.reshape(-1, 1)).flatten()
y_test = scaler_y.transform(y_test_raw.values.reshape(-1, 1)).flatten()

# Whole-dataset arrays scaled with the train-fitted scalers; kept so that any
# code relying on the X_scaled / y_scaled names continues to work.
X_scaled = scaler_X.transform(X)
y_scaled = scaler_y.transform(y.values.reshape(-1, 1)).flatten()

# Assemble the feed-forward regressor: an explicit Input layer sized to the
# number of feature columns, three ReLU hidden layers (64 -> 32 -> 16 units)
# each followed by 10% dropout, and a single linear output unit.
ann_layers = [Input(shape=(X_train.shape[1],))]
for hidden_units in (64, 32, 16):
    ann_layers.append(Dense(hidden_units, activation='relu'))
    ann_layers.append(Dropout(0.1))
ann_layers.append(Dense(1))  # one unit, no activation: raw regression output

model = Sequential(ann_layers)

# Configure training: Adam with an explicit learning rate of 0.0005,
# minimising mean squared error.
optimizer = Adam(learning_rate=0.0005)
model.compile(loss='mean_squared_error', optimizer=optimizer)

# Fit for 150 epochs in mini-batches of 10, holding out 20% of the training
# data for validation; the returned History object feeds the loss plot.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=10,
    epochs=150,
    verbose=1,
)

# Run the trained model on the held-out rows (verbose=0 keeps the progress
# bar out of the output).
y_pred_scaled = model.predict(X_test, verbose=0)

# Map both the true targets and the predictions back to the original price
# scale; y_test is reshaped to a single column before the inverse transform.
y_test_orig = scaler_y.inverse_transform(y_test.reshape(-1, 1))
y_pred = scaler_y.inverse_transform(y_pred_scaled)

# MAE is reported in original price units, MSE in the scaled target space.
mae = mean_absolute_error(y_test_orig, y_pred)
mse = mean_squared_error(y_test, y_pred_scaled)
print(f'Test Mean Squared Error (MSE, scaled): {mse:.4f}')
print(f'Test Mean Absolute Error (MAE, original scale): {mae:.4f}')

# Draw the per-epoch training and validation loss curves and write the figure
# to loss_plot.png; the figure is closed afterwards rather than shown.
loss_fig, loss_ax = plt.subplots(figsize=(10, 6))
for history_key, curve_label in (('loss', 'Training Loss'),
                                 ('val_loss', 'Validation Loss')):
    loss_ax.plot(history.history[history_key], label=curve_label)
loss_ax.set_xlabel('Epochs')
loss_ax.set_ylabel('Loss (MSE)')
loss_ax.set_title('Training and Validation Loss Over Epochs')
loss_ax.legend()
loss_ax.grid(True)
loss_fig.savefig('loss_plot.png')
plt.close(loss_fig)

# Scatter predicted against actual prices (original scale) and write the
# figure to actual_vs_predicted.png; the figure is closed rather than shown.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 6))
scatter_ax.scatter(y_test_orig, y_pred, alpha=0.5)
scatter_ax.set_xlabel('Actual Prices')
scatter_ax.set_ylabel('Predicted Prices')
scatter_ax.set_title('Actual vs. Predicted House Prices')

# Dashed red y = x reference line spanning the range of actual prices: points
# on it are perfect predictions.
price_lo, price_hi = y_test_orig.min(), y_test_orig.max()
scatter_ax.plot([price_lo, price_hi], [price_lo, price_hi], 'r--', lw=2)

scatter_ax.grid(True)
scatter_fig.savefig('actual_vs_predicted.png')
plt.close(scatter_fig)

Path to dataset files: /kaggle/input/housing-prices-dataset
Epoch 1/150
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 35ms/step - loss: 0.8753 - val_loss: 0.5465
Epoch 2/150
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.5138 - val_loss: 0.4211
Epoch 3/150
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 17ms/step - loss: 0.4312 - val_loss: 0.3836
Epoch 4/150
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 0.4906 - val_loss: 0.3589
Epoch 5/150
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.3457 - val_loss: 0.3482
Epoch 6/150
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.3714 - val_loss: 0.3432
Epoch 7/150
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 16ms/step - loss: 0.3541 - val_loss: 0.3383
Epoch 8/150
[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step - loss: 0.3417 - 