In [None]:
# Step 1: Import necessary libraries
!pip install numpy pandas matplotlib tensorflow seaborn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# For deep learning
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Input

# Step 2: Load the dataset
url = "https://raw.githubusercontent.com/selva86/datasets/master/BostonHousing.csv"
df = pd.read_csv(url)

# Step 3: Explore the dataset
# A bare `df.head()` is only rendered when it is the last expression of a
# notebook cell, so print it explicitly to make the preview always visible.
print(df.head())

# Step 4: Preprocess the data
# Split the dataset into features and target variable
X = df.drop(columns=['medv'])  # Features
y = df['medv']  # Target variable (Median house price)

# Step 5: Split the data into training and test sets
# The split is done BEFORE scaling so the held-out rows never contribute to
# the scaler's mean/variance; fitting the scaler on the full dataset would
# leak test-set statistics into the training features.
from sklearn.model_selection import train_test_split
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 6: Normalize the features
# Fit on the training rows only, then reuse that fitted transform on the
# test rows.
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix expressed in the training-set scaling; kept so any
# code that refers to `X_scaled` keeps working.
X_scaled = scaler.transform(X)

# Step 7: Build the DNN model
# Seed Python, NumPy and the Keras backend so that weight initialisation and
# batch shuffling are repeatable from run to run (the train/test split is
# already pinned with random_state=42).
keras.utils.set_random_seed(42)

n_features = X_train.shape[1]  # one input unit per feature column

model = Sequential([
    Input(shape=(n_features,)),      # Input layer (features)
    Dense(64, activation='relu'),    # First hidden layer
    Dense(32, activation='relu'),    # Second hidden layer
    Dense(1),                        # Output layer (single value prediction)
])

# Step 8: Compile the model
# Mean squared error is the training objective and also the value that
# model.evaluate() reports, since no extra metrics are registered.
model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error')

# Step 9: Train the model
# NOTE(review): the test split is also passed as validation data, so the
# per-epoch val_loss is measured on the same rows as the final test loss.
# Avoid tuning hyper-parameters against it.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    batch_size=32,
    epochs=50,
)

# Step 10: Evaluate the model
# With only a loss configured at compile time, evaluate() returns one scalar.
loss = model.evaluate(X_test, y_test)
print(f"Test Loss: {loss}")

# Step 11: Make predictions
y_pred = model.predict(X_test)

# Step 12: Residual plot (Actual - Predicted)
# The predictions are flattened to 1-D so the subtraction against y_test is
# element-wise.
residuals = y_test - y_pred.flatten()

# Scatter the errors against the predicted value; points far from the dashed
# zero line are the worst predictions.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_pred, residuals)
ax.axhline(y=0, color='r', linestyle='--')
ax.set_xlabel("Predicted Prices")
ax.set_ylabel("Residuals (Actual - Predicted)")
ax.set_title("Residual Plot: Actual vs Predicted Housing Prices")
plt.show()

# Step 13: Show a few examples of Actual vs Predicted values
# Side-by-side table of the true target, the model output and their gap.
predicted_prices = y_pred.flatten()
example_comparison = pd.DataFrame(
    {
        'Actual Price': y_test,
        'Predicted Price': predicted_prices,
        'Difference': residuals,
    }
)

# Display the first 10 examples
print(example_comparison.head(10))

# Step 14: Plot the residual histogram for better understanding of prediction errors
# Histogram of the errors with a KDE curve overlaid, drawn on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.histplot(residuals, kde=True, color='blue', bins=30, ax=ax)
ax.set_xlabel("Residuals (Actual - Predicted)")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Residuals")
plt.show()

