In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [None]:
# Load the California housing data and gather features and target in one frame
data = fetch_california_housing()
df = pd.DataFrame(data.data, columns=data.feature_names).assign(
    MedHouseVal=data.target
)

In [None]:
# Distribution of the target column: histogram with a KDE curve overlaid
fig, ax = plt.subplots(figsize=(12, 6))
sns.histplot(df['MedHouseVal'], kde=True, color='blue', ax=ax)
ax.set_title('Distribution of Median House Value')
plt.show()


In [None]:
# Pairwise correlations between the columns of df, annotated to two decimals
corr_matrix = df.corr()
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', ax=ax)
ax.set_title('Feature Correlation Heatmap')
plt.show()

# Train/Test Split and Feature Scaling

In [None]:
# Separate the target from the features, then hold out 20% of the rows for testing
y = df['MedHouseVal']
X = df.drop(columns='MedHouseVal')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Standardize the features; the scaler is fitted on the training split only
# and then applied unchanged to both splits
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Baseline Model: Machine Learning (Linear Regression)

In [None]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Fit the baseline linear model on the scaled training features
lr_model = LinearRegression().fit(X_train_scaled, y_train)

# Score the baseline on the held-out test split
lr_preds = lr_model.predict(X_test_scaled)
lr_r2 = r2_score(y_test, lr_preds)
lr_rmse = np.sqrt(mean_squared_error(y_test, lr_preds))  # RMSE = sqrt(MSE)

print(f"Baseline Linear Regression RMSE: {lr_rmse:.4f}")
print(f"Baseline Linear Regression R2: {lr_r2:.4f}")

# Improved Model: Deep Learning (Artificial Neural Network)

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models

# Seed the Python, NumPy and TensorFlow random generators so repeated runs of
# this cell start from the same random state (42 matches the random_state used
# for the train/test split). NOTE(review): some GPU ops may still be
# nondeterministic -- confirm if bit-exact reproducibility is required.
tf.keras.utils.set_random_seed(42)

# Define the Model: three ReLU hidden layers feeding a single linear unit.
# An explicit Input layer replaces the `input_shape=` argument on the first
# Dense layer, which newer Keras releases warn against.
dnn_model = models.Sequential([
    layers.Input(shape=(X_train_scaled.shape[1],)),
    layers.Dense(64, activation='relu'),
    layers.Dense(32, activation='relu'),
    layers.Dense(16, activation='relu'),
    layers.Dense(1)  # Output layer for regression
])

dnn_model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Train the Model; 20% of the training rows are held out for validation
history = dnn_model.fit(
    X_train_scaled, y_train,
    epochs=50,
    validation_split=0.2,
    batch_size=32,
    verbose=0
)

# Evaluate on the held-out test split with the same metrics as the baseline.
# mean_squared_error and r2_score come from the import in the baseline cell.
dnn_preds = dnn_model.predict(X_test_scaled).flatten()  # (n, 1) -> (n,)
dnn_rmse = np.sqrt(mean_squared_error(y_test, dnn_preds))
dnn_r2 = r2_score(y_test, dnn_preds)

# Report the scores so they can be compared with the linear baseline
print(f"Deep Neural Network RMSE: {dnn_rmse:.4f}")
print(f"Deep Neural Network R2: {dnn_r2:.4f}")

# Comparison and Visualization

In [None]:
# Training vs. validation loss recorded by Keras during fit
loss_curves = history.history
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(loss_curves['loss'], label='Train Loss')
ax.plot(loss_curves['val_loss'], label='Val Loss')
ax.set_title('Model Loss (MSE) During Training')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()