In [None]:
import pandas as pd
import numpy as np
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pickle

# Load the California Housing dataset
california = fetch_california_housing()

In [None]:
california.data.shape

(20640, 8)

In [None]:
california.data.shape

(20640, 8)

In [None]:
california.data[:5]

array([[ 8.32520000e+00,  4.10000000e+01,  6.98412698e+00,
         1.02380952e+00,  3.22000000e+02,  2.55555556e+00,
         3.78800000e+01, -1.22230000e+02],
       [ 8.30140000e+00,  2.10000000e+01,  6.23813708e+00,
         9.71880492e-01,  2.40100000e+03,  2.10984183e+00,
         3.78600000e+01, -1.22220000e+02],
       [ 7.25740000e+00,  5.20000000e+01,  8.28813559e+00,
         1.07344633e+00,  4.96000000e+02,  2.80225989e+00,
         3.78500000e+01, -1.22240000e+02],
       [ 5.64310000e+00,  5.20000000e+01,  5.81735160e+00,
         1.07305936e+00,  5.58000000e+02,  2.54794521e+00,
         3.78500000e+01, -1.22250000e+02],
       [ 3.84620000e+00,  5.20000000e+01,  6.28185328e+00,
         1.08108108e+00,  5.65000000e+02,  2.18146718e+00,
         3.78500000e+01, -1.22250000e+02]])

In [None]:
california.feature_names

['MedInc',
 'HouseAge',
 'AveRooms',
 'AveBedrms',
 'Population',
 'AveOccup',
 'Latitude',
 'Longitude']

In [None]:
california.target_names

['MedHouseVal']

In [None]:
df = pd.DataFrame(california.data)

In [None]:
df.columns = california.feature_names

In [None]:
df['MedHouseVal'] = california.target

In [None]:

df

Unnamed: 0,MedInc,HouseAge,AveRooms,AveBedrms,Population,AveOccup,Latitude,Longitude,MedHouseVal
0,8.3252,41.0,6.984127,1.023810,322.0,2.555556,37.88,-122.23,4.526
1,8.3014,21.0,6.238137,0.971880,2401.0,2.109842,37.86,-122.22,3.585
2,7.2574,52.0,8.288136,1.073446,496.0,2.802260,37.85,-122.24,3.521
3,5.6431,52.0,5.817352,1.073059,558.0,2.547945,37.85,-122.25,3.413
4,3.8462,52.0,6.281853,1.081081,565.0,2.181467,37.85,-122.25,3.422
...,...,...,...,...,...,...,...,...,...
20635,1.5603,25.0,5.045455,1.133333,845.0,2.560606,39.48,-121.09,0.781
20636,2.5568,18.0,6.114035,1.315789,356.0,3.122807,39.49,-121.21,0.771
20637,1.7000,17.0,5.205543,1.120092,1007.0,2.325635,39.43,-121.22,0.923
20638,1.8672,18.0,5.329513,1.171920,741.0,2.123209,39.43,-121.32,0.847


In [None]:
X = pd.DataFrame(california.data, columns=california.feature_names)
y = pd.DataFrame(california.target, columns=["Median_House_Value"])

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [17]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [18]:
rf_model = RandomForestRegressor(n_estimators=100, random_state=42)
rf_model.fit(X_train_scaled, y_train.values.ravel())

In [19]:
y_train.values.shape

(16512, 1)

In [21]:
y_pred_rf = rf_model.predict(X_test_scaled)

In [22]:
print("Random Forest Regression Metrics:")
print(f"MAE: {mean_absolute_error(y_test, y_pred_rf)}")
print(f"MSE: {mean_squared_error(y_test, y_pred_rf)}")
print(f"R2 Score: {r2_score(y_test, y_pred_rf)}")

Random Forest Regression Metrics:
MAE: 0.3274252027374032
MSE: 0.255169737347244
R2 Score: 0.8052747336256919


In [23]:
with open("random_forest_model.pkl", "wb") as f:
    pickle.dump(rf_model, f)

In [24]:
import tensorflow as tf
from tensorflow import keras

# Build the model
dl_model = keras.Sequential([
    keras.layers.Dense(64, activation="relu", input_shape=(X_train_scaled.shape[1],)),
    keras.layers.Dense(32, activation="relu"),
    keras.layers.Dense(16, activation="relu"),
    keras.layers.Dense(1)  # Output layer for regression
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [25]:
dl_model.compile(optimizer="adam", loss="mse", metrics=["mae"])

In [27]:
dl_model.fit(X_train_scaled, y_train, validation_data=(X_test_scaled, y_test), epochs=50, batch_size=32)

Epoch 1/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 6ms/step - loss: 1.9885 - mae: 0.9454 - val_loss: 0.4297 - val_mae: 0.4807
Epoch 2/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5ms/step - loss: 0.4150 - mae: 0.4585 - val_loss: 0.3697 - val_mae: 0.4304
Epoch 3/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 5ms/step - loss: 0.3489 - mae: 0.4183 - val_loss: 0.3555 - val_mae: 0.4365
Epoch 4/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 4ms/step - loss: 0.3485 - mae: 0.4105 - val_loss: 0.3401 - val_mae: 0.4134
Epoch 5/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3332 - mae: 0.4041 - val_loss: 0.3379 - val_mae: 0.4061
Epoch 6/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.3094 - mae: 0.3900 - val_loss: 0.3464 - val_mae: 0.4042
Epoch 7/50
[1m516/516[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - 

<keras.src.callbacks.history.History at 0x794d2fc78c10>

In [28]:
dl_model.save("deep_learning_model.h5")



In [29]:
with open("random_forest_model.pkl", "rb") as f:
    loaded_rf_model = pickle.load(f)


y_pred_loaded_rf = loaded_rf_model.predict(X_test_scaled)
print(f"Loaded RF Model R2 Score: {r2_score(y_test, y_pred_loaded_rf)}")

Loaded RF Model R2 Score: 0.8052747336256919
