In [77]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
from tensorflow.keras.callbacks import TensorBoard
import datetime
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd

In [78]:
# Load the housing dataset and discard every row that has a missing value.
data = pd.read_csv("../data/raw/HousingData.csv").dropna()

# TAX is the regression target; every remaining column is an input feature.
y = data["TAX"].values
X = data.drop(columns="TAX").values

# Hold out 20% of the rows for validation (fixed random_state keeps the
# split repeatable between runs).
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the scaler on the training split only, then apply that same fitted
# transform to both splits so no validation statistics leak into training.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val = scaler.transform(X_val)
X_train

array([[ 0.30669463, -0.46905615,  0.98516338, ..., -0.38387457,
         0.35489258, -0.85816974],
       [-0.35506872, -0.46905615, -0.75170852, ...,  0.32304459,
        -1.38799511,  2.1700169 ],
       [-0.40275616, -0.46905615,  0.37069021, ...,  0.44088232,
         0.64734992, -0.05432383],
       ...,
       [-0.41092235, -0.46905615, -1.0158882 , ...,  0.32014958,
        -0.32619052, -0.49478735],
       [ 0.54343825, -0.46905615,  0.98516338, ...,  0.44088232,
         0.6750148 , -0.86918133],
       [-0.40457292, -0.46905615,  2.08712834, ...,  0.23919645,
         0.64866729, -0.16443971]])

In [79]:
# Number of input features, read from the scaled training matrix.
input_dim = X_train.shape[1]


# 📌 Step 2: Create the regression model function
def create_regression_model(num_layers=2, units=64, learning_rate=1e-3, n_features=None):
    """Build and compile a fully connected regression network.

    Args:
        num_layers: Number of hidden ``Dense`` layers to stack.
        units: Number of units in every hidden layer.
        learning_rate: Learning rate handed to the Adam optimizer.
        n_features: Number of input features. When ``None`` (the default)
            the module-level ``input_dim`` is used, which keeps existing
            calls working unchanged.

    Returns:
        A compiled ``Sequential`` model with ReLU hidden layers and one
        linear output unit, trained on MSE loss and reporting MAE.
    """
    if n_features is None:
        n_features = input_dim

    model = Sequential()
    # `tf.keras.Input(shape=...)` is used instead of
    # `InputLayer(input_shape=...)`: the `input_shape` argument is deprecated
    # in Keras 3 and emits a warning on every model build.
    model.add(tf.keras.Input(shape=(n_features,)))
    for _ in range(num_layers):
        model.add(Dense(units, activation='relu'))
    model.add(Dense(1))  # Regression head: no activation
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='mse',
        metrics=['mae'],
    )
    return model


In [80]:
layer_options = [1, 2, 3]
unit_options = [32, 64]
SEED = 42  # shared seed so every configuration is trained reproducibly

# One dict per trained configuration holding its final-epoch metrics.
results = []

for num_layers in layer_options:
    for units in unit_options:
        # Drop Keras state left over from the previous model and reseed the
        # Python, NumPy and TensorFlow generators so that re-running this
        # cell reproduces the same numbers for each configuration.
        tf.keras.backend.clear_session()
        tf.keras.utils.set_random_seed(SEED)

        # Create a unique log directory
        run_name = f"layers_{num_layers}_units_{units}_{datetime.datetime.now().strftime('%Y%m%d-%H%M%S')}"
        log_dir = os.path.join("logs", "regression_selection", run_name)
        tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1)

        print(f"üîÅ Training model: {num_layers} layers, {units} units")
        model = create_regression_model(num_layers=num_layers, units=units)
        history = model.fit(X_train, y_train,
                            epochs=20,
                            validation_data=(X_val, y_val),
                            callbacks=[tensorboard_callback],
                            verbose=0)

        # Keep the last-epoch value of every metric Keras logged, so the
        # comparison can be built from code instead of read off TensorBoard.
        final_metrics = {name: values[-1] for name, values in history.history.items()}
        results.append({"num_layers": num_layers, "units": units, **final_metrics})

# Tabular view of all runs; display it in a later cell to compare models.
results_df = pd.DataFrame(results)


üîÅ Training model: 1 layers, 32 units




üîÅ Training model: 1 layers, 64 units
üîÅ Training model: 2 layers, 32 units
üîÅ Training model: 2 layers, 64 units
üîÅ Training model: 3 layers, 32 units
üîÅ Training model: 3 layers, 64 units


In [81]:
%reload_ext tensorboard
%load_ext tensorboard
%tensorboard --logdir logs/regression_selection


The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


Reusing TensorBoard on port 6008 (pid 29326), started 0:07:31 ago. (Use '!kill 29326' to kill it.)

In [None]:
# ### 📊 Summary Analysis (Model Selection - Regression)

# Overfit Gap = Train MSE − Val MSE (a negative gap means the validation error is higher than the training error).

# | Model     | Train MSE | Val MSE | Overfit Gap | Verdict                        |
# |-----------|-----------|---------|-------------|--------------------------------|
# | 1L × 32   | 405.91    | 375.50  | 30.41       | ❌ Too shallow, high error     |
# | 1L × 64   | 397.83    | 368.12  | 29.71       | ❌ Slightly better, still high |
# | 2L × 32   | 284.17    | 262.63  | 21.54       | ✅ Decent improvement          |
# | 2L × 64   | 158.46    | 164.23  | -5.77       | ✅✅ Balanced, strong choice   |
# | 3L × 32   | 104.18    | 107.22  | -3.04       | ✅ Very good generalization    |
# | 3L × 64   | 75.46     | 77.00   | -1.54       | 🏆 Best model overall ✅✅✅   |
