In [1]:
import os
import sys

import pickle
from typing import Optional
from pathlib import Path

import tensorflow as tf
import numpy as np
import pandas as pd


In [2]:
# Working directory of the kernel. The previous expression,
# os.path.dirname(__name__), evaluates to "" inside a notebook (where
# __name__ is "__main__"), so os.path.abspath() silently fell back to the
# current working directory; Path.cwd() states that intent directly.
PAKAGE_ROOT = Path.cwd()

# Put the parent directory on the import path (presumably so that the
# `train.src...` package imported further down can be resolved -- confirm
# against the repository layout). Guarded so that re-running this cell does
# not keep appending duplicate entries to sys.path.
_parent_dir = str(PAKAGE_ROOT.parent)
if _parent_dir not in sys.path:
    sys.path.append(_parent_dir)

In [3]:
from train.src.model import HousingDataset

In [4]:
class PreprocessConfigurations(object):
    """Static settings describing the preprocessed housing data files.

    Pure namespace of class attributes; it is never instantiated in this
    notebook, the attributes are read directly from the class.
    """

    # Model inputs and regression target, by name.
    feature_names = [
        "MedInc", "HouseAge", "AveRooms", "AveBedrms",
        "Population", "AveOccup", "Latitude", "Longitude",
    ]
    target_names = ["MedHouseVal"]

    # Identifier of the scaler variant.
    scaler = "standard"

    # (prefix, file name) pair for each data split.
    train_prefix, train_file_name = "train", "housing_train.csv"
    valid_prefix, valid_file_name = "valid", "housing_valid.csv"
    test_prefix, test_file_name = "test", "housing_test.csv"

    # (prefix, file name) pair for the serialized scaler.
    scaler_prefix, scaler_name = "scaler", "standard_scaler.pkl"

In [5]:
# Relative path handed to every HousingDataset below as `data_directory`.
# NOTE(review): being relative, it depends on the kernel's working directory.
data_directory = "../preprocess/data/preprocess"

In [6]:
# Keyword arguments common to all three splits: same data directory and the
# same scaler location.
_shared_dataset_kwargs = dict(
    data_directory=data_directory,
    scaler_prefix=PreprocessConfigurations.scaler_prefix,
    scaler_name=PreprocessConfigurations.scaler_name,
)

# One HousingDataset per split; only the file prefix and file name differ.
train_set = HousingDataset(
    file_prefix=PreprocessConfigurations.train_prefix,
    file_name=PreprocessConfigurations.train_file_name,
    **_shared_dataset_kwargs,
)
valid_set = HousingDataset(
    file_prefix=PreprocessConfigurations.valid_prefix,
    file_name=PreprocessConfigurations.valid_file_name,
    **_shared_dataset_kwargs,
)
test_set = HousingDataset(
    file_prefix=PreprocessConfigurations.test_prefix,
    file_name=PreprocessConfigurations.test_file_name,
    **_shared_dataset_kwargs,
)

In [7]:
# Build the iterable dataset of each split, in train / valid / test order.
# The training loop further down iterates the train and valid datasets as
# (inputs, targets) batches.
train_dataset, valid_dataset, test_dataset = (
    split.csv_reader_dataset() for split in (train_set, valid_set, test_set)
)

2024-08-16 02:05:04.121935: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2 Max
2024-08-16 02:05:04.121955: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 32.00 GB
2024-08-16 02:05:04.121959: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 10.67 GB
2024-08-16 02:05:04.122073: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-08-16 02:05:04.122106: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [8]:
# Number of model inputs: one per entry in feature_names (8 names are listed).
n_inputs = len(PreprocessConfigurations.feature_names)

In [9]:
def simple_model(input_shape:tuple, output_dim:int):
    """Build a small fully connected Keras model with the functional API.

    The network is two ReLU hidden layers (16 units, then 8 units) followed
    by an output layer with no activation.

    Args:
        input_shape: Shape passed to ``tf.keras.Input`` (per-sample shape).
        output_dim: Number of units in the final ``Dense`` layer.

    Returns:
        A ``tf.keras.Model`` named ``"simple_model"``.
    """
    inputs = tf.keras.Input(shape=input_shape)

    # Stack the hidden layers in order; each one consumes the previous output.
    hidden = inputs
    for units in (16, 8):
        hidden = tf.keras.layers.Dense(units, activation="relu")(hidden)

    outputs = tf.keras.layers.Dense(output_dim, activation=None)(hidden)
    return tf.keras.Model(inputs, outputs, name="simple_model")

In [10]:
# Instantiate the network with n_inputs inputs and a single output unit,
# then print its layer/parameter table.
model = simple_model(input_shape=(n_inputs, ), output_dim=1)
model.summary()

Model: "simple_model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 8)]               0         
                                                                 
 dense (Dense)               (None, 16)                144       
                                                                 
 dense_1 (Dense)             (None, 8)                 136       
                                                                 
 dense_2 (Dense)             (None, 1)                 9         
                                                                 
Total params: 289 (1.13 KB)
Trainable params: 289 (1.13 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [11]:
# Adam with default hyperparameters, taken from the `legacy` optimizer namespace.
optimizer = tf.keras.optimizers.legacy.Adam()
# Loss that is differentiated in the training loop.
loss_fn = tf.keras.losses.MeanSquaredError()
# Two independent RMSE accumulators so that training and validation results
# do not mix; the training loop resets each of them once per epoch.
train_metric = tf.keras.metrics.RootMeanSquaredError()
val_metric = tf.keras.metrics.RootMeanSquaredError()

In [12]:
# Running means of the per-batch loss values, one per data split.
train_loss = tf.keras.metrics.Mean('train_loss', dtype=tf.float32)
# NOTE(review): this accumulator is fed with validation losses but is named
# 'test_loss' -- confirm whether the label should read 'valid_loss'.
valid_loss = tf.keras.metrics.Mean('test_loss', dtype=tf.float32)

In [13]:
import datetime

# Every execution of this cell gets its own timestamped run directory with
# one sub-directory per summary writer.
current_time = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
_run_dir = f"logs/gradient_tape/{current_time}"
train_log_dir = f"{_run_dir}/train"
test_log_dir = f"{_run_dir}/test"

# TensorBoard summary writers used by the training loop.
train_summary_writer = tf.summary.create_file_writer(train_log_dir)
test_summary_writer = tf.summary.create_file_writer(test_log_dir)

In [14]:
import time

# Custom training loop: for each epoch, run one optimisation pass over
# train_dataset, then one evaluation pass over valid_dataset, and write the
# mean loss and RMSE of each pass to TensorBoard.
epochs = 10
for epoch in range(epochs):
    print("\n Start of epoch %d" % (epoch, ))
    start_time = time.time()

    # ---- Training pass ----
    for step, (x_batch_train, y_batch_train) in enumerate(train_dataset):
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            y_pred = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, y_pred)
        grads = tape.gradient(loss_value, model.trainable_weights)
        optimizer.apply_gradients(zip(grads, model.trainable_weights))

        # Accumulate the per-batch loss and the running RMSE.
        train_loss(loss_value)
        train_metric.update_state(y_batch_train, y_pred)

        # Log every 200 batches.
        if step % 200 == 0:
            print(
                "Training loss (for one batch) at step %d: %.4f"
                % (step, float(loss_value))
            )
            # NOTE(review): assumes the dataset yields batches of 32 samples;
            # confirm against HousingDataset.csv_reader_dataset().
            print("Seen so far: %d samples" % ((step + 1) * 32))

    # Read the epoch RMSE once and reuse it for both the summary and the print.
    train_rmse = train_metric.result()
    with train_summary_writer.as_default():
        tf.summary.scalar('loss', train_loss.result(), step=epoch)
        tf.summary.scalar('rmse', train_rmse, step=epoch)

    print("Training rmse over epoch: %.4f" % (float(train_rmse),))

    # Reset training metrics at the end of each epoch.
    train_metric.reset_state()
    train_loss.reset_state()

    # ---- Validation pass ----
    for x_batch_val, y_batch_val in valid_dataset:
        val_logits = model(x_batch_val, training=False)
        val_losses = loss_fn(y_batch_val, val_logits)
        # Accumulate inside the loop: previously this call sat after the loop,
        # so only the final batch's loss reached the summary (and an empty
        # validation set raised NameError on `val_losses`).
        valid_loss(val_losses)
        val_metric.update_state(y_batch_val, val_logits)

    val_rmse = val_metric.result()

    with test_summary_writer.as_default():
        tf.summary.scalar('loss', valid_loss.result(), step=epoch)
        tf.summary.scalar('rmse', val_rmse, step=epoch)

    # Reset validation metrics so the next epoch starts from zero.
    val_metric.reset_state()
    valid_loss.reset_state()
    print("Validation rmse: %.4f" % (float(val_rmse),))
    print("Time taken: %.2fs" % (time.time() - start_time))



 Start of epoch 0
Training loss (for one batch) at step 0: 3.6351
Seen so far: 32 samples
Training loss (for one batch) at step 200: 0.9568
Seen so far: 6432 samples
Training rmse over epoch: 1.1915
Validation rmse: 1.8812
Time taken: 9.35s

 Start of epoch 1
Training loss (for one batch) at step 0: 0.4609
Seen so far: 32 samples
Training loss (for one batch) at step 200: 0.5616
Seen so far: 6432 samples
Training rmse over epoch: 0.7718
Validation rmse: 1.3605
Time taken: 4.39s

 Start of epoch 2
Training loss (for one batch) at step 0: 0.5567
Seen so far: 32 samples
Training loss (for one batch) at step 200: 0.4279
Seen so far: 6432 samples
Training rmse over epoch: 0.6852
Validation rmse: 1.0038
Time taken: 4.30s

 Start of epoch 3
Training loss (for one batch) at step 0: 0.3431
Seen so far: 32 samples
Training loss (for one batch) at step 200: 0.4156
Seen so far: 6432 samples
Training rmse over epoch: 0.6532
Validation rmse: 0.7269
Time taken: 4.27s

 Start of epoch 4
Training loss

In [17]:
%tensorboard --logdir logs/gradient_tape

UsageError: Line magic function `%tensorboard` not found.


In [None]:
@tf.function
def train_step(x, y):
    """Run one optimisation step on a single batch.

    Uses the notebook-level ``model``, ``loss_fn``, ``optimizer`` and
    ``train_metric`` objects.

    Args:
        x: Batch of model inputs.
        y: Batch of targets matching ``x``.

    Returns:
        The loss value computed by ``loss_fn`` for this batch.
    """
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss_value = loss_fn(y, logits)
    grads = tape.gradient(loss_value, model.trainable_weights)
    optimizer.apply_gradients(zip(grads, model.trainable_weights))
    # Fixed: the body referenced `train_acc_metric`, which is never defined in
    # this notebook; the training RMSE accumulator is called `train_metric`.
    train_metric.update_state(y, logits)
    return loss_value

In [None]:
@tf.function
def test_step(x, y):
    """Evaluate one validation batch and update the validation metric.

    Uses the notebook-level ``model`` and ``val_metric`` objects. Returns
    nothing; the result is read from ``val_metric.result()``.

    Args:
        x: Batch of model inputs.
        y: Batch of targets matching ``x``.
    """
    val_logits = model(x, training=False)
    # Fixed: the body referenced `val_acc_metric`, which is never defined in
    # this notebook; the validation RMSE accumulator is called `val_metric`.
    val_metric.update_state(y, val_logits)