# GAN MODEL TRAINING
---

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from helpers import find_missing_date_ranges
from enums import (
    DatasetColumns,
    WeatherDatasetColumns
)

from sklearn.preprocessing import MinMaxScaler

In [2]:
# Ask TensorFlow which GPUs it can see; the list is empty on a CPU-only machine.
physical_devices = tf.config.list_physical_devices(device_type='GPU')
if len(physical_devices) > 0:
    first_gpu = physical_devices[0]
    # Allocate GPU memory on demand rather than reserving it all up front.
    tf.config.experimental.set_memory_growth(first_gpu, True)
    # Expose only the first GPU to the TensorFlow runtime.
    tf.config.set_visible_devices(first_gpu, 'GPU')

# Soft placement: ops that cannot run on the requested device (e.g. an explicit
# "/GPU:0" scope without a usable GPU kernel) are placed on another device
# instead of raising an error.
tf.config.set_soft_device_placement(True)

## Data Preparation
---


In [12]:
# Load the PV dataset and the weather dataset, each indexed by its timestamp column
FILE_NAME = "dataset.csv"
WEATHER_DATASET = "dataset_weather.csv"

period_column = DatasetColumns.STATISTICAL_PERIOD.value
original_data = pd.read_csv(
    FILE_NAME, parse_dates=[period_column], index_col=period_column
)

weather_datetime_column = WeatherDatasetColumns.DATETIME.value
weather_data = pd.read_csv(
    WEATHER_DATASET,
    parse_dates=[weather_datetime_column],
    index_col=weather_datetime_column,
)
# Conform the weather index to a regular hourly grid.
weather_data = weather_data.asfreq("h")

# Weather columns used as conditioning inputs for the GAN.
weather_features = [
    column.value
    for column in (
        WeatherDatasetColumns.TEMPERATURE_C,
        WeatherDatasetColumns.HUMIDITY_PERCENT,
    )
]

In [13]:
# Locate the missing period in the PV data. `find_missing_date_ranges` is a
# project helper; its result is unpacked here as a (start, end) pair.
gap_bounds = find_missing_date_ranges(
    original_data, DatasetColumns.STATISTICAL_PERIOD.value
)
gap_start, gap_end = gap_bounds

# Hourly timestamps from gap_start to gap_end (both endpoints included).
gap_dates = pd.date_range(gap_start, gap_end, freq="h")

In [14]:
# Data Splitting
# PV rows strictly before the gap and from the gap end onwards, on an hourly grid.
is_before_gap = original_data.index < gap_start
is_from_gap_end = original_data.index >= gap_end
pre_gap_data = original_data[is_before_gap].asfreq("h")
post_gap_data = original_data[is_from_gap_end].asfreq("h")

# Ordered 80/20 split of the pre-gap rows (first 80% train, remainder test).
pre_gap_train_size = int(len(pre_gap_data) * 0.8)
pre_gap_train = pre_gap_data.iloc[:pre_gap_train_size].copy()
pre_gap_test = pre_gap_data.iloc[pre_gap_train_size:]

# Fill missing PV values in the training rows only, by linear interpolation.
pv_column = DatasetColumns.PV_YIELD.value
pre_gap_train.loc[:, pv_column] = pre_gap_train[pv_column].interpolate(
    method="linear"
)

# Pre-gap weather: back-fill holes, then align to the PV index.
pre_weather_data = (
    weather_data[weather_data.index < gap_start]
    .bfill()
    .reindex(pre_gap_data.index)
)
pre_weather_data_test = pre_weather_data.reindex(pre_gap_test.index)

# Weather during the gap (forward-filled) and after it (back-filled).
gap_weather_data = weather_data.reindex(gap_dates).ffill()
post_weather_data = weather_data[weather_data.index >= gap_end].bfill()

# Attach the selected weather columns to the PV rows; the inner join keeps
# only timestamps present on both sides.
pre_gap_train_combined = pre_gap_train.join(
    pre_weather_data[weather_features], how="inner"
)
pre_gap_test_combined = pre_gap_test.join(
    pre_weather_data_test[weather_features], how="inner"
)

## GAN Setup
---

In [5]:
# GAN Hyperparameters
# Length of the random noise vector fed to the generator.
LATENT_DIM = 128
# Adam learning rates for the generator (G) and the discriminator/critic (D).
LEARNING_RATE_G = 2e-5  
LEARNING_RATE_D = 1e-5
# Number of samples per batch in the train and test tf.data pipelines.
BATCH_SIZE = 32
# Upper bound on training epochs; train_solar_gan may stop earlier (early stopping).
EPOCHS = 1000

In [6]:
# WGAN-GP Architecture
def build_generator(latent_dim, num_features):
    """Build the conditional generator network.

    The model takes a noise vector and a weather-feature vector, embeds each in
    its own small branch, concatenates them, and passes the result through a
    stack of residual dense blocks before a single sigmoid output unit.

    Args:
        latent_dim: Length of the noise input vector.
        num_features: Number of weather features in the conditioning input.

    Returns:
        A ``tf.keras.Model`` with inputs ``[noise, weather]`` and one sigmoid
        output per sample.
    """
    layers = tf.keras.layers

    def dense_act_norm(tensor, units):
        # Dense -> LeakyReLU(0.2) -> BatchNormalization
        tensor = layers.Dense(units)(tensor)
        tensor = layers.LeakyReLU(alpha=0.2)(tensor)
        return layers.BatchNormalization()(tensor)

    def dense_block(inputs, units, dropout_rate=0.3):
        # Residual block: the shortcut is linearly projected only when the
        # incoming width differs from `units`.
        if inputs.shape[-1] == units:
            shortcut = inputs
        else:
            shortcut = layers.Dense(units)(inputs)

        hidden = dense_act_norm(inputs, units)
        hidden = layers.Dropout(dropout_rate)(hidden)
        hidden = dense_act_norm(hidden, units)

        return layers.Add()([hidden, shortcut])

    with tf.device("/GPU:0"):
        noise_input = tf.keras.Input(shape=(latent_dim,))
        weather_input = tf.keras.Input(shape=(num_features,))

        # Weather branch: normalise the raw features, then embed to 32 units.
        weather_branch = layers.BatchNormalization()(weather_input)
        weather_branch = dense_act_norm(weather_branch, 32)

        # Noise branch: embed to 64 units.
        noise_branch = dense_act_norm(noise_input, 64)

        features = layers.Concatenate()([noise_branch, weather_branch])

        # Residual trunk, narrowing from 512 to 128 units.
        for width in (512, 256, 256, 128, 128):
            features = dense_block(features, width, 0.3)

        features = dense_act_norm(features, 64)
        features = layers.Dropout(0.2)(features)

        # Sigmoid keeps the output in [0, 1].
        output = layers.Dense(1, activation="sigmoid")(features)

        return tf.keras.Model(inputs=[noise_input, weather_input], outputs=output)


def build_discriminator(num_features):
    """Build the critic that scores a (PV value, weather features) pair.

    Args:
        num_features: Number of weather features in the conditioning input.

    Returns:
        A ``tf.keras.Model`` named ``"Discriminator"`` with inputs
        ``[pv, weather]`` and a single unbounded (linear) output per sample.
    """
    layers = tf.keras.layers

    def critic_block(tensor, units, dropout_rate=0.3):
        # Dense -> LeakyReLU(0.2) -> BatchNormalization -> Dropout
        tensor = layers.Dense(units)(tensor)
        tensor = layers.LeakyReLU(alpha=0.2)(tensor)
        tensor = layers.BatchNormalization()(tensor)
        return layers.Dropout(dropout_rate)(tensor)

    with tf.device("/GPU:0"):
        pv_input = tf.keras.Input(shape=(1,))
        weather_input = tf.keras.Input(shape=(num_features,))

        # NOTE(review): BatchNormalization couples the samples of a batch, while
        # the gradient penalty used in SolarGAN.train_step is computed per
        # sample. Confirm that batch norm in the critic is intended.
        normalized_inputs = [
            layers.BatchNormalization()(pv_input),
            layers.BatchNormalization()(weather_input),
        ]
        hidden = layers.Concatenate()(normalized_inputs)

        for width in (128, 256, 512, 256):
            hidden = critic_block(hidden, width, 0.3)

        # No activation: the critic emits a raw score.
        score = layers.Dense(1)(hidden)

        return tf.keras.Model(
            inputs=[pv_input, weather_input], outputs=score, name="Discriminator"
        )


class SolarGAN(tf.keras.Model):
    """Conditional Wasserstein GAN with gradient penalty for PV values.

    Wraps the generator from ``build_generator`` and the critic from
    ``build_discriminator`` and implements one combined training step.
    """

    def __init__(self, latent_dim, num_features):
        """Create the generator and discriminator sub-models.

        Args:
            latent_dim: Length of the generator's noise input.
            num_features: Number of weather features used as conditioning.
        """
        super(SolarGAN, self).__init__()
        self.latent_dim = latent_dim
        with tf.device("/GPU:0"):
            self.generator = build_generator(latent_dim, num_features)
            self.discriminator = build_discriminator(num_features)
        # Coefficient applied to the gradient-penalty term of the critic loss.
        self.gp_weight = tf.cast(10.0, tf.float32)

    def compile(self, g_optimizer, d_optimizer):
        """Store the optimizers used for the generator and the discriminator.

        Args:
            g_optimizer: Optimizer applied to the generator's variables.
            d_optimizer: Optimizer applied to the discriminator's variables.
        """
        super(SolarGAN, self).compile()
        self.g_optimizer = g_optimizer
        self.d_optimizer = d_optimizer

    @tf.function
    def train_step(self, data):
        """Run one training step on a batch: 3 critic updates, then 1 generator update.

        Args:
            data: A ``(real_pv, weather_features)`` pair for one batch.

        Returns:
            Dict with ``"d_loss"`` and ``"g_loss"``: the critic and generator
            losses averaged over their respective update steps.
        """
        real_pv, weather_features = data
        batch_size = tf.shape(real_pv)[0]
        # Number of critic updates and generator updates per call.
        d_steps = 3
        g_steps = 1

        # ---- Critic (discriminator) updates ----
        d_loss_avg = tf.cast(0.0, tf.float32)
        for _ in range(d_steps):
            noise = tf.random.normal([batch_size, self.latent_dim], dtype=tf.float32)

            with tf.GradientTape() as tape:
                fake_pv = self.generator([noise, weather_features], training=True)
                fake_pv = tf.cast(fake_pv, tf.float32)

                real_pred = self.discriminator(
                    [real_pv, weather_features], training=True
                )
                fake_pred = self.discriminator(
                    [fake_pv, weather_features], training=True
                )

                # Random point on the line between each real and fake sample.
                alpha = tf.random.uniform([batch_size, 1], 0.0, 1.0, dtype=tf.float32)
                interpolated = real_pv + alpha * (fake_pv - real_pv)

                # Inner tape: gradient of the critic output w.r.t. the
                # interpolated input. It is nested inside the outer tape so the
                # penalty itself can be differentiated w.r.t. critic weights.
                with tf.GradientTape() as gp_tape:
                    gp_tape.watch(interpolated)
                    interp_pred = self.discriminator(
                        [interpolated, weather_features], training=True
                    )

                grads = gp_tape.gradient(interp_pred, interpolated)
                grad_norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=1))
                # Penalise deviation of the per-sample gradient norm from 1.
                gradient_penalty = tf.reduce_mean(tf.square(grad_norm - 1.0))

                # Critic loss: mean(fake score) - mean(real score) + weighted penalty.
                d_loss = (
                    tf.reduce_mean(fake_pred)
                    - tf.reduce_mean(real_pred)
                    + self.gp_weight * gradient_penalty
                )

            # Only the discriminator's variables are updated in this phase.
            d_gradients = tape.gradient(d_loss, self.discriminator.trainable_variables)
            self.d_optimizer.apply_gradients(
                zip(d_gradients, self.discriminator.trainable_variables)
            )
            d_loss_avg += d_loss

        # ---- Generator updates ----
        g_loss_avg = tf.cast(0.0, tf.float32)
        for _ in range(g_steps):
            noise = tf.random.normal([batch_size, self.latent_dim], dtype=tf.float32)

            with tf.GradientTape() as tape:
                fake_pv = self.generator([noise, weather_features], training=True)
                fake_pv = tf.cast(fake_pv, tf.float32)
                fake_pred = self.discriminator(
                    [fake_pv, weather_features], training=True
                )

                # Adversarial term plus weighted L1/L2 reconstruction terms
                # against the real PV values of the same batch.
                wasserstein_loss = -tf.reduce_mean(fake_pred)
                l1_loss = 0.2 * tf.reduce_mean(tf.abs(fake_pv - real_pv))
                l2_loss = 0.1 * tf.reduce_mean(tf.square(fake_pv - real_pv))
                # Mean absolute difference between consecutive rows of the batch.
                # NOTE(review): prepare_data shuffles the training dataset, so
                # consecutive rows are presumably not consecutive in time —
                # confirm this term does what is intended.
                smoothness_loss = 0.1 * tf.reduce_mean(
                    tf.abs(fake_pv[1:] - fake_pv[:-1])
                )

                g_loss = wasserstein_loss + l1_loss + l2_loss + smoothness_loss

            # Only the generator's variables are updated in this phase.
            g_gradients = tape.gradient(g_loss, self.generator.trainable_variables)
            self.g_optimizer.apply_gradients(
                zip(g_gradients, self.generator.trainable_variables)
            )
            g_loss_avg += g_loss

        return {
            "d_loss": d_loss_avg / tf.cast(d_steps, tf.float32),
            "g_loss": g_loss_avg / tf.cast(g_steps, tf.float32),
        }

In [7]:
# Prepare Data
def prepare_data(pre_gap_train_combined, pre_gap_test_combined, weather_features):
    """Scale the train/test frames and wrap them in batched ``tf.data`` datasets.

    A single ``MinMaxScaler`` is fitted on the training frame's
    ``[PV yield, *weather_features]`` columns and reused for the test frame.

    Args:
        pre_gap_train_combined: Training frame with PV and weather columns.
        pre_gap_test_combined: Test frame with the same columns.
        weather_features: List of weather column names, in model input order.

    Returns:
        ``(train_dataset, test_dataset, scaler)`` where each dataset yields
        ``(pv, weather)`` float32 batches of size ``BATCH_SIZE``; only the
        training dataset is shuffled.
    """
    # PV first, weather after: column 0 of every scaled array is the target.
    combined_columns = [DatasetColumns.PV_YIELD.value] + weather_features
    scaler = MinMaxScaler()

    def split_pv_and_weather(scaled):
        # Split a scaled array into (pv column, weather columns) float32 tensors.
        pv = tf.cast(scaled[:, 0:1], tf.float32)
        weather = tf.cast(scaled[:, 1:], tf.float32)
        return pv, weather

    with tf.device('/GPU:0'):
        train_pv, train_weather = split_pv_and_weather(
            scaler.fit_transform(pre_gap_train_combined[combined_columns])
        )
        test_pv, test_weather = split_pv_and_weather(
            scaler.transform(pre_gap_test_combined[combined_columns])
        )

        # Prefetch so input preparation overlaps with training.
        train_dataset = (
            tf.data.Dataset.from_tensor_slices((train_pv, train_weather))
            .shuffle(buffer_size=1000)
            .batch(BATCH_SIZE)
            .prefetch(tf.data.AUTOTUNE)
        )
        test_dataset = (
            tf.data.Dataset.from_tensor_slices((test_pv, test_weather))
            .batch(BATCH_SIZE)
            .prefetch(tf.data.AUTOTUNE)
        )

    return train_dataset, test_dataset, scaler

In [8]:
# Train SOLAR GAN
def train_solar_gan(train_dataset, num_features):
    """Train a ``SolarGAN`` with a custom epoch loop, LR decay and early stopping.

    Every 200 epochs both learning rates are multiplied by 0.9, and the decay
    compounds over time. After 300 epochs, training stops once
    ``|d_loss| + |g_loss|`` has failed to improve by at least 0.1% for 100
    consecutive epochs.

    Args:
        train_dataset: Iterable of ``(real_pv, weather_features)`` batches.
        num_features: Number of weather features per sample.

    Returns:
        ``(solar_gan, history)`` where ``history`` maps ``"d_loss"`` and
        ``"g_loss"`` to lists of per-epoch mean losses.
    """
    with tf.device('/GPU:0'):
        solar_gan = SolarGAN(LATENT_DIM, num_features)
        solar_gan.compile(
            g_optimizer=tf.keras.optimizers.Adam(
                learning_rate=LEARNING_RATE_G, beta_1=0.5, beta_2=0.9
            ),
            d_optimizer=tf.keras.optimizers.Adam(
                learning_rate=LEARNING_RATE_D, beta_1=0.5, beta_2=0.9
            ),
        )

        history = {"d_loss": [], "g_loss": []}

        # Early-stopping state.
        best_loss = float("inf")
        patience = 100
        patience_counter = 0
        min_epochs = 300

        # Step-decay schedule. The current rates are tracked here so that each
        # decay is applied on top of the previous one; they start from the
        # same constants the optimizers were created with.
        lr_decay_every = 200
        lr_decay_factor = 0.9
        current_lr_g = LEARNING_RATE_G
        current_lr_d = LEARNING_RATE_D

        for epoch in range(EPOCHS):

            if epoch > 0 and epoch % lr_decay_every == 0:
                current_lr_g *= lr_decay_factor
                current_lr_d *= lr_decay_factor
                solar_gan.g_optimizer.learning_rate = current_lr_g
                solar_gan.d_optimizer.learning_rate = current_lr_d

            d_losses = []
            g_losses = []

            for batch_data in train_dataset:
                losses = solar_gan.train_step(batch_data)
                d_losses.append(float(losses["d_loss"]))
                g_losses.append(float(losses["g_loss"]))

            avg_d_loss = np.mean(d_losses)
            avg_g_loss = np.mean(g_losses)
            history["d_loss"].append(avg_d_loss)
            history["g_loss"].append(avg_g_loss)

            print(f"Epoch {epoch + 1}/{EPOCHS}")
            print(f"D Loss: {avg_d_loss:.4f} | G Loss: {avg_g_loss:.4f}")

            if epoch >= min_epochs:
                current_loss = abs(avg_d_loss) + abs(avg_g_loss)
                # Require a relative improvement of at least 0.1% to reset patience.
                if current_loss < best_loss * 0.999:
                    best_loss = current_loss
                    patience_counter = 0
                else:
                    patience_counter += 1

                if patience_counter >= patience:
                    print(f"Early stopping triggered at epoch {epoch + 1}")
                    break

        return solar_gan, history

In [9]:
# Visualize training progress
def plot_training_history(history):
    """Plot the per-epoch discriminator and generator losses on one chart.

    Args:
        history: Mapping with ``"d_loss"`` and ``"g_loss"`` sequences, one
            value per epoch.
    """
    _, axes = plt.subplots(figsize=(12, 5))

    curves = (
        ("d_loss", "Discriminator Loss"),
        ("g_loss", "Generator Loss"),
    )
    for key, label in curves:
        axes.plot(history[key], label=label)

    axes.set_xlabel("Epoch")
    axes.set_ylabel("Loss")
    axes.legend()
    axes.set_title("Training History")
    axes.grid(True)
    plt.show()

In [10]:
# Evaluate Model
def generate_predictions(model, weather_features, scaler):
    """Generate PV predictions for a batch of scaled weather features.

    Args:
        model: Object exposing a ``generator`` Keras model (e.g. ``SolarGAN``).
        weather_features: Scaled weather features, one row per sample.
        scaler: Fitted scaler used to map the output back to original units.
            Assumes it was fitted on ``[PV, weather...]`` columns in that
            order, as ``prepare_data`` does.

    Returns:
        1-D NumPy array of predictions after ``scaler.inverse_transform``.
    """
    weather = tf.cast(weather_features, tf.float32)
    num_rows = tf.shape(weather)[0]

    # Fresh noise for every call, so predictions are stochastic.
    latent = tf.random.normal([num_rows, LATENT_DIM], dtype=tf.float32)
    generated_scaled = model.generator([latent, weather], training=False)

    # Rebuild the [PV, weather...] layout so the scaler can invert it, then
    # keep only the PV column.
    scaled_rows = np.concatenate(
        [generated_scaled.numpy(), weather.numpy()], axis=1
    )
    return scaler.inverse_transform(scaled_rows)[:, 0]

def evaluate_model(model, test_dataset, scaler):
    """Evaluate the generator on a test dataset and print MSE/MAE.

    Both the predictions and the true PV values are mapped back through
    ``scaler.inverse_transform`` before being compared, so the metrics and the
    returned arrays are on the same (unscaled) scale.

    Args:
        model: Trained model passed through to ``generate_predictions``.
        test_dataset: Iterable of ``(test_pv, test_weather)`` scaled batches.
        scaler: Fitted scaler. Assumes it was fitted on ``[PV, weather...]``
            columns in that order, as ``prepare_data`` does.

    Returns:
        ``(all_predictions, all_true_values)`` as 1-D NumPy arrays in the
        scaler's original units.
    """
    all_predictions = []
    all_true_values = []

    for test_pv, test_weather in test_dataset:
        batch_predictions = generate_predictions(model, test_weather, scaler)
        all_predictions.extend(batch_predictions)

        # The targets in the dataset are MinMax-scaled, whereas the predictions
        # above have already been inverse-transformed. Undo the scaling on the
        # targets too, using the same [PV, weather...] column layout.
        true_with_weather = np.concatenate(
            [test_pv.numpy(), test_weather.numpy()], axis=1
        )
        batch_true_values = scaler.inverse_transform(true_with_weather)[:, 0]
        all_true_values.extend(batch_true_values)

    all_predictions = np.array(all_predictions)
    all_true_values = np.array(all_true_values)

    mse = np.mean((all_predictions - all_true_values) ** 2)
    mae = np.mean(np.abs(all_predictions - all_true_values))

    print(f"Mean Squared Error: {mse}")
    print(f"Mean Absolute Error: {mae:.4f}")

    return all_predictions, all_true_values

## Model training on `pre_gap_data`
---

In [15]:
# Build the scaled, batched train/test datasets for the pre-gap period
train_dataset, test_dataset, scaler = prepare_data(
    pre_gap_train_combined=pre_gap_train_combined,
    pre_gap_test_combined=pre_gap_test_combined,
    weather_features=weather_features,
)

2025-02-11 22:41:39.868247: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2025-02-11 22:41:39.868474: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2025-02-11 22:41:39.868497: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2025-02-11 22:41:39.868734: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2025-02-11 22:41:39.868812: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [None]:
# Train the GAN on the pre-gap training set, then plot the loss curves
num_weather_features = len(weather_features)
solar_gan, history = train_solar_gan(
    train_dataset=train_dataset,
    num_features=num_weather_features,
)
plot_training_history(history=history)

2025-02-11 22:41:47.658742: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


In [None]:
# Evaluate the trained generator on the held-out pre-gap test set
predictions, true_values = evaluate_model(solar_gan, test_dataset, scaler)

# Overlay predicted and true PV values, one point per test sample.
plt.figure(figsize=(12, 6))
plt.plot(true_values, label='True Values', color='orange')
plt.plot(predictions, label='Predicted Values', color='green')
plt.xlabel('Test sample')
plt.ylabel('PV yield')
plt.legend()
plt.title('WGAN-GP Predictions vs Actual')
plt.show()