# GAN MODEL TRAINING
---

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from helpers import (
    find_missing_date_ranges,
)

from gan import (build_generator, build_discriminator, SolarGAN)

from enums import (
    DatasetColumns,
    WeatherDatasetColumns
)

from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.seasonal import seasonal_decompose

In [2]:
print(tf.keras.__version__)

3.4.0


## Data Preparation
---


In [3]:
# Load data
FILE_NAME = "dataset.csv"
WEATHER_DATASET = "dataset_weather.csv"


original_data = pd.read_csv(
    FILE_NAME,
    parse_dates=[DatasetColumns.STATISTICAL_PERIOD.value],
    index_col=DatasetColumns.STATISTICAL_PERIOD.value,
)


weather_data = pd.read_csv(
    WEATHER_DATASET,
    parse_dates=[WeatherDatasetColumns.DATETIME.value],
    index_col=WeatherDatasetColumns.DATETIME.value,
).asfreq("h")

weather_features = [
    WeatherDatasetColumns.TEMPERATURE_C.value,
    WeatherDatasetColumns.HUMIDITY_PERCENT.value,
]

In [4]:
# Find missing date ranges
gap_start, gap_end = find_missing_date_ranges(
    original_data, DatasetColumns.STATISTICAL_PERIOD.value
)
gap_dates = pd.date_range(start=gap_start, end=gap_end, freq="h")

In [5]:
# Data Splitting
pre_gap_data = original_data[original_data.index < gap_start].asfreq("h")
post_gap_data = original_data[original_data.index >= gap_end].asfreq("h")

pre_gap_train_size = int(len(pre_gap_data) * 0.8)
pre_gap_train = pre_gap_data.iloc[:pre_gap_train_size].copy()
pre_gap_test = pre_gap_data.iloc[pre_gap_train_size:]

pre_gap_train.loc[:, DatasetColumns.PV_YIELD.value] = pre_gap_train[
    DatasetColumns.PV_YIELD.value
].interpolate(method="linear")


pre_weather_data = weather_data[weather_data.index < gap_start].bfill()
pre_weather_data = pre_weather_data.reindex(pre_gap_data.index)
pre_weather_data_test = pre_weather_data.reindex(pre_gap_test.index)


gap_weather_data = weather_data.reindex(gap_dates).ffill()
post_weather_data = weather_data[weather_data.index >= gap_end].bfill()


pre_gap_train_combined = pre_gap_train.join(
    pre_weather_data[weather_features], how="inner"
)
pre_gap_test_combined = pre_gap_test.join(
    pre_weather_data_test[weather_features], how="inner"
)

## GAN Setup
---

In [6]:
# GAN Hyperparameters
LATENT_DIM = 50
LEARNING_RATE = 0.0001
BATCH_SIZE = 64
EPOCHS = 1000

In [7]:
def prepare_data(pre_gap_train_combined, pre_gap_test_combined, weather_features):
    # Scale the data
    combined_columns = [DatasetColumns.PV_YIELD.value] + weather_features
    scaler = MinMaxScaler()

    # Fit and transform training data
    train_scaled = scaler.fit_transform(pre_gap_train_combined[combined_columns])
    train_pv = train_scaled[:, 0:1].astype(np.float32)  # Convert to float32
    train_weather = train_scaled[:, 1:].astype(np.float32)  # Convert to float32

    # Transform test data
    test_scaled = scaler.transform(pre_gap_test_combined[combined_columns])
    test_pv = test_scaled[:, 0:1].astype(np.float32)
    test_weather = test_scaled[:, 1:].astype(np.float32)

    # Create TensorFlow datasets with float32 data
    train_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(train_pv, tf.float32), tf.cast(train_weather, tf.float32))
    )
    train_dataset = train_dataset.shuffle(buffer_size=1000).batch(BATCH_SIZE)

    test_dataset = tf.data.Dataset.from_tensor_slices(
        (tf.cast(test_pv, tf.float32), tf.cast(test_weather, tf.float32))
    )
    test_dataset = test_dataset.batch(BATCH_SIZE)

    return train_dataset, test_dataset, scaler

In [8]:
# function to visualize training progress
def plot_training_history(history):
    plt.figure(figsize=(12, 5))
    plt.plot(history["d_loss"], label="Discriminator Loss")
    plt.plot(history["g_loss"], label="Generator Loss")
    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.title("Training History")
    plt.grid(True)
    plt.show()

In [9]:
def train_solar_gan(train_dataset, num_features):
    # Configure TensorFlow for metal
    try:
        tf.config.experimental.set_memory_growth(
            tf.config.list_physical_devices("GPU")[0], True
        )
    except:
        pass

    solar_gan = SolarGAN(LATENT_DIM, num_features)

    initial_lr = 2e-4  # Slightly reduced learning rate

    solar_gan.compile(
        g_optimizer=tf.keras.optimizers.Adam(initial_lr, beta_1=0.0, beta_2=0.9),
        d_optimizer=tf.keras.optimizers.Adam(initial_lr, beta_1=0.0, beta_2=0.9),
    )

    history = {"d_loss": [], "g_loss": []}
    best_loss = float("inf")
    patience = 50  # Reduced patience
    patience_counter = 0
    min_epochs = 300  # Reduced minimum epochs

    warmup_epochs = 10  # Reduced warmup

    for epoch in range(EPOCHS):
        epoch_d_loss = []
        epoch_g_loss = []

        if epoch < warmup_epochs:
            current_lr = initial_lr * (epoch + 1) / warmup_epochs
        else:
            current_lr = initial_lr * (0.98 ** (epoch - warmup_epochs))

        solar_gan.g_optimizer = tf.keras.optimizers.Adam(
            current_lr, beta_1=0.0, beta_2=0.9
        )
        solar_gan.d_optimizer = tf.keras.optimizers.Adam(
            current_lr, beta_1=0.0, beta_2=0.9
        )

        for batch_data in train_dataset:
            # Reduced discriminator steps
            d_steps = 3 if np.mean(epoch_d_loss) > 5 else 2
            for _ in range(d_steps):
                losses = solar_gan.train_step(batch_data)
                epoch_d_loss.append(float(losses["d_loss"]))
                epoch_g_loss.append(float(losses["g_loss"]))

        avg_d_loss = np.mean(epoch_d_loss)
        avg_g_loss = np.mean(epoch_g_loss)

        history["d_loss"].append(avg_d_loss)
        history["g_loss"].append(avg_g_loss)

        if (epoch + 1) % 5 == 0:  # More frequent updates
            print(
                f"Epoch {epoch + 1}/{EPOCHS} | D Loss: {avg_d_loss:.4f} | G Loss: {avg_g_loss:.4f}"
            )
            print(f"Current learning rate: {current_lr:.6f}")

        if epoch >= min_epochs:
            current_loss = abs(avg_d_loss)
            if current_loss < best_loss * 1.1:
                best_loss = min(current_loss, best_loss)
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch + 1}")
                break

    return solar_gan, history

In [10]:
# Evaluate Model
def generate_predictions(model, weather_features, scaler):
    # Ensure weather_features is float32
    weather_features = tf.cast(weather_features, tf.float32)
    batch_size = tf.shape(weather_features)[0]

    # Create noise input
    noise = tf.random.normal([batch_size, LATENT_DIM], dtype=tf.float32)

    # Generate predictions by passing noise and weather features separately
    predictions_scaled = model.generator([noise, weather_features], training=False)

    # Convert predictions to numpy and prepare for inverse transform
    predictions_with_weather = np.concatenate(
        [predictions_scaled.numpy(), weather_features.numpy()], axis=1
    )
    predictions = scaler.inverse_transform(predictions_with_weather)[:, 0]

    return predictions


# Example usage in evaluation
def evaluate_model(model, test_dataset, scaler):
    all_predictions = []
    all_true_values = []

    for test_pv, test_weather in test_dataset:
        # Generate predictions for this batch
        batch_predictions = generate_predictions(model, test_weather, scaler)

        # Store predictions and true values
        all_predictions.extend(batch_predictions)
        all_true_values.extend(test_pv.numpy().flatten())

    # Convert to numpy arrays
    all_predictions = np.array(all_predictions)
    all_true_values = np.array(all_true_values)

    # Calculate metrics (e.g., MSE, MAE)
    mse = np.mean((all_predictions - all_true_values) ** 2)
    mae = np.mean(np.abs(all_predictions - all_true_values))

    print(f"Mean Squared Error: {mse}")
    print(f"Mean Absolute Error: {mae:.4f}")

    return all_predictions, all_true_values

## `pre_gap_data` training model
---

In [11]:
train_dataset, test_dataset, scaler = prepare_data(
        pre_gap_train_combined,
        pre_gap_test_combined,
        weather_features
    )    

In [12]:
# Train the model
num_weather_features = len(weather_features)
solar_gan, history = train_solar_gan(train_dataset, num_weather_features)
plot_training_history(history)

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
2025-02-06 17:11:50.678557: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-02-06 17:12:04.949975: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
2025-02-06 17:12:33.982864: I tensorflow/core/framework/local_rendezvous.cc:405] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 5/1000 | D Loss: 7.6752 | G Loss: 0.4528
Current learning rate: 0.000100


KeyboardInterrupt: 

In [None]:
print("\nEvaluating model on test set...")
predictions, true_values = evaluate_model(solar_gan, test_dataset, scaler)

plt.figure(figsize=(12, 6))
plt.plot(true_values, label='True Values', marker='o')
plt.plot(predictions, label='Predicted Values', marker='x')
plt.xlabel('Sample Index')
plt.ylabel('PV Yield')
plt.legend()
plt.title('True vs Predicted PV Yield (First 100 samples)')
plt.show()