# GAN MODEL TRAINING
---

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from helpers import (
    find_missing_date_ranges,
    evaluate_predictions,
    compare_prediction
)

from gan import (build_generator, build_discriminator, SolarGAN)

from enums import (
    DatasetColumns,
    WeatherDatasetColumns
)

from sklearn.preprocessing import MinMaxScaler

In [2]:
# Print the version string reported by tf.keras so it is recorded in the notebook output.
print(tf.keras.__version__)

3.4.0


## Data Preparation
---


In [3]:
# Input files: the PV dataset and the accompanying weather dataset.
FILE_NAME = "dataset.csv"
WEATHER_DATASET = "dataset_weather.csv"

# PV frame, indexed by its parsed statistical-period timestamps.
original_data = pd.read_csv(
    FILE_NAME,
    index_col=DatasetColumns.STATISTICAL_PERIOD.value,
    parse_dates=[DatasetColumns.STATISTICAL_PERIOD.value],
)

# Weather frame, indexed by its parsed timestamps and then conformed to an
# hourly frequency.
weather_data = pd.read_csv(
    WEATHER_DATASET,
    index_col=WeatherDatasetColumns.DATETIME.value,
    parse_dates=[WeatherDatasetColumns.DATETIME.value],
)
weather_data = weather_data.asfreq("h")

# Weather columns that are joined onto the PV data further down.
weather_features = [
    column.value
    for column in (
        WeatherDatasetColumns.TEMPERATURE_C,
        WeatherDatasetColumns.HUMIDITY_PERCENT,
    )
]

In [4]:
# Find missing date ranges
# find_missing_date_ranges comes from the project's helpers module; its result is
# unpacked here as a (start, end) pair.
# NOTE(review): presumably these bound a single gap in the timestamps — confirm against the helper.
gap_start, gap_end = find_missing_date_ranges(
    original_data, DatasetColumns.STATISTICAL_PERIOD.value
)
# Hourly timestamps spanning gap_start..gap_end (pd.date_range includes both ends by default).
gap_dates = pd.date_range(start=gap_start, end=gap_end, freq="h")

In [5]:
# Data Splitting
# Rows strictly before the gap and rows from gap_end onward, each conformed to an
# hourly frequency (asfreq inserts NaN rows for hours absent from the index).
pre_gap_data = original_data[original_data.index < gap_start].asfreq("h")
post_gap_data = original_data[original_data.index >= gap_end].asfreq("h")

# Chronological 80/20 split of the pre-gap rows: the first 80% train, the rest test.
pre_gap_train_size = int(len(pre_gap_data) * 0.8)
# .copy() so the assignment below writes to an independent frame, not a slice of pre_gap_data.
pre_gap_train = pre_gap_data.iloc[:pre_gap_train_size].copy()
pre_gap_test = pre_gap_data.iloc[pre_gap_train_size:]

# Linearly interpolate missing PV yield in the training portion only.
# NOTE(review): pre_gap_test is not interpolated, so any NaN PV values there are
# carried into the scaled test data — confirm this is intended.
pre_gap_train.loc[:, DatasetColumns.PV_YIELD.value] = pre_gap_train[
    DatasetColumns.PV_YIELD.value
].interpolate(method="linear")


# Pre-gap weather: back-fill missing values, then align to the pre-gap PV index.
# NOTE(review): the reindex runs after the fill, so timestamps missing from the
# weather data would come back as NaN — confirm the weather data covers this index.
pre_weather_data = weather_data[weather_data.index < gap_start].bfill()
pre_weather_data = pre_weather_data.reindex(pre_gap_data.index)
# Weather rows aligned to the test portion's index.
pre_weather_data_test = pre_weather_data.reindex(pre_gap_test.index)


# Weather aligned to the hourly gap timestamps (forward-filled), and weather from
# gap_end onward (back-filled).
gap_weather_data = weather_data.reindex(gap_dates).ffill()
post_weather_data = weather_data[weather_data.index >= gap_end].bfill()


# Attach the selected weather columns to the PV rows; the inner join keeps only
# timestamps present in both frames.
pre_gap_train_combined = pre_gap_train.join(
    pre_weather_data[weather_features], how="inner"
)
pre_gap_test_combined = pre_gap_test.join(
    pre_weather_data_test[weather_features], how="inner"
)

## GAN Setup
---

In [6]:
# GAN Hyperparameters
LATENT_DIM = 10  # width of the random-noise vector concatenated with the weather features
LEARNING_RATE = 0.0002  # learning rate passed to the Adam optimizer when the GAN is compiled
BATCH_SIZE = 64  # rows per batch in the train/test tf.data datasets
EPOCHS = 10  # number of passes over the training dataset

In [8]:
def prepare_data(pre_gap_train_combined, pre_gap_test_combined, weather_features):
    """Scale the combined PV/weather frames and wrap them in batched tf.data datasets.

    Args:
        pre_gap_train_combined: Training frame containing the PV yield column and
            every column named in ``weather_features``.
        pre_gap_test_combined: Test frame with the same columns.
        weather_features: Names of the weather columns to use.

    Returns:
        ``(train_dataset, test_dataset, scaler)``. Both datasets yield
        ``(pv, weather)`` float32 batches of at most ``BATCH_SIZE`` rows; only the
        training dataset is shuffled. ``scaler`` is the ``MinMaxScaler`` fitted on
        the training frame, with the PV yield as column 0 followed by the weather
        columns in the given order.
    """
    combined_columns = [DatasetColumns.PV_YIELD.value] + list(weather_features)
    scaler = MinMaxScaler()

    # Fit on the training frame only, so test statistics never leak into the scaling.
    # The scaler returns float64; cast to float32 to match the default dtype of
    # tf.random.normal, since tf.concat refuses to mix float32 and float64 tensors.
    train_scaled = np.asarray(
        scaler.fit_transform(pre_gap_train_combined[combined_columns]),
        dtype=np.float32,
    )
    test_scaled = np.asarray(
        scaler.transform(pre_gap_test_combined[combined_columns]),
        dtype=np.float32,
    )

    # Column 0 is the PV yield; the remaining columns are the weather features.
    # Slicing with 0:1 keeps the PV yield two-dimensional.
    train_pv, train_weather = train_scaled[:, 0:1], train_scaled[:, 1:]
    test_pv, test_weather = test_scaled[:, 0:1], test_scaled[:, 1:]

    # A buffer covering the whole training set gives a uniform shuffle; a smaller
    # fixed buffer would only shuffle locally within the row order of the frame.
    shuffle_buffer = max(len(train_pv), 1)
    train_dataset = (
        tf.data.Dataset.from_tensor_slices((train_pv, train_weather))
        .shuffle(buffer_size=shuffle_buffer)
        .batch(BATCH_SIZE)
    )

    # The test dataset keeps its row order so predictions line up with the test frame.
    test_dataset = tf.data.Dataset.from_tensor_slices((test_pv, test_weather))
    test_dataset = test_dataset.batch(BATCH_SIZE)

    return train_dataset, test_dataset, scaler

In [9]:
def train_solar_gan(train_dataset, num_features):
    """Build, compile and train a SolarGAN, returning it with its loss history.

    Args:
        train_dataset: Iterable of batches; each batch is passed unchanged to
            ``SolarGAN.train_step``.
        num_features: Number of weather features that accompany the PV yield.

    Returns:
        ``(solar_gan, history)`` where ``history`` maps ``"d_loss"`` and
        ``"g_loss"`` to lists holding one per-epoch mean loss each.
    """
    # Generator input: noise plus weather features, output: one PV yield value.
    # Discriminator input: one PV yield value plus the weather features.
    solar_gan = SolarGAN(
        latent_dim=LATENT_DIM,
        feature_dim=num_features,
        generator=build_generator(
            input_dim=LATENT_DIM + num_features, output_dim=1
        ),
        discriminator=build_discriminator(input_dim=1 + num_features),
    )

    adam = tf.keras.optimizers.Adam(LEARNING_RATE, beta_1=0.5, beta_2=0.9)
    solar_gan.compile(optimizer=adam)

    history = {"d_loss": [], "g_loss": []}

    for epoch in range(EPOCHS):
        # map() is lazy, so each step's losses are turned into Python floats
        # before the next training step runs.
        step_losses = [
            (float(step["d_loss"]), float(step["g_loss"]))
            for step in map(solar_gan.train_step, train_dataset)
        ]

        # Per-epoch means of the per-batch losses.
        avg_d_loss = np.mean([d_value for d_value, _ in step_losses])
        avg_g_loss = np.mean([g_value for _, g_value in step_losses])

        history["d_loss"].append(avg_d_loss)
        history["g_loss"].append(avg_g_loss)

        # Report progress on every tenth epoch only.
        if (epoch + 1) % 10 != 0:
            continue
        print(
            f"Epoch {epoch + 1}/{EPOCHS} | D Loss: {avg_d_loss:.4f} | G Loss: {avg_g_loss:.4f}"
        )

    return solar_gan, history

In [10]:
def generate_predictions(model, weather_features, scaler):
    """Generate PV-yield predictions in original units for a batch of weather rows.

    Args:
        model: Object exposing a callable ``generator`` attribute.
        weather_features: 2-D array or tensor of scaled weather features, one row
            per sample, with columns in the order the scaler was fitted on.
        scaler: Fitted scaler whose column 0 is the PV yield, followed by the
            weather feature columns.

    Returns:
        1-D NumPy array with one inverse-scaled PV-yield prediction per input row.
    """
    # tf.random.normal produces float32, and tf.concat refuses to mix dtypes, so
    # float64 features (e.g. straight from scikit-learn) are cast to float32 first.
    weather_features = tf.cast(weather_features, tf.float32)

    batch_size = weather_features.shape[0]
    noise = tf.random.normal([batch_size, LATENT_DIM])
    generator_inputs = tf.concat([noise, weather_features], axis=1)

    # Generate predictions
    predictions_scaled = model.generator(generator_inputs, training=False)

    # The scaler expects the full column layout, so the weather columns are
    # re-attached for the inverse transform and only column 0 (PV yield) is kept.
    predictions_with_weather = np.concatenate(
        [np.asarray(predictions_scaled), np.asarray(weather_features)], axis=1
    )
    predictions = scaler.inverse_transform(predictions_with_weather)[:, 0]

    return predictions

## Training the model on `pre_gap_data`
---

In [11]:
# Scale the pre-gap train/test frames and wrap them in batched datasets.
train_dataset, test_dataset, scaler = prepare_data(
    pre_gap_train_combined=pre_gap_train_combined,
    pre_gap_test_combined=pre_gap_test_combined,
    weather_features=weather_features,
)

In [12]:
# Train the GAN, conditioned on one input per weather column.
num_weather_features = len(weather_features)
solar_gan, history = train_solar_gan(
    train_dataset=train_dataset,
    num_features=num_weather_features,
)

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


ValueError: The layer Generator has never been called and thus has no defined input.

In [None]:
# Generate predictions for the test set, one batch at a time.
# Every batch's output is collected: reassigning a single variable inside the
# loop would leave only the final batch's predictions once the loop ends.
batch_predictions = []
for test_pv, test_weather in test_dataset:
    batch_predictions.append(generate_predictions(solar_gan, test_weather, scaler))

# Predictions for the whole test set in dataset order (empty if there were no batches).
predictions = np.concatenate(batch_predictions) if batch_predictions else np.empty(0)

# Plot the per-epoch discriminator and generator losses on a single axes.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(history["d_loss"], label="Discriminator Loss")
ax.plot(history["g_loss"], label="Generator Loss")
ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
ax.legend()
ax.set_title("Training History")
plt.show()