# GAN MODEL TRAINING
---

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt

from helpers import find_missing_date_ranges
from gan import SolarGAN
from enums import (
    DatasetColumns,
    WeatherDatasetColumns
)

from sklearn.preprocessing import MinMaxScaler

In [2]:
print(tf.keras.__version__)

3.4.0


## Data Preparation
---


In [3]:
# Load the PV dataset and the weather dataset, each indexed by its timestamp column.
FILE_NAME = "dataset.csv"
WEATHER_DATASET = "dataset_weather.csv"

# PV dataset: the statistical-period column is parsed as datetimes and used as the index.
original_data = pd.read_csv(
    FILE_NAME,
    index_col=DatasetColumns.STATISTICAL_PERIOD.value,
    parse_dates=[DatasetColumns.STATISTICAL_PERIOD.value],
)

# Weather dataset: same treatment, then conformed to an hourly frequency.
weather_data = pd.read_csv(
    WEATHER_DATASET,
    index_col=WeatherDatasetColumns.DATETIME.value,
    parse_dates=[WeatherDatasetColumns.DATETIME.value],
)
weather_data = weather_data.asfreq("h")

# Weather columns used as conditioning features, in this order.
weather_features = [
    column.value
    for column in (
        WeatherDatasetColumns.TEMPERATURE_C,
        WeatherDatasetColumns.HUMIDITY_PERCENT,
    )
]

In [4]:
# Find the missing date range; the helper's two return values are unpacked as
# the start and end of the gap.
gap_start, gap_end = find_missing_date_ranges(
    original_data,
    DatasetColumns.STATISTICAL_PERIOD.value,
)

# Hourly timestamps from gap_start to gap_end.
gap_dates = pd.date_range(gap_start, gap_end, freq="h")

In [5]:
# Data Splitting
# Rows strictly before the gap, and rows from gap_end onwards, each conformed
# to an hourly index.
pre_gap_data = original_data.loc[original_data.index < gap_start].asfreq("h")
post_gap_data = original_data.loc[original_data.index >= gap_end].asfreq("h")

# Chronological 80/20 split of the pre-gap rows (no shuffling).
pre_gap_train_size = int(0.8 * len(pre_gap_data))
pre_gap_train = pre_gap_data.iloc[:pre_gap_train_size].copy()
pre_gap_test = pre_gap_data.iloc[pre_gap_train_size:]

# Only the training split has its PV-yield column linearly interpolated.
pre_gap_train.loc[:, DatasetColumns.PV_YIELD.value] = (
    pre_gap_train[DatasetColumns.PV_YIELD.value].interpolate(method="linear")
)

# Pre-gap weather: back-fill, then align to the pre-gap index.
pre_weather_data = (
    weather_data.loc[weather_data.index < gap_start]
    .bfill()
    .reindex(pre_gap_data.index)
)
pre_weather_data_test = pre_weather_data.reindex(pre_gap_test.index)

# Weather inside the gap (forward-filled) and after it (back-filled).
gap_weather_data = weather_data.reindex(gap_dates).ffill()
post_weather_data = weather_data.loc[weather_data.index >= gap_end].bfill()

# Attach the weather feature columns to each split on the shared timestamp index.
pre_gap_train_combined = pre_gap_train.join(
    pre_weather_data.loc[:, weather_features], how="inner"
)
pre_gap_test_combined = pre_gap_test.join(
    pre_weather_data_test.loc[:, weather_features], how="inner"
)

## GAN Setup
---

In [None]:
# GAN Hyperparameters
LATENT_DIM = 128  # width of the noise vector fed to the generator; also passed to SolarGAN
LEARNING_RATE = 5e-5  # NOTE(review): not referenced by any code visible in this notebook - confirm it is still needed
LEARNING_RATE_G = 2e-5  # initial Adam learning rate for the generator optimizer
LEARNING_RATE_D = 1e-5  # initial Adam learning rate for the discriminator optimizer
BATCH_SIZE = 32  # batch size used when building the train/test tf.data datasets
EPOCHS = 1000  # upper bound on training epochs; early stopping may end training sooner

In [7]:
# Prepare Data
def prepare_data(pre_gap_train_combined, pre_gap_test_combined, weather_features):
    """Scale the train/test frames and wrap them as batched ``tf.data`` datasets.

    One ``MinMaxScaler`` is fitted on the training frame only and then applied
    to the test frame, so no test statistics leak into the scaling. In the
    scaled arrays column 0 is the PV yield and the remaining columns are the
    weather features, in the order given by ``weather_features``.

    Args:
        pre_gap_train_combined: DataFrame holding the PV-yield column and the
            weather feature columns for training.
        pre_gap_test_combined: DataFrame with the same columns for testing.
        weather_features: Column names of the weather features.

    Returns:
        ``(train_dataset, test_dataset, scaler)``. Each dataset yields
        ``(pv, weather)`` float32 batches of at most ``BATCH_SIZE`` rows. The
        training dataset is shuffled; the test dataset keeps row order.
    """
    combined_columns = [DatasetColumns.PV_YIELD.value, *weather_features]
    scaler = MinMaxScaler()

    train_scaled = scaler.fit_transform(
        pre_gap_train_combined[combined_columns]
    ).astype(np.float32)
    test_scaled = scaler.transform(
        pre_gap_test_combined[combined_columns]
    ).astype(np.float32)

    # Keep the PV column 2-D (n, 1) so it can be concatenated with the weather
    # block again when inverse-transforming.
    train_pv, train_weather = train_scaled[:, 0:1], train_scaled[:, 1:]
    test_pv, test_weather = test_scaled[:, 0:1], test_scaled[:, 1:]

    # The rows are in chronological order. A shuffle buffer smaller than the
    # dataset only shuffles locally, so batches would stay time-correlated.
    # Sizing the buffer to the whole dataset gives a uniform shuffle.
    shuffle_buffer = max(len(train_pv), 1)
    train_dataset = (
        tf.data.Dataset.from_tensor_slices((train_pv, train_weather))
        .shuffle(buffer_size=shuffle_buffer)
        .batch(BATCH_SIZE)
    )
    test_dataset = tf.data.Dataset.from_tensor_slices(
        (test_pv, test_weather)
    ).batch(BATCH_SIZE)

    return train_dataset, test_dataset, scaler

In [None]:
# Train SOLAR GAN
def train_solar_gan(train_dataset, num_features):
    """Train a ``SolarGAN`` with a manual epoch loop, LR decay and early stopping.

    Args:
        train_dataset: Iterable of batches; each batch is passed unchanged to
            ``SolarGAN.train_step``.
        num_features: Forwarded as the second ``SolarGAN`` constructor
            argument (the caller passes the number of weather features).

    Returns:
        ``(solar_gan, history)`` where ``history`` maps ``"d_loss"`` and
        ``"g_loss"`` to lists of per-epoch mean losses.
    """
    solar_gan = SolarGAN(LATENT_DIM, num_features)
    solar_gan.compile(
        g_optimizer=tf.keras.optimizers.Adam(
            learning_rate=LEARNING_RATE_G, beta_1=0.5, beta_2=0.9
        ),
        d_optimizer=tf.keras.optimizers.Adam(
            learning_rate=LEARNING_RATE_D, beta_1=0.5, beta_2=0.9
        ),
    )

    history = {"d_loss": [], "g_loss": []}

    # Early-stopping state.
    best_loss = float("inf")
    patience = 100
    patience_counter = 0
    min_epochs = 300

    # Step-decay schedule: multiply both learning rates by `lr_decay_factor`
    # every `lr_decay_every` epochs.
    lr_decay_every = 200
    lr_decay_factor = 0.9
    current_lr_g = LEARNING_RATE_G
    current_lr_d = LEARNING_RATE_D

    for epoch in range(EPOCHS):

        if epoch > 0 and epoch % lr_decay_every == 0:
            # Apply the *compounded* rate. Assigning `LEARNING_RATE_* * 0.9`
            # here would set the same value at every decay step, so the rate
            # would only ever drop once.
            current_lr_g *= lr_decay_factor
            current_lr_d *= lr_decay_factor
            solar_gan.g_optimizer.learning_rate = current_lr_g
            solar_gan.d_optimizer.learning_rate = current_lr_d

        d_losses = []
        g_losses = []

        for batch_data in train_dataset:
            losses = solar_gan.train_step(batch_data)
            d_losses.append(float(losses["d_loss"]))
            g_losses.append(float(losses["g_loss"]))

        avg_d_loss = np.mean(d_losses)
        avg_g_loss = np.mean(g_losses)
        history["d_loss"].append(avg_d_loss)
        history["g_loss"].append(avg_g_loss)

        print(f"Epoch {epoch + 1}/{EPOCHS}")
        print(f"D Loss: {avg_d_loss:.4f} | G Loss: {avg_g_loss:.4f}")

        # Early stopping is only considered once `min_epochs` have run.
        if epoch >= min_epochs:
            # Combined loss magnitude; it must improve by more than 0.1% to
            # reset the patience counter.
            current_loss = abs(avg_d_loss) + abs(avg_g_loss)
            if current_loss < best_loss * 0.999:
                best_loss = current_loss
                patience_counter = 0
            else:
                patience_counter += 1

            if patience_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch + 1}")
                break

    return solar_gan, history

In [9]:
# Visualize training progress
def plot_training_history(history):
    """Plot the per-epoch discriminator and generator losses on one figure.

    Args:
        history: Mapping with ``"d_loss"`` and ``"g_loss"`` sequences, one
            value per epoch.
    """
    curves = (
        ("d_loss", "Discriminator Loss"),
        ("g_loss", "Generator Loss"),
    )

    plt.figure(figsize=(12, 5))
    for key, curve_label in curves:
        plt.plot(history[key], label=curve_label)

    plt.xlabel("Epoch")
    plt.ylabel("Loss")
    plt.legend()
    plt.title("Training History")
    plt.grid(True)
    plt.show()

In [10]:
# Evaluate Model
def generate_predictions(model, weather_features, scaler):
    """Generate PV predictions for a batch of scaled weather rows.

    Args:
        model: Object exposing a callable ``generator`` that accepts
            ``[noise, weather]``.
        weather_features: Batch of weather features; cast to float32 here.
        scaler: Fitted scaler whose first column is the PV yield, followed by
            the weather columns.

    Returns:
        1-D array of predictions after ``scaler.inverse_transform``
        (column 0 of the inverse-transformed rows).
    """
    conditions = tf.cast(weather_features, tf.float32)
    n_rows = tf.shape(conditions)[0]

    # One random latent vector per weather row.
    latent = tf.random.normal([n_rows, LATENT_DIM], dtype=tf.float32)
    generated_scaled = model.generator([latent, conditions], training=False)

    # The scaler was fitted on [pv, weather...] rows, so rebuild that layout
    # before undoing the scaling, then keep only the PV column.
    scaled_rows = np.concatenate(
        [generated_scaled.numpy(), conditions.numpy()], axis=1
    )
    restored_rows = scaler.inverse_transform(scaled_rows)
    return restored_rows[:, 0]

def evaluate_model(model, test_dataset, scaler):
    """Evaluate the generator on a test dataset and print MSE / MAE.

    Both predictions and ground truth are reported in the scaler's original
    (un-scaled) units, so the metrics and the returned arrays are directly
    comparable.

    Args:
        model: Trained model passed through to ``generate_predictions``.
        test_dataset: Iterable of ``(pv, weather)`` batches in scaled space.
        scaler: Fitted scaler whose first column is the PV yield, followed by
            the weather columns.

    Returns:
        ``(all_predictions, all_true_values)`` as 1-D NumPy arrays.
    """
    prediction_batches = []
    truth_batches = []

    for test_pv, test_weather in test_dataset:
        prediction_batches.append(
            generate_predictions(model, test_weather, scaler)
        )

        # `generate_predictions` returns inverse-transformed values, while
        # `test_pv` is still MinMax-scaled. Undo the scaling on the ground
        # truth as well, otherwise the errors compare two different scales.
        scaled_truth_rows = np.concatenate(
            [test_pv.numpy(), test_weather.numpy()], axis=1
        )
        truth_batches.append(scaler.inverse_transform(scaled_truth_rows)[:, 0])

    # np.concatenate rejects an empty list, so fall back to empty arrays.
    all_predictions = (
        np.concatenate(prediction_batches) if prediction_batches else np.empty(0)
    )
    all_true_values = (
        np.concatenate(truth_batches) if truth_batches else np.empty(0)
    )

    errors = all_predictions - all_true_values
    mse = np.mean(errors ** 2)
    mae = np.mean(np.abs(errors))

    print(f"Mean Squared Error: {mse}")
    print(f"Mean Absolute Error: {mae:.4f}")

    return all_predictions, all_true_values

## Training the model on `pre_gap_data`
---

In [11]:
# Build the scaled, batched PRE-GAP train/test datasets and keep the fitted scaler
train_dataset, test_dataset, scaler = prepare_data(
    pre_gap_train_combined=pre_gap_train_combined,
    pre_gap_test_combined=pre_gap_test_combined,
    weather_features=weather_features,
)

In [None]:
# Train the model, conditioning on one input per weather feature, then plot the losses
num_weather_features = len(weather_features)
solar_gan, history = train_solar_gan(
    train_dataset,
    num_features=num_weather_features,
)
plot_training_history(history)



In [None]:
# Evaluate the trained model on the test set and plot predictions against the true series
predictions, true_values = evaluate_model(solar_gan, test_dataset, scaler)

plt.figure(figsize=(12, 6))
plt.plot(true_values, color="orange", label="True Values")
plt.plot(predictions, color="green", label="Predicted Values")
# NOTE(review): "WGAP-GP" may be a typo for "WGAN-GP" - confirm before changing the title.
plt.title("WGAP-GP Predictions vs Actual")
plt.legend()
plt.show()