In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, GRU, Dropout, Layer
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split, KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
from sklearn.impute import KNNImputer
import matplotlib.pyplot as plt

In [2]:
# Load the data
data = pd.read_csv("Clearness_Index.csv")

In [3]:
# Convert date column to datetime format
data['DATE'] = pd.to_datetime(data['DATE'], format='%d-%m-%Y %H:%M')

In [4]:
# Handle missing values
imputer = KNNImputer(n_neighbors=5)
data_imputed = pd.DataFrame(imputer.fit_transform(data.drop(columns=['DATE'])), columns=data.columns[1:])


In [5]:
# Attention Layer
class AttentionLayer(Layer):
    def __init__(self):
        super(AttentionLayer, self).__init__()

    def build(self, input_shape):
        self.W = self.add_weight(shape=(input_shape[-1], 1), initializer='random_normal', trainable=True)
        self.b = self.add_weight(shape=(1,), initializer='zeros', trainable=True)

    def call(self, inputs):
        score = tf.matmul(inputs, self.W) + self.b
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * inputs
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector

In [6]:
# GCN Layer
class GCNLayer(Layer):
    def __init__(self, out_features):
        super(GCNLayer, self).__init__()
        self.dense = Dense(out_features, activation='relu')

    def call(self, inputs, adj_matrix):
        x = self.dense(inputs)  # Apply linear transformation
        x = tf.matmul(adj_matrix, x)  # Apply adjacency matrix
        return x


In [7]:
# GRU Layer
class GRULayer(Layer):
    def __init__(self, hidden_size):
        super(GRULayer, self).__init__()
        self.gru = GRU(hidden_size, return_sequences=False)

    def call(self, inputs):
        output = self.gru(inputs)  # Apply GRU
        return output

In [8]:
# Multi-Attribute Spatial Temporal Graph Convolutional Network
class MSTGCN(Model):
    def __init__(self, num_features, gcn_hidden_dim, gru_hidden_dim, output_dim, dropout_rate):
        super(MSTGCN, self).__init__()
        self.gcn_layer = GCNLayer(gcn_hidden_dim)
        self.attention_layer = AttentionLayer()
        self.gru_layer = GRULayer(gru_hidden_dim)
        self.dropout = Dropout(dropout_rate)
        self.fc = Dense(output_dim)

    def call(self, inputs, adj_matrix):
        spatial_features = self.gcn_layer(inputs, adj_matrix)
        attention_features = self.attention_layer(spatial_features)
        temporal_features = self.gru_layer(tf.expand_dims(attention_features, axis=1))
        temporal_features = self.dropout(temporal_features)
        output = self.fc(temporal_features)
        return output

In [9]:
# Add lags
window_size = 10
lagged_data = []
for i in range(len(data_imputed) - window_size):
    lagged_data.append(data_imputed.iloc[i:i + window_size].values)
lagged_data = np.array(lagged_data)

In [10]:
# Extract features and target
features = lagged_data[:, :-1, :]
target = lagged_data[:, -1, -1].reshape(-1, 1)

In [11]:
# Standardize features
scaler = StandardScaler()
features_scaled = scaler.fit_transform(features.reshape(-1, features.shape[-1])).reshape(features.shape)
target_scaled = scaler.fit_transform(target)

In [12]:
# Split the data into 80% training and 20% testing
X_train_full, X_test, y_train_full, y_test = train_test_split(features_scaled, target_scaled, test_size=0.2, random_state=42)

In [13]:
# Define model parameters
input_dim = features.shape[2]
gcn_hidden_dim = 288
gru_hidden_dim = 288
output_dim = 1
dropout_rate = 0.5
num_epochs = 200
batch_size = 32

In [14]:
# Learning rate scheduler
initial_learning_rate = 0.0005
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True
)

In [15]:
# Initialize the KFold cross-validator
kf = KFold(n_splits=5, shuffle=True, random_state=42)


In [16]:
# Prepare to collect the cross-validation results
val_losses = []
val_maes = []
val_rmses = []

In [None]:
# Cross-validation loop
for train_index, val_index in kf.split(X_train_full):
    X_train, X_val = X_train_full[train_index], X_train_full[val_index]
    y_train, y_val = y_train_full[train_index], y_train_full[val_index]

    # Convert to TensorFlow datasets
    train_dataset = tf.data.Dataset.from_tensor_slices((X_train, y_train)).batch(batch_size)
    val_dataset = tf.data.Dataset.from_tensor_slices((X_val, y_val)).batch(batch_size)

    # Initialize MSTGCN model
    model = MSTGCN(input_dim, gcn_hidden_dim, gru_hidden_dim, output_dim, dropout_rate)

    # Define loss function and optimizer
    loss_fn = tf.keras.losses.MeanSquaredError()
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

    # Compile the model
    model.compile(optimizer=optimizer, loss=loss_fn)

    # Training loop
    for epoch in range(num_epochs):
        # Training
        for X_batch, y_batch in train_dataset:
            adj_matrix_train = tf.eye(X_batch.shape[1])  # Update adjacency matrix shape to match sequence length
            with tf.GradientTape() as tape:
                outputs = model(X_batch, adj_matrix_train)
                loss = loss_fn(y_batch, outputs)
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))

        # Validation
        val_loss = 0
        val_predictions = []
        val_targets = []
        for X_batch, y_batch in val_dataset:
            adj_matrix_val = tf.eye(X_batch.shape[1])  # Update adjacency matrix shape to match sequence length
            val_outputs = model(X_batch, adj_matrix_val)
            val_loss += loss_fn(y_batch, val_outputs).numpy()
            val_predictions.append(val_outputs.numpy())
            val_targets.append(y_batch.numpy())

        val_loss /= len(val_dataset)
        val_predictions = np.concatenate(val_predictions, axis=0)
        val_targets = np.concatenate(val_targets, axis=0)
        val_mae = mean_absolute_error(val_targets, val_predictions)
        val_rmse = np.sqrt(mean_squared_error(val_targets, val_predictions))

        if epoch % 10 == 0:
            print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.numpy()}, Val Loss: {val_loss}, Val MAE: {val_mae}, Val RMSE: {val_rmse}')

    val_losses.append(val_loss)
    val_maes.append(val_mae)
    val_rmses.append(val_rmse)

print("Cross-Validation Results")
print("Average Val Loss:", np.mean(val_losses))
print("Average Val MAE:", np.mean(val_maes))
print("Average Val RMSE:", np.mean(val_rmses))

Epoch 1/200, Loss: 0.4412233829498291, Val Loss: 0.41058951306205266, Val MAE: 0.5308501465553971, Val RMSE: 0.6408783397529625
Epoch 11/200, Loss: 0.17459988594055176, Val Loss: 0.13932360441698505, Val MAE: 0.2871544803334572, Val RMSE: 0.373408131881467
Epoch 21/200, Loss: 0.12765821814537048, Val Loss: 0.10034026065572149, Val MAE: 0.2377447592337452, Val RMSE: 0.31701453492899156
Epoch 31/200, Loss: 0.04744064807891846, Val Loss: 0.08664934803338754, Val MAE: 0.21882736680874096, Val RMSE: 0.29456912509964295
Epoch 41/200, Loss: 0.012683162465691566, Val Loss: 0.07788128936609437, Val MAE: 0.20497616141906105, Val RMSE: 0.27921417836920265
Epoch 51/200, Loss: 0.004023399669677019, Val Loss: 0.07193052341267897, Val MAE: 0.19516253917496387, Val RMSE: 0.2682905494939422
Epoch 61/200, Loss: 0.0007780454470776021, Val Loss: 0.06810431091943918, Val MAE: 0.18891922547664314, Val RMSE: 0.26103817366361026
Epoch 71/200, Loss: 0.0006814705557189882, Val Loss: 0.06584537363035141, Val MAE

In [None]:
# Final Evaluation on Test Set
test_dataset = tf.data.Dataset.from_tensor_slices((X_test, y_test)).batch(batch_size)

test_loss = 0
test_predictions = []
test_targets = []

for X_batch, y_batch in test_dataset:
    adj_matrix_test = tf.eye(X_batch.shape[1])  # Update adjacency matrix shape to match sequence length
    test_outputs = model(X_batch, adj_matrix_test)
    test_loss += loss_fn(y_batch, test_outputs).numpy()
    test_predictions.append(test_outputs.numpy())
    test_targets.append(y_batch.numpy())

test_loss /= len(test_dataset)
test_predictions = np.concatenate(test_predictions, axis=0)
test_targets = np.concatenate(test_targets, axis=0)

mae = mean_absolute_error(test_targets, test_predictions)
rmse = np.sqrt(mean_squared_error(test_targets, test_predictions))
r2 = r2_score(test_targets, test_predictions)

In [None]:
print("Test MAE:", mae)
print("Test RMSE:", rmse)
print("Test R^2:", r2)

In [None]:
# Calculate NMAE and NRMSE
mean_target = np.mean(test_targets)
nmae = mae / mean_target
nrmse = rmse / mean_target

In [None]:
print("Test NMAE:", nmae)
print("Test NRMSE:", nrmse)

In [None]:
# Plot Actual vs Predicted values
plt.figure(figsize=(10, 5))
plt.plot(test_targets, label='Actual')
plt.plot(test_predictions, label='Predicted')
plt.xlabel('Sample Index')
plt.ylabel('ALLSKY_SFC_SW_DWN')
plt.title('Actual vs forecasted Values')
plt.legend()
plt.show()