In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Dropout, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam

# Sparse categorical cross-entropy over raw logits (a classification loss).
# It is not used by the model compiled in build_transformer_model, which is
# compiled with 'mean_squared_error'.
loss_function = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

def scaled_dot_product_attention(query, key, value):
    """Compute softmax(Q K^T / sqrt(d_k)) V.

    The last two axes of each argument are treated as (sequence, depth); any
    leading axes are batch-like and are handled by ``tf.matmul``.

    Args:
        query: Tensor whose last axis has the same size as ``key``'s last axis.
        key: Tensor whose second-to-last axis has the same size as ``value``'s.
        value: Tensor that is averaged using the attention weights.

    Returns:
        A tuple ``(output, attention_weights)``: the weighted sum of ``value``
        and the softmax-normalised weights (normalised over the key axis).
    """
    matmul_qk = tf.matmul(query, key, transpose_b=True)
    # Cast the scale factor to the dtype of the logits rather than to a
    # hard-coded float32, so the division also works for float16 / bfloat16 /
    # float64 inputs (e.g. under a mixed-precision policy).
    depth = tf.cast(tf.shape(key)[-1], matmul_qk.dtype)
    logits = matmul_qk / tf.math.sqrt(depth)
    attention_weights = tf.nn.softmax(logits, axis=-1)
    output = tf.matmul(attention_weights, value)
    return output, attention_weights

class MultiHeadAttention(tf.keras.layers.Layer):
    """Multi-head attention built from four Dense projections.

    Queries, keys and values are each projected to ``d_model`` features, split
    into ``num_heads`` heads of ``d_model // num_heads`` features, attended
    with ``scaled_dot_product_attention``, re-joined and projected once more.

    Args:
        d_model: Width of the projections and of the layer output.
        num_heads: Number of attention heads; must divide ``d_model``.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments (``name``,
            ``dtype``, ``trainable``, ...). Accepting them is required for the
            layer to be re-created from its config.
    """

    def __init__(self, d_model, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.d_model = d_model
        assert d_model % self.num_heads == 0, (
            "d_model must be divisible by num_heads"
        )
        self.depth = d_model // self.num_heads
        self.wq = Dense(d_model)
        self.wk = Dense(d_model)
        self.wv = Dense(d_model)
        self.dense = Dense(d_model)

    def get_config(self):
        """Return the constructor arguments so the layer can be serialised."""
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
        })
        return config

    def split_heads(self, x, batch_size):
        """Reshape ``x`` to (batch_size, num_heads, -1, depth).

        The last axis of ``x`` is split into ``num_heads`` chunks of ``depth``
        features, then the head axis is moved in front of the sequence axis.
        """
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, v, k, q):
        """Attend from ``q`` over ``k`` / ``v``.

        Note the argument order: value, key, query.

        Returns:
            Tensor reshaped to (batch_size, -1, d_model) and passed through the
            final Dense projection. The attention weights are discarded.
        """
        batch_size = tf.shape(q)[0]
        q = self.wq(q)
        k = self.wk(k)
        v = self.wv(v)
        q = self.split_heads(q, batch_size)
        k = self.split_heads(k, batch_size)
        v = self.split_heads(v, batch_size)
        scaled_attention, _ = scaled_dot_product_attention(q, k, v)
        # Undo the head/sequence swap done in split_heads before merging heads.
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model))
        output = self.dense(concat_attention)
        return output

def point_wise_feed_forward_network(d_model, dff):
    """Build the two-layer feed-forward sub-network of an encoder layer.

    Args:
        d_model: Number of units of the second (output) Dense layer.
        dff: Number of units of the first, ReLU-activated Dense layer.

    Returns:
        A ``tf.keras.Sequential`` applying the two Dense layers in order.
    """
    expansion = Dense(dff, activation='relu')
    contraction = Dense(d_model)
    return tf.keras.Sequential([expansion, contraction])

class EncoderLayer(tf.keras.layers.Layer):
    """Single Transformer encoder block: self-attention followed by a
    feed-forward network, each with dropout, a residual connection and layer
    normalisation.

    The input is additionally passed through a Dense projection to ``d_model``
    features before the first residual addition, because self-attention is
    applied to the raw input while its output always has ``d_model`` features.

    Args:
        d_model: Feature width of the attention output and of the block output.
        num_heads: Number of attention heads; must divide ``d_model``.
        dff: Hidden width of the feed-forward network.
        rate: Dropout rate used after attention and after the feed-forward net.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments (``name``,
            ``dtype``, ``trainable``, ...). Accepting them is required for the
            layer to be re-created from its config when a saved model is loaded.
    """

    def __init__(self, d_model, num_heads, dff, rate=0.1, **kwargs):
        super().__init__(**kwargs)
        # Keep the hyperparameters so get_config() can report them.
        self.d_model = d_model
        self.num_heads = num_heads
        self.dff = dff
        self.rate = rate

        self.mha = MultiHeadAttention(d_model, num_heads)
        self.ffn = point_wise_feed_forward_network(d_model, dff)
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)

        # Projects the input x to d_model features for the residual connection.
        self.dense_proj = Dense(d_model)

    def get_config(self):
        """Return the constructor arguments.

        A layer with custom ``__init__`` arguments that is part of a saved
        model needs this so the model config can be written and the layer
        re-instantiated on load.
        """
        config = super().get_config()
        config.update({
            'd_model': self.d_model,
            'num_heads': self.num_heads,
            'dff': self.dff,
            'rate': self.rate,
        })
        return config

    def call(self, x, training=None):
        """Apply the encoder block to ``x``.

        Args:
            x: Input tensor; used as value, key and query of the attention.
            training: Forwarded to both Dropout layers. ``None`` (the default)
                leaves the decision to Keras.

        Returns:
            The layer-normalised output of the feed-forward sub-block.
        """
        attn_output = self.mha(x, x, x)  # self-attention: v, k, q are all x
        attn_output = self.dropout1(attn_output, training=training)

        # Project input x to have the same dimension as attn_output so the two
        # can be added.
        x_proj = self.dense_proj(x)
        out1 = self.layernorm1(x_proj + attn_output)

        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        out2 = self.layernorm2(out1 + ffn_output)
        return out2

def build_transformer_model(sequence_length, feature_size, d_model=512, num_heads=8, dff=2048, rate=0.1):
    """Build and compile a one-encoder-layer Transformer regressor.

    Args:
        sequence_length: Number of time steps per input sample.
        feature_size: Number of features per time step.
        d_model: Feature width used inside the encoder layer.
        num_heads: Number of attention heads.
        dff: Hidden width of the encoder's feed-forward network.
        rate: Dropout rate of the encoder layer.

    Returns:
        A compiled ``Model`` (Adam optimiser, mean-squared-error loss, MAE
        metric).
    """
    model_input = Input(shape=(sequence_length, feature_size))
    encoder = EncoderLayer(d_model, num_heads, dff, rate)
    encoded = encoder(model_input)

    # Linear output layer for forecasting.
    # NOTE(review): Dense acts on the last axis only, so the model presumably
    # emits one value per time step — confirm the targets have a matching shape.
    forecast_head = Dense(1, activation='linear')
    forecast = forecast_head(encoded)

    model = Model(inputs=model_input, outputs=forecast)
    model.compile(
        optimizer='adam',
        loss='mean_squared_error',
        metrics=['mae'],
    )
    return model


In [None]:
import numpy as np
import tensorflow as tf

# Seed NumPy and TensorFlow so that a "Restart & Run All" is repeatable.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Directory holding the .npy arrays; the trained model is written here too.
DATA_DIR = 'D:/IIT/Y5/Final Year Project/FYP/Datasets/archive4_1/unisolar/Train and Test data sets'

# NOTE(review): the variables are named "train" but the files loaded are
# X_test.npy / y_test.npy. If train files exist in DATA_DIR this is most likely
# a mistake (the model would be fitted on the test split) — confirm and switch.
train_features_path = f'{DATA_DIR}/X_test.npy'
train_labels_path = f'{DATA_DIR}/y_test.npy'

# Load your data
X_train = np.load(train_features_path)
y_train = np.load(train_labels_path)

# Instantiate the model
sequence_length = 24  # 24 time steps for one day of hourly data
feature_size = 6  # Number of features in your dataset

# Fail early, with a readable message, if the arrays do not match the
# dimensions the model is about to be built for.
if X_train.ndim != 3 or X_train.shape[1:] != (sequence_length, feature_size):
    raise ValueError(
        f'Expected features of shape (N, {sequence_length}, {feature_size}), '
        f'got {X_train.shape}'
    )
if len(X_train) != len(y_train):
    raise ValueError(
        f'Features and labels differ in length: {len(X_train)} vs {len(y_train)}'
    )

model = build_transformer_model(sequence_length, feature_size)

# Train the model; the last 20% of the samples are held out for validation.
history = model.fit(X_train, y_train, batch_size=32, epochs=100, validation_split=0.2)

# Save the model after training (legacy HDF5 format).
model.save(f'{DATA_DIR}/solar_forecasting_model.h5')


In [None]:
# Save the trained model again, this time to a '.keras' file (the training
# cell already wrote a '.h5' copy). Requires `model` from the training cell.
model.save('D:/IIT/Y5/Final Year Project/FYP/Datasets/archive4_1/unisolar/Train and Test data sets/solar_forecasting_model.keras')
