In [1]:
import math
import numpy as np
import pandas as pd
import tensorflow as tf
from functools import partial

In [6]:
df = pd.read_csv('household_power_consumption Preprocessed.csv',parse_dates=['date_time'], index_col= 'date_time')
df.head()

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
date_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0


In [7]:
def train_test_split(df):
    
    # compute split point
    end_idx = df.shape[0]* 70 // 100
    
    train_data = df.iloc[:end_idx, : ]
    test_data = df.iloc[end_idx:, :]
    
    return train_data, test_data

In [8]:
from sklearn.preprocessing import MinMaxScaler
def scale_data(train, test):
    scaler = MinMaxScaler().fit(train)
    return scaler.transform(train), scaler.transform(test), scaler

In [9]:
def convert_to_supervised(df):

    input_features = []
    ouput_feature = []
    
    len_df = df.shape[0]
    
    for i in range(len_df):
        
        end_idx = i + 1 
        
        if end_idx > len_df-1:
            break
            
        input_x , output_y = df[i:end_idx, 1:], df[end_idx: end_idx+1, 0]
        
        input_features.append(input_x)
        ouput_feature.append(output_y)
    
    return np.array(input_features), np.mean(np.array(ouput_feature), axis=1)

# 

### Download the performer package from this link -> https://drive.google.com/file/d/1zA3mrpr_4G-cpNkvfVwgNktUoDdyVVMz/view?usp=sharing 
### Unzip the package and move it to -> C:\Users\UserName\anaconda3\Lib\site-packages 

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

from performer.networks.linear_attention import Performer

class TokenAndPositionEmbedding(layers.Layer):
    def __init__(self, maxlen, vocab_size, embed_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = layers.Embedding(input_dim=vocab_size, output_dim=embed_dim)
        self.pos_emb = layers.Embedding(input_dim=maxlen, output_dim=embed_dim)

    def call(self, x):
        maxlen = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=maxlen, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions


class TransformerBlock(layers.Layer):
    def __init__(self, embed_dim, num_heads, ff_dim, method, supports, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = Performer(num_heads=num_heads, key_dim=embed_dim,
                             attention_method=method, supports=supports)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training):
        attn_output = self.att([inputs, inputs])
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [3]:
# Test

vocab_size = 20000 
maxlen = 200 

print(len(x_train), "Training sequences")
print(len(x_val), "Validation sequences")
x_train = keras.preprocessing.sequence.pad_sequences(x_train, maxlen=maxlen)
x_val = keras.preprocessing.sequence.pad_sequences(x_val, maxlen=maxlen)

embed_dim = 32 
num_heads = 2 
ff_dim = 32 
method = 'linear'
supports = 10

inputs = layers.Input(shape=(maxlen,))
embedding_layer = TokenAndPositionEmbedding(maxlen, vocab_size, embed_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, num_heads,
                                     ff_dim, method, supports)
x = transformer_block(x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(2, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile("adam", "sparse_categorical_crossentropy")

#     model.compile("adam", "mean_square_error") # The loss does not change

callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=5)

history = model.fit(
    x_train, y_train, batch_size=32, epochs=20,verbose=1, shuffle=False, callbacks=[callback]
)

25000 Training sequences
25000 Validation sequences
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


### Energy dataset

In [19]:
train, val = train_test_split(df.iloc[:3000,:])
train, val, scaler = scale_data(train, test)
x_train , y_train = convert_to_supervised(train)
x_val , y_val = convert_to_supervised(test)

In [30]:
vocab_size = 3000 
maxlen = 3000 

embed_dim = 2
num_heads = 2 
ff_dim = 1
method = 'linear'
supports = 10

inputs = layers.Input(shape=(x_train.shape[1],x_train.shape[2]))

embedding_layer = TokenAndPositionEmbedding(maxlen, 
                                            vocab_size, 
                                            embed_dim
                                           )

x = embedding_layer(inputs)
transformer_block = TransformerBlock(embed_dim, 
                                     num_heads,
                                     ff_dim, 
                                     method, 
                                     supports
                                    )
x = transformer_block(x)
x = layers.GlobalAveragePooling1D()(x)
x = layers.Dropout(0.1)(x)
x = layers.Dense(20, activation="relu")(x)
x = layers.Dropout(0.1)(x)
outputs = layers.Dense(2, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=outputs)
model.compile("adam", "sparse_categorical_crossentropy")

#     model.compile("adam", "mean_square_error") # The loss does not change

history = model.fit(
    x_train, y_train, batch_size=32, epochs=10,verbose=1
)

ValueError: Input 0 of layer "global_average_pooling1d_10" is incompatible with the layer: expected ndim=3, found ndim=4. Full shape received: (None, 1, 6, 2)