In [77]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

from sktime.performance_metrics.forecasting import (
    mean_absolute_percentage_error,
    mean_absolute_error,
)

from operator import itemgetter 

import seaborn as sns
import matplotlib.pyplot as plt

import random


In [61]:
class AutoEncoder(nn.Module):
    def __init__(self, input_shape, latent_dim):
        super(AutoEncoder, self).__init__()


        self.encoder = nn.Sequential(nn.Linear(input_shape, input_shape-2),
                                           nn.ReLU(),
                                           nn.Linear(input_shape-2, input_shape-4),
                                           nn.ReLU(),
                                           nn.Linear( input_shape-4, input_shape-6),
                                           nn.ReLU(),
                                           nn.Linear( input_shape-6, latent_dim),
                                           nn.ReLU(),
                                           )
        
        self.decoder = nn.Sequential(nn.Linear(latent_dim, input_shape-6),
                                    nn.ReLU(),
                                    nn.Linear( input_shape-6, input_shape-4),
                                    nn.ReLU(),
                                    nn.Linear(input_shape-4, input_shape-2),
                                    nn.ReLU(),
                                    nn.Linear(input_shape-2, input_shape),
                                    nn.ReLU(),
                                           )


        # self.rnn_layer = nn.RNN(input_size=input_shape,hidden_size=10,num_layers=3, nonlinearity='relu')
        # self.layer_1 =  nn.Linear(10, n_dense_1)
        # self.bn1 = nn.BatchNorm1d(num_features=n_dense_1)
        # self.layer_2 =  nn.Linear(n_dense_1, n_dense_2)
        # self.layer_3 =  nn.Linear(n_dense_2, output_shape, dtype= torch.float32)
        
        
    def forward(self, input):
        x = self.encoder(input)
        outputs = self.decoder(x)

        return outputs
    
    def encoder_predict(self, input):
        outputs = self.encoder(input)
        return outputs
    

In [62]:
training_data = pd.read_csv('../train_df_2.csv')
training_data = training_data.drop(['Unnamed: 0'], axis=1)

feature_data = training_data[[f'RV{i}' for i in range(0,10 + 1)]]

In [63]:
feature_data_tensor = torch.tensor(feature_data.to_numpy(), dtype= torch.float32)

In [None]:
feature_data_tensor[] 

In [78]:
total_indexes = list(range(len(feature_data)))

In [115]:
auto_encoder = AutoEncoder( input_shape=len(feature_data.columns), latent_dim=5)
optimizer = torch.optim.Adam(auto_encoder.parameters(), lr = 0.01)

batch_size = 30

for epoch in range(10000):
    sampled_indexes = random.sample(total_indexes, k=batch_size)
    sampled_tensors = torch.tensor(itemgetter(*sampled_indexes)(feature_data.to_numpy()), dtype= torch.float32)
    pred = auto_encoder(sampled_tensors)
    
    cost = nn.functional.mse_loss(pred,sampled_tensors)
    optimizer.zero_grad()
    cost.backward()
    optimizer.step()

    if epoch % 100==0:
        print('epoch:',epoch,' cost:',cost)

epoch: 0  cost: tensor(18736380., grad_fn=<MseLossBackward0>)
epoch: 100  cost: tensor(4348327.5000, grad_fn=<MseLossBackward0>)
epoch: 200  cost: tensor(4379557., grad_fn=<MseLossBackward0>)
epoch: 300  cost: tensor(4042839., grad_fn=<MseLossBackward0>)
epoch: 400  cost: tensor(3350926., grad_fn=<MseLossBackward0>)
epoch: 500  cost: tensor(3039632.7500, grad_fn=<MseLossBackward0>)
epoch: 600  cost: tensor(3873139.2500, grad_fn=<MseLossBackward0>)
epoch: 700  cost: tensor(2686432.2500, grad_fn=<MseLossBackward0>)
epoch: 800  cost: tensor(2553183.7500, grad_fn=<MseLossBackward0>)
epoch: 900  cost: tensor(3064450.7500, grad_fn=<MseLossBackward0>)
epoch: 1000  cost: tensor(3280841.2500, grad_fn=<MseLossBackward0>)
epoch: 1100  cost: tensor(2999315.7500, grad_fn=<MseLossBackward0>)
epoch: 1200  cost: tensor(3091421.2500, grad_fn=<MseLossBackward0>)
epoch: 1300  cost: tensor(2446339., grad_fn=<MseLossBackward0>)
epoch: 1400  cost: tensor(4502908., grad_fn=<MseLossBackward0>)
epoch: 1500  co

In [116]:
check_index = 10
print('input: ',feature_data_tensor[check_index],'\n')
print('input recreated: ', auto_encoder(feature_data_tensor[check_index]),'\n')
print('latent space: ', auto_encoder.encoder_predict(feature_data_tensor[check_index]),'\n')

input:  tensor([4106.3901, 3554.4900, 3554.4900, 3554.4900, 3554.4900, 3554.4900,
        3554.4900, 3554.4900, 3554.4900, 3423.4900, 3423.4900]) 

input recreated:  tensor([3973.8352, 3780.9116, 3683.4824, 3621.3030, 3568.5515, 3520.6174,
        3485.8430, 3451.2693, 3413.9028,    0.0000, 3337.7742],
       grad_fn=<ReluBackward0>) 

latent space:  tensor([    0.0000,     0.0000,     0.0000, 11455.8770,     0.0000],
       grad_fn=<ReluBackward0>) 



In [44]:
# tensorflow implementation

encoder = Sequential()
encoder.add(Dense(units=5, activation = 'relu', input_shape =[11]))

decoder = Sequential()
decoder.add(Dense(units=11, activation = 'relu', input_shape =[5]))

auto_encoder_tflow = Sequential([encoder, decoder])
auto_encoder_tflow.compile(loss='mse', optimizer=SGD(learning_rate=0.001))

In [45]:
auto_encoder_tflow.fit(feature_data, feature_data, epochs=1000, batch_size=100)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E

<keras.callbacks.History at 0x1dd0a05d3f0>

In [46]:
feature_data.iloc[0].values

array([3356.83, 3142.41, 3008.41, 3004.41, 3004.41, 2870.41, 2870.41,
       2870.41, 2642.41, 2642.41, 2539.41])

In [47]:
feature_data.iloc[0].values.reshape(1,-1)

array([[3356.83, 3142.41, 3008.41, 3004.41, 3004.41, 2870.41, 2870.41,
        2870.41, 2642.41, 2642.41, 2539.41]])

In [48]:
auto_encoder_tflow.predict(feature_data.iloc[0].values.reshape(1,-1))



array([[   0.    , 1864.1183,    0.    ,    0.    ,    0.    , 1635.3627,
           0.    ,    0.    ,    0.    ,    0.    ,    0.    ]],
      dtype=float32)