In [63]:
import torch
import copy

import seaborn as sns
from pylab import rcParams
import matplotlib.pyplot as plt
from matplotlib import rc

import numpy as np
import glob, os
import string
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix
from sklearn.metrics import precision_recall_fscore_support

from torch import nn, optim
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.metrics.pairwise import euclidean_distances
import torch.nn.functional as F

In [64]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
dir = './models/*'
model_files = glob.glob(dir)
device

device(type='cuda')

In [65]:
class Encoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(Encoder, self).__init__()
        
        self.seq_len, self.n_features = seq_len, n_features
        self.embedding_dim, self.hidden_dim = embedding_dim, 2 * embedding_dim
        
        self.rnn1 = nn.LSTM(
          input_size=n_features,
          hidden_size=self.hidden_dim,
          num_layers=1,
          batch_first=True
        )
        self.rnn2 = nn.LSTM(
          input_size=self.hidden_dim,
          hidden_size=embedding_dim,
          num_layers=1,
          batch_first=True
        )
    def forward(self, x):

        x = x.reshape((1, self.seq_len, self.n_features))
        
        x, (_, _) = self.rnn1(x)
        x, (hidden_n, _) = self.rnn2(x)
        
        return hidden_n.reshape((self.seq_len, self.embedding_dim))

In [66]:
class Decoder(nn.Module):
    def __init__(self, seq_len, input_dim=64, n_features=1):
        super(Decoder, self).__init__()
        
        self.seq_len, self.input_dim = seq_len, input_dim
        self.hidden_dim, self.n_features = 2 * input_dim, n_features
       
        self.rnn1 = nn.LSTM(
            input_size=input_dim,
            hidden_size=input_dim,
            num_layers=1,
            batch_first=True
        )
        self.rnn2 = nn.LSTM(
            input_size=input_dim,
            hidden_size=self.hidden_dim,
            num_layers=1,
            batch_first=True
        )
        
        self.output_layer = nn.Linear(self.hidden_dim, n_features)
    def forward(self, x):
        x = x.repeat(self.seq_len, self.n_features)
        x = x.reshape((self.n_features, self.seq_len, self.input_dim))
        
        x, (hidden_n, cell_n) = self.rnn1(x)
        x, (hidden_n, cell_n) = self.rnn2(x)
        x = x.reshape((self.n_features, self.hidden_dim))
        return self.output_layer(x)

In [67]:
class RecurrentAutoencoder(nn.Module):
    def __init__(self, seq_len, n_features, embedding_dim=64):
        super(RecurrentAutoencoder, self).__init__()
        self.encoder = Encoder(seq_len, n_features, embedding_dim).to(device)
        self.decoder = Decoder(seq_len, embedding_dim, n_features).to(device)
    def forward(self, x):
        x = self.encoder(x)
        x = self.decoder(x)
        return x

In [68]:
model_files[0]

'./models/model_onehot_x1data_MSE_epoch200.pth'

In [69]:
model_LSTM = torch.load(model_files[0])

In [70]:
print(model_LSTM)
alpha = list(string.ascii_uppercase)
chr2index = {alpha[i]:i for i in range(len(alpha))}

RecurrentAutoencoder(
  (encoder): Encoder(
    (rnn1): LSTM(265, 256, batch_first=True)
    (rnn2): LSTM(256, 128, batch_first=True)
  )
  (decoder): Decoder(
    (rnn1): LSTM(128, 128, batch_first=True)
    (rnn2): LSTM(128, 256, batch_first=True)
    (output_layer): Linear(in_features=256, out_features=265, bias=True)
  )
)


In [96]:
def chr2OH(alphabet):
    oh = [0 for i in range(len(alpha))]
    index = chr2index[alphabet]
    oh[index] = 1
    return oh

In [97]:
dir = './datasets/fsm/seq/0graph*.txt'
# file read
all_names = []
all_data = []
sequence_length = []
alpha = list(string.ascii_uppercase)
data_length = len(glob.glob(dir))

files = glob.glob(dir)

In [98]:
files = glob.glob(dir)
for file in files:
    datasets = []
    for rf in open(file, 'r'):
        (u, v, w) = rf[1:-2].split(', ')
        datasets.append(chr2OH(u[1]) + chr2OH(v[1]) +[float(w)])
        #datasets.append([chr2index[u[1]], chr2index[v[1]], float(w)])
    sequence_length.append(len(datasets))
    all_data.append(datasets)


In [99]:
max_sequence_length = 5
#zeros = np.zeros(53)
zeros = [0 for i in range(53)]
for ind, data in enumerate(all_data):
    if len(data) != max_sequence_length:
        for i in range(len(data), max_sequence_length):
            all_data[ind].append(zeros)

In [100]:
all_data = np.array([np.array(arr) for arr in all_data])
all_d = []
for ind, data in enumerate(all_data):
    all_d.append(data.flatten())
all_d
len(all_d)

382

In [101]:

def create_dataset_1(nparrays):
    dataset = [torch.tensor(s).unsqueeze(0).float() for s in nparrays]
  #step_per_epoch, seq_len, llen, n_features = torch.stack(dataset).shape
    #n_seq, m ,seq_len, n_features = print(torch.stack(dataset).shape)
    print(torch.stack(dataset).shape)
    n_seq, seq_len, n_features = torch.stack(dataset).shape
    #print( n_seq, m , seq_len, n_features )
    return dataset, seq_len, n_features

In [103]:
ts, seq_len_1d , n_features_1d  = create_dataset_1(all_d)
ss  = [torch.tensor(s).unsqueeze(0).float() for s in all_d]
T = ts[1]
F = ts[0]
M = ts[2]

torch.Size([382, 1, 265])


In [104]:
M

tensor([[ 1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000, 10.5201,  0.0000,  1.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  1.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000,
          0.0000,  0.0000,  

In [105]:
def predict(model, dataset):
    predictions, losses = [], []
    criterion = nn.L1Loss(reduction='mean').to(device)
    with torch.no_grad():
        model = model.eval()
        for seq_true in dataset:
            seq_true = seq_true.to(device)
            seq_pred = model(seq_true)
            print(seq_pred.shape)
            loss = criterion(seq_pred, seq_true)
            print(loss)
            print(seq_pred.cpu().numpy().flatten())
            predictions.append(seq_pred.cpu().numpy().flatten())
            losses.append(loss.item())
    return predictions, losses

In [108]:
prT, _ = predict(model_onehot_L1, T)

print(prT)

torch.Size([265, 265])
tensor(0.0180, device='cuda:0')
[ 5.2082576e-02 -4.7331393e-02  1.4518905e-02 ...  3.3811826e-02
 -4.0389672e-03  4.6819334e+00]
[array([ 5.2082576e-02, -4.7331393e-02,  1.4518905e-02, ...,
        3.3811826e-02, -4.0389672e-03,  4.6819334e+00], dtype=float32)]


In [110]:
prF, _ = predict(model_onehot_L1, F)
print(prF)

torch.Size([265, 265])
tensor(0.0199, device='cuda:0')
[ 0.50145906  0.46425033 -0.00424828 ...  0.02180772 -0.0035957
  2.4830134 ]
[array([ 0.50145906,  0.46425033, -0.00424828, ...,  0.02180772,
       -0.0035957 ,  2.4830134 ], dtype=float32)]


In [89]:
prM,_= predict(model_onehot_L1, M)
prM

torch.Size([265, 265])
tensor(0.0196, device='cuda:0')
[ 0.502443    0.4656234  -0.00696728 ...  0.02163182 -0.00367077
  2.482694  ]


[array([ 0.502443  ,  0.4656234 , -0.00696728, ...,  0.02163182,
        -0.00367077,  2.482694  ], dtype=float32)]

In [90]:
euclidean_distances(prT, prF)

array([[169.08931]], dtype=float32)

In [91]:
euclidean_distances(prT, prM)

array([[169.93518]], dtype=float32)

In [92]:
euclidean_distances(prF, prM)

array([[1.5390404]], dtype=float32)

In [93]:
cosine_similarity(prT, prF)

array([[0.74176836]], dtype=float32)

In [94]:
cosine_similarity(prT, prM)

array([[0.7395986]], dtype=float32)

In [95]:
cosine_similarity(prF, prM)

array([[0.99998033]], dtype=float32)