In [1]:
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 18129819954233674812
, name: "/device:XLA_CPU:0"
device_type: "XLA_CPU"
memory_limit: 17179869184
locality {
}
incarnation: 4251716424907609144
physical_device_desc: "device: XLA_CPU device"
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 5060693856
locality {
  bus_id: 1
  links {
  }
}
incarnation: 4568953413862815985
physical_device_desc: "device: 0, name: GeForce GTX 1060 with Max-Q Design, pci bus id: 0000:01:00.0, compute capability: 6.1"
, name: "/device:XLA_GPU:0"
device_type: "XLA_GPU"
memory_limit: 17179869184
locality {
}
incarnation: 13180721813535250776
physical_device_desc: "device: XLA_GPU device"
]


In [2]:
import argparse
import itertools
import os
import warnings
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from timenet import TimeNet, SimpleSeriesGenerator, normalize_series

In [3]:
finetune_rate=None
early_stop=10
learning_rate=.005
embeddings_dim = 64
n_epochs = 10
batch_size = 2
num_layers = 1
dropout=.4
normalize = "none"#['none', 'zscore', 'minmax']
model_name = 'enc'
dynamic_batches=True
training_file = "D:\Work\TimeNet-master\data\dataset_train.feather"
validation_file ="D:\Work\TimeNet-master\data\dataset_valid.feather"

In [4]:
dst_train = pd.read_feather(training_file)
print(dst_train)

                  dataset                             series_id     value  \
0        ItalyPowerDemand  67995397-e0d9-4399-b2b7-aa955552af7c -0.710518   
1        ItalyPowerDemand  67995397-e0d9-4399-b2b7-aa955552af7c -1.183320   
2        ItalyPowerDemand  67995397-e0d9-4399-b2b7-aa955552af7c -1.372442   
3        ItalyPowerDemand  67995397-e0d9-4399-b2b7-aa955552af7c -1.593083   
4        ItalyPowerDemand  67995397-e0d9-4399-b2b7-aa955552af7c -1.467002   
...                   ...                                   ...       ...   
2326353         ShapesAll  178b6700-e471-46fc-adf1-50e9e4bddfd4  1.311526   
2326354         ShapesAll  178b6700-e471-46fc-adf1-50e9e4bddfd4  1.332473   
2326355         ShapesAll  178b6700-e471-46fc-adf1-50e9e4bddfd4  1.355873   
2326356         ShapesAll  178b6700-e471-46fc-adf1-50e9e4bddfd4  1.377027   
2326357         ShapesAll  178b6700-e471-46fc-adf1-50e9e4bddfd4  1.398365   

        class  
0           1  
1           1  
2           1  
3          

In [3]:
def read_series_data(filename):
    dst_train = pd.read_feather(filename)
    series = dst_train[['series_id', 'value']].groupby(by='series_id')['value'].apply(np.array)
    series = pd.DataFrame({'series': series, 'length': series.apply(len)}).sort_values(by='length', ascending=False).reset_index()
    return series[['series_id', 'series']], series['length'].max()

In [4]:
series_train, maxlen = read_series_data(training_file)
print(series_train)
print(type(series_train))
print(maxlen)
series_valid, _ = read_series_data(validation_file)
train_data = series_train['series']
valid_data = series_valid['series']
"""print(train_data.shape)
print(type(train_data))"""

                                  series_id  \
0      4c3b4de4-c315-402b-94d6-21d582fc9f94   
1      c022f81d-7927-4374-81ec-581c64613955   
2      c0142947-2352-4ef0-ac2f-0b1bfe2f3d28   
3      6b9a24fa-e1b5-44f7-a91e-f0d198215308   
4      d1069661-1bd1-493f-8839-8a1825b5e905   
...                                     ...   
10935  5728da4e-f6e0-4697-a99c-ab0adc6d16af   
10936  1d40e6d3-618b-4453-9df1-0cb32fab50cc   
10937  b7ac3bd3-a2fd-4ec4-bbc3-d2d77a0c51be   
10938  1d513b1b-4cd6-446d-8535-f2719cef1c12   
10939  ea0128e1-9362-4139-8b4d-eb2c0b31006d   

                                                  series  
0      [1.9665286, 1.8997803, 1.8332209, 1.7668505, 1...  
1      [0.5258499, 0.54246599, 0.50590382, 0.48555841...  
2      [1.7834022, 1.7849346, 1.7869075, 1.789314, 1....  
3      [1.6507212, 1.5952497, 1.5399348, 1.4847766, 1...  
4      [1.4747435, 1.4603898, 1.4472921, 1.435464, 1....  
...                                                  ...  
10935  [0.038482736, -

'print(train_data.shape)\nprint(type(train_data))'

In [5]:
def create_train_valid_sets(series, validation_split=0.2, batch_size=32):
    x = range(series.shape[0])
    batches = int(np.floor(series.shape[0] / batch_size))
    batches_train, batches_valid = train_test_split(range(batches), test_size=validation_split, random_state=0)
    idx_train = sorted(itertools.chain(*[x[(ind * batch_size):((ind + 1) * batch_size)] for ind in batches_train]))
    idx_valid = sorted(itertools.chain(*[x[(ind * batch_size):((ind + 1) * batch_size)] for ind in batches_valid]))
    return series.iloc[idx_train], series.iloc[idx_valid]

In [6]:
def reconstruct(train_data, enc, log_dir, normalize=True):
    print("Creating reconstructions...")
    pd.concat([pd.DataFrame({'index': k,
                             'series': normalize_series(train_data.tolist()[k], normalize),
                             'decoded': enc.decode(train_data.iloc[k])})
               for k in range(len(train_data))], axis='rows')\
        .reset_index()\
        .to_feather(os.path.join(log_dir, 'reconstructed_train.feather'))

def read_series_metadata(filename):
    dst_train = pd.read_feather(filename)
    return dst_train[['dataset','series_id', 'class']].drop_duplicates()

In [7]:
"""if training_file is None or len(training_file.strip()) == 0 or not os.path.isfile(training_file):
    raise Exception(f"Training dataset {training_file} does not exist")

if validation_file is None or len(validation_file.strip()) == 0 or not os.path.isfile(validation_file):
    warnings.warn(f"Validation dataset {training_file} does not exist, will use training dataset for validation")
    validation_file = None

if validation_file is None:
    series_train, maxlen = read_series_data(training_file)
    train_data, valid_data = create_train_valid_sets(series_train['series'], batch_size=batch_size)
else:"""
if dynamic_batches is True:
    maxlen = None
train_generator = SimpleSeriesGenerator(train_data, batch_size=batch_size, X_only=False, normalize=normalize, maxlen=maxlen)
valid_generator = SimpleSeriesGenerator(valid_data, batch_size=batch_size, X_only=False, normalize=normalize, maxlen=maxlen)
enc = TimeNet(embeddings_dim, num_layers=num_layers, batch_size=batch_size, model_name=model_name, dropout=dropout)
history, log_dir = enc.train(train_generator, nb_epoch=n_epochs, validation_data=valid_generator,
                             finetune_rate=finetune_rate, lr=learning_rate, early_stop=early_stop)
print(history.history)
print("Creating embeddings for the series dataset...")
generator = SimpleSeriesGenerator(train_data, batch_size=batch_size, X_only=True, normalize=normalize, maxlen=maxlen)
embed_train = enc.encode(generator)
embed_train = pd.DataFrame(embed_train)
embed_train.columns = list(map(str, range(embed_train.shape[1])))
embed_train['series_id'] = series_train['series_id']
train_meta = read_series_metadata(training_file)
embed_train = embed_train.merge(train_meta, on='series_id')
embed_train.to_feather(os.path.join(log_dir, 'embeddings.feather'))


Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
main_input (InputLayer)         [(None, None, 1)]    0                                            
__________________________________________________________________________________________________
masking (Masking)               (None, None, 1)      0           main_input[0][0]                 
__________________________________________________________________________________________________
encode_1 (GRU)                  [(None, None, 64), ( 12864       masking[0][0]                    
__________________________________________________________________________________________________
drop_encode_1 (Dropout)         (None, None, 64)     0           encode_1[0][0]                   
_______________________________________________________________________________________

  X = np.array([x for x in X if x is not None])


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
{'loss': [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], 'root_mean_squared_error': [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], 'mean_absolute_error': [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], 'mean_squared_logarithmic_error': [0.19818589091300964, 0.19818589091300964, 0.19818611443042755, 0.19818592071533203, 0.19818627834320068, 0.19818618893623352, 0.19818612933158875, 0.19818612933158875, 0.19818618893623352, 0.19818580150604248], 'val_loss': [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], 'val_root_mean_squared_error': [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], 'val_mean_absolute_error': [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan], 'val_mean_squared_logarithmic_error': [0.20617619156837463, 0.20617619156837463, 0.20617619156837463, 0.20617619156837463, 0.20617619156837463, 0.20617619156837463, 0.20617619156837463, 0.20617619156837463, 0.2061