In [1]:
# PyTorch implementation for LSTM FCN for Time Series Classification
# Original code in TensorFlow https://github.com/titu1994/LSTM-FCN
# Paper https://arxiv.org/abs/1709.05206
#
# By David Campos and Teodor Vernica

In [2]:
from sail.models.torch.lstm_fcn import _LSTM_FCN, LSTMFCNClassifier
from sail.models.torch.fcn import FCNClassifier # An optional model without LSTM

1. Importing and checking that the model works.


In [3]:
# Model works
import torch
input = torch.randn(5, 10)

model = _LSTM_FCN(in_channels=1,input_size=input.size()[1],classes=3)
output = model(input)
print(output)

tensor([[0.3168, 0.3641, 0.3191],
        [0.3346, 0.3581, 0.3073],
        [0.3128, 0.3675, 0.3197],
        [0.3079, 0.3701, 0.3220],
        [0.3245, 0.3923, 0.2832]], grad_fn=<SoftmaxBackward0>)


In [4]:
# Skorch works
import numpy as np
from sklearn.datasets import make_classification

X = torch.randn(5, 10)
y = np.random.randint(3, size=10)

X, y = make_classification(30, 10, n_informative=5, random_state=0)

X = X.astype(np.float32)
y = y.astype(np.int64)

model_skorch = LSTMFCNClassifier(in_channels=1,input_size=10, lstm_layers=8, classes=3)

partial_fit = model_skorch.partial_fit(X,y)
print(partial_fit)
predict = model_skorch.predict(X)
print(predict)

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m1.1251[0m  0.0807
<class 'sail.models.torch.lstm_fcn.LSTMFCNClassifier'>[initialized](
  module_=_LSTM_FCN(
    (lstm): LSTM(1, 128, num_layers=8)
    (drop): Dropout(p=0.8, inplace=False)
    (conv_layers): Sequential(
      (0): ConvBlock(
        (conv_layers): Sequential(
          (0): Conv1dSamePadding(1, 128, kernel_size=(8,), stride=(1,))
          (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU()
        )
      )
      (1): ConvBlock(
        (conv_layers): Sequential(
          (0): Conv1dSamePadding(128, 256, kernel_size=(5,), stride=(1,))
          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU()
        )
      )
      (2): ConvBlock(
        (conv_layers): Sequential(
          (0): Conv1dSamePadding(256, 128, kernel_size=(3,), stride=(1,))
          (1): BatchNorm1d(128, eps=

2. Loading a time-series dataset [(ACSF1)](http://timeseriesclassification.com/description.php?Dataset=ACSF1), from [timeseriesclassification.com](http://timeseriesclassification.com/dataset.php) to test the model.


In [5]:
import requests, zipfile, io
r = requests.get("https://www.timeseriesclassification.com/aeon-toolkit/ACSF1.zip", stream=True)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall("data")

In [6]:
import arff # pip install liac-arff

train_dataset = arff.load(open('data/ACSF1_TRAIN.arff'))
train_data = np.array(train_dataset['data'])

X_train = train_data[:,0:-1]
y_train = train_data[:,-1]

X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.int64)

print(X_train)
print(y_train)

test_dataset = arff.load(open('data/ACSF1_TEST.arff'))

test_data = np.array(test_dataset['data'])

X_test = test_data[:,0:-1]
y_test = test_data[:,-1]

X_test = X_test.astype(np.float32)
y_test = y_test.astype(np.int64)

[[-0.58475375 -0.58475375  1.730991   ... -0.5786034   1.7327257
  -0.584734  ]
 [-0.59143436 -0.51110417  1.7268198  ... -0.5807305   1.7273961
  -0.5807305 ]
 [-0.57794535 -0.57794535  1.7307931  ... -0.5497977   1.7347268
  -0.5777511 ]
 ...
 [-0.99827707  0.10246194  1.6069248  ...  0.09938861  1.5636905
  -0.69265294]
 [-0.9414731   0.58721364  1.5236441  ...  0.5822302   1.5482239
  -0.645292  ]
 [-0.6615355  -0.6615355   1.5103272  ... -0.6605395   1.5101048
  -0.6606845 ]]
[9 9 9 9 9 9 9 9 9 9 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0
 0 0 0 6 6 6 6 6 6 6 6 6 6 5 5 5 5 5 5 5 5 5 5 2 2 2 2 2 2 2 2 2 2 8 8 8 8
 8 8 8 8 8 8 7 7 7 7 7 7 7 7 7 7 1 1 1 1 1 1 1 1 1 1]


3. **Batch training.** Testing the model on the time-series data with batch training. The model learns, given the entire data-set and enough epochs.


In [7]:
# Test on time series with all data at once
classes = 10

model_skorch = LSTMFCNClassifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)
#model_skorch = FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)

#good results around 50 epochs
for i in range(0,25):
    partial_fit = model_skorch.partial_fit(X_train, y_train)

print(partial_fit.score(X_test, y_test))

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m2.3072[0m  8.1256
      2        [36m2.2861[0m  7.6160
      3        [36m2.2775[0m  8.8214
      4        [36m2.2580[0m  7.9017
      5        [36m2.2459[0m  7.6660
      6        [36m2.2294[0m  7.8081
      7        [36m2.2166[0m  7.7658
      8        [36m2.2022[0m  8.3024
      9        [36m2.1983[0m  7.6986
     10        [36m2.1846[0m  7.8461
     11        [36m2.1674[0m  7.5518
     12        [36m2.1591[0m  7.6780
     13        [36m2.1426[0m  7.6557
     14        [36m2.1387[0m  7.9912
     15        [36m2.1215[0m  7.8065
     16        [36m2.1110[0m  7.8733
     17        [36m2.1008[0m  7.8367
     18        [36m2.0901[0m  7.6144
     19        [36m2.0835[0m  7.5784
     20        [36m2.0687[0m  7.6918
     21        [36m2.0625[0m  7.7997
     22        [36m2.0463[0m  7.7066
     23        [36m2.0401[0m  7.8978
     24        [36m2.0259[0m  7.8032
    

4. **Mini-batch training.** In an online environment, we might not have access to all data at once or might not afford to re-train the model with all data for multiple epochs. So we test the model with mini-batch training.


In [8]:
# Test on time series data in mini-batches
from sklearn.utils import gen_batches

batch_size = 10

model_skorch = LSTMFCNClassifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)

# We can not use epochs because it is online learning
# for i in range(0,10): 
#     partial_fit = model_skorch.partial_fit(X_train, y_train)

# Batch processing, we have 100 time series samples, so the model trains with 10 examples every time
for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m2.4948[0m  0.4546
      2        [36m2.1583[0m  0.4182
      3        2.3510  0.4466
      4        2.2528  0.4571
      5        2.7746  0.4494
      6        2.2171  0.4411
      7        2.5658  0.4806
      8        2.5322  0.5316
      9        2.3279  0.5038
     10        2.5547  0.5076
[7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7]
[9 9 9 9 9 9 9 9 9 9 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0
 0 0 0 6 6 6 6 6 6 6 6 6 6 5 5 5 5 5 5 5 5 5 5 2 2 2 2 2 2 2 2 2 2 8 8 8 8
 8 8 8 8 8 8 7 7 7 7 7 7 7 7 7 7 1 1 1 1 1 1 1 1 1 1]
0.1


5. **Mini-batch training without LSTM.** The model does not do as well in an on-line setting. That could be attributed to the LSTM component requiring more training, which depends on the batch. To compare, we test a version of the model without the LSTM component on the same dataset dataset, which is faster and sometimes gives better results.


In [9]:
batch_size = 10

model_skorch = FCNClassifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)
    
# Batch processing, we have 100 time series samples, so the model trains with 10 examples every time
for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)
print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m1.7550[0m  0.0710
      2        2.4336  0.0740
      3        2.3812  0.0661
      4        2.1999  0.0747
      5        2.6929  0.0705
      6        2.8383  0.0669
      7        2.4946  0.0674
      8        2.4134  0.0688
      9        2.5974  0.0684
     10        2.5182  0.0706
[9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9
 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9 9]
[9 9 9 9 9 9 9 9 9 9 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0
 0 0 0 6 6 6 6 6 6 6 6 6 6 5 5 5 5 5 5 5 5 5 5 2 2 2 2 2 2 2 2 2 2 8 8 8 8
 8 8 8 8 8 8 7 7 7 7 7 7 7 7 7 7 1 1 1 1 1 1 1 1 1 1]
0.1


6. **Loading a larger dataset.** To test this more, we can try the two incremental versions of the model on a larger time-series dataset, such as [FordA](http://timeseriesclassification.com/description.php?Dataset=FordA).


In [10]:
import requests, zipfile, io
r = requests.get("https://www.timeseriesclassification.com/aeon-toolkit/FordA.zip", stream=True)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall("data")

In [11]:
train_dataset = arff.load(open('data/FordA_TRAIN.arff'))
train_data = np.array(train_dataset['data'])

X_train = train_data[:,0:-1]
y_train = train_data[:,-1]

X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.int64)

print(X_train.shape)
print(y_train.shape)

print(X_train)
        
y_train = np.where(y_train == -1, 0, y_train)
        
print(y_train)
    

test_dataset = arff.load(open('data/FordA_TEST.arff'))

test_data = np.array(test_dataset['data'])

X_test = test_data[:,0:-1]
y_test = test_data[:,-1]

X_test = X_test.astype(np.float32)
y_test = y_test.astype(np.int64)

y_test = np.where(y_test == -1, 0, y_test)


(3601, 500)
(3601,)
[[-0.79717165 -0.66439205 -0.37301463 ... -0.66439205 -1.0737958
  -1.5643427 ]
 [ 0.8048547   0.6346286   0.37347448 ... -0.71488506 -0.5604429
  -0.31908643]
 [ 0.7279851   0.11128392 -0.49912438 ...  0.39446303  0.3394004
   0.2553906 ]
 ...
 [-0.5700543  -0.33316523 -0.29351854 ... -1.3937145  -0.9427333
  -0.27072167]
 [ 2.006732    2.07915     2.0220363  ... -0.43214503 -0.44123125
  -0.2807089 ]
 [-0.1252409  -0.32536268 -0.48823696 ...  0.5557605   0.574451
   0.573116  ]]
[0 1 0 ... 0 1 0]


7. **Mini-batch learning on the larger dataset.**


In [12]:
from sklearn.utils import gen_batches

batch_size = 100
classes = 2

model_skorch = LSTMFCNClassifier(in_channels=1,input_size=500, lstm_layers=8, classes=classes)

for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    y_train_batch = np.where(y_train_batch == -1, 0, y_train_batch)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m0.6973[0m  3.1765
      2        [36m0.6937[0m  3.1929
      3        [36m0.6876[0m  3.1522
      4        0.6907  3.2283
      5        [36m0.6824[0m  3.0868
      6        0.6851  3.1812
      7        [36m0.6823[0m  3.1457
      8        0.6830  3.2029
      9        [36m0.6738[0m  3.2548
     10        [36m0.6720[0m  3.1630
     11        [36m0.6710[0m  3.0381
     12        0.6731  2.9658
     13        [36m0.6605[0m  3.3088
     14        0.6660  3.3455
     15        0.6712  3.3855
     16        [36m0.6559[0m  3.4832
     17        0.6622  3.4117
     18        [36m0.6543[0m  3.5346
     19        0.6566  3.6088
     20        0.6742  3.4044
     21        0.6660  3.4015
     22        0.6586  3.3737
     23        [36m0.6448[0m  3.3581
     24        [36m0.6267[0m  3.3535
     25        0.6335  3.1447
     26        0.6350  3.2810
     27        0.6480  3.4103
     28      

8. **Mini-batch learning on the larger dataset without LSTM**


In [13]:
batch_size = 100
classes = 2

#model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)
model_skorch = FCNClassifier(in_channels=1,input_size=945, lstm_layers=8, classes=classes)
    
for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    y_train_batch = np.where(y_train_batch == -1, 0, y_train_batch)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss     dur
-------  ------------  ------
      1        [36m0.6997[0m  1.2719
      2        [36m0.6901[0m  1.3205
      3        [36m0.6806[0m  1.2954
      4        0.6820  1.2806
      5        [36m0.6652[0m  1.2817
      6        [36m0.6650[0m  1.3706
      7        [36m0.6642[0m  1.2863
      8        0.6681  1.2711
      9        [36m0.6496[0m  1.2817
     10        [36m0.6426[0m  1.2918
     11        [36m0.6389[0m  1.2511
     12        0.6411  1.3365
     13        [36m0.6243[0m  1.2790
     14        0.6294  1.2918
     15        0.6383  1.2567
     16        [36m0.6139[0m  1.2814
     17        0.6322  1.2965
     18        0.6169  1.3592
     19        0.6224  1.3820
     20        0.6431  1.3096
     21        0.6221  1.3296
     22        0.6252  1.3688
     23        [36m0.5974[0m  1.7388
     24        [36m0.5647[0m  1.9660
     25        0.5914  1.5536
     26        0.5754  1.4937
     27        0.6097  1.5677
     28      