In [1]:
# PyTorch implementation for LSTM FCN for Time Series Classification
# Original code in TensorFlow https://github.com/titu1994/LSTM-FCN
# Paper https://arxiv.org/abs/1709.05206
#
# By David Campos and Teodor Vernica

In [4]:
from sail.models.torch.lstm_fcn import _LSTM_FCN, LSTM_FCN_Classifier
from sail.models.torch.fcn import FCN_Classifier # An optional model without LSTM

1. Importing and checking that the model works.

In [5]:
# Model works
import torch
input = torch.randn(5, 10)

model = _LSTM_FCN(in_channels=1,input_size=input.size()[1],classes=3)
output = model(input)
print(output)

tensor([[0.3852, 0.3504, 0.2645],
        [0.3343, 0.4296, 0.2361],
        [0.3402, 0.3755, 0.2843],
        [0.3702, 0.3729, 0.2570],
        [0.3791, 0.3489, 0.2720]], grad_fn=<SoftmaxBackward0>)


In [6]:
# Skorch works
import numpy as np
from sklearn.datasets import make_classification

X = torch.randn(5, 10)
y = np.random.randint(3, size=10)

X, y = make_classification(30, 10, n_informative=5, random_state=0)

X = X.astype(np.float32)
y = y.astype(np.int64)

model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=10, lstm_layers=8, classes=3)

partial_fit = model_skorch.partial_fit(X,y)
print(partial_fit)
predict = model_skorch.predict(X)
print(predict)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.0889[0m       [32m0.5000[0m        [35m1.0842[0m  0.1159
<class 'sail.models.torch.lstm_fcn.LSTM_FCN_Classifier'>[initialized](
  module_=_LSTM_FCN(
    (lstm): LSTM(1, 128, num_layers=8)
    (drop): Dropout(p=0.8, inplace=False)
    (conv_layers): Sequential(
      (0): ConvBlock(
        (conv_layers): Sequential(
          (0): Conv1dSamePadding(1, 128, kernel_size=(8,), stride=(1,))
          (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU()
        )
      )
      (1): ConvBlock(
        (conv_layers): Sequential(
          (0): Conv1dSamePadding(128, 256, kernel_size=(5,), stride=(1,))
          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU()
        )
      )
      (2): ConvBlock(
        (conv_layers): Sequential(
         

2. Loading a time-series dataset [(ACSF1)](http://timeseriesclassification.com/description.php?Dataset=ACSF1), from [timeseriesclassification.com](http://timeseriesclassification.com/dataset.php) to test the model.

In [26]:
import arff # pip install liac-arff

train_dataset = arff.load(open('./Tests/Datasets/ACSF1/ACSF1_TRAIN.arff'))
train_data = np.array(train_dataset['data'])

X_train = train_data[:,0:-1]
y_train = train_data[:,-1]

X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.int64)

print(X_train)
print(y_train)

test_dataset = arff.load(open('./Tests/Datasets/ACSF1/ACSF1_TEST.arff'))

test_data = np.array(test_dataset['data'])

X_test = test_data[:,0:-1]
y_test = test_data[:,-1]

X_test = X_test.astype(np.float32)
y_test = y_test.astype(np.int64)

FileNotFoundError: [Errno 2] No such file or directory: './Tests/Datasets/ACSF1/ACSF1_TRAIN.arff'

3. **Batch training.** Testing the model on the time-series data with batch training. The model learns, given the entire data-set and enough epochs.


In [6]:
# Test on time series with all data at once
classes = 10

model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)
#model_skorch = FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)

#good results around 50 epochs
for i in range(0,25):
    partial_fit = model_skorch.partial_fit(X_train, y_train)

print(partial_fit.score(X_test, y_test))

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m2.3179[0m       [32m0.1000[0m        [35m2.3031[0m  9.9225
      2        [36m2.2976[0m       0.1000        [35m2.3026[0m  10.0113
      3        [36m2.2778[0m       0.1000        [35m2.3020[0m  10.1257
      4        [36m2.2620[0m       0.1000        [35m2.3012[0m  10.0037
      5        [36m2.2460[0m       0.1000        [35m2.3006[0m  10.6474
      6        [36m2.2273[0m       0.1000        [35m2.2995[0m  10.4894
      7        [36m2.2127[0m       0.1000        [35m2.2982[0m  11.0346
      8        [36m2.2001[0m       0.1000        [35m2.2968[0m  11.9972
      9        [36m2.1863[0m       0.1000        [35m2.2951[0m  10.7660
     10        [36m2.1680[0m       0.1000        [35m2.2931[0m  10.3665
     11        [36m2.1606[0m       0.1000        [35m2.2908[0m  11.6405
     12        [36m2.1494[0m       0.100

4. **Mini-batch training.** In an online environment, we might not have access to all data at once or might not afford to re-train the model with all data for multiple epochs. So we test the model with mini-batch training.

In [7]:
# Test on time series data in mini-batches
from sklearn.utils import gen_batches

batch_size = 10

model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)

# We can not use epochs because it is online learning
# for i in range(0,10): 
#     partial_fit = model_skorch.partial_fit(X_train, y_train)

# Batch processing, we have 100 time series samples, so the model trains with 10 examples every time
for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m2.7193[0m       [32m0.0000[0m        [35m2.3046[0m  0.9520
      2        [36m2.3143[0m       0.0000        [35m2.2700[0m  1.1025
      3        2.3743       [32m1.0000[0m        [35m2.2255[0m  0.9215
      4        2.4565       0.0000        2.3542  0.9896
      5        2.3683       0.0000        2.2514  1.0583
      6        [36m2.2524[0m       0.0000        2.3163  0.9432
      7        2.4469       0.0000        2.2880  0.9584
      8        2.2599       1.0000        [35m2.1995[0m  0.9693
      9        2.4866       0.0000        2.2820  0.9783
     10        2.4897       0.0000        2.3153  1.0398
[8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8]
[9 9 9 9 9 9 9 9 9 9 3 3 3 3 3 

5. **Mini-batch training without LSTM.** The model does not do as well in an on-line setting. That could be attributed to the LSTM component requiring more training, which depends on the batch. To compare, we test a version of the model without the LSTM component on the same dataset dataset, which is faster and sometimes gives better results.

In [9]:
batch_size = 10

model_skorch = FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)
    
# Batch processing, we have 100 time series samples, so the model trains with 10 examples every time
for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)
print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m2.1796[0m       [32m0.0000[0m        [35m2.2876[0m  0.1515
      2        2.4042       [32m1.0000[0m        [35m2.2230[0m  0.1652
      3        2.4136       0.0000        2.2803  0.1677
      4        2.4996       0.0000        2.3319  0.1640
      5        2.7003       0.0000        2.3119  0.1372
      6        [36m2.1575[0m       0.0000        2.2390  0.1572
      7        2.6755       0.0000        2.3708  0.1844
      8        2.3463       1.0000        [35m2.2202[0m  0.1749
      9        2.5307       0.0000        2.2383  0.1622
     10        2.3080       0.0000        2.2919  0.1383
[8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 5 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8
 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8]
[9 9 9 9 9 9 9 9 9 9 3 3 3 3 3 3 3 3 3 3 4 4 4 4 

6. **Loading a larger dataset.** To test this more, we can try the two incremental versions of the model on a larger time-series dataset, such as [FordA](http://timeseriesclassification.com/description.php?Dataset=FordA). 

In [11]:
train_dataset = arff.load(open('./Tests/Datasets/FordA/FordA_TRAIN.arff'))
train_data = np.array(train_dataset['data'])

X_train = train_data[:,0:-1]
y_train = train_data[:,-1]

X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.int64)

print(X_train.shape)
print(y_train.shape)

print(X_train)
        
y_train = np.where(y_train == -1, 0, y_train)
        
print(y_train)
    

test_dataset = arff.load(open('./Tests/Datasets/FordA/FordA_TEST.arff'))

test_data = np.array(test_dataset['data'])

X_test = test_data[:,0:-1]
y_test = test_data[:,-1]

X_test = X_test.astype(np.float32)
y_test = y_test.astype(np.int64)

y_test = np.where(y_test == -1, 0, y_test)


(3601, 500)
(3601,)
[[-0.79717165 -0.66439205 -0.37301463 ... -0.66439205 -1.0737958
  -1.5643427 ]
 [ 0.8048547   0.6346286   0.37347448 ... -0.71488506 -0.5604429
  -0.31908643]
 [ 0.7279851   0.11128392 -0.49912438 ...  0.39446303  0.3394004
   0.2553906 ]
 ...
 [-0.5700543  -0.33316523 -0.29351854 ... -1.3937145  -0.9427333
  -0.27072167]
 [ 2.006732    2.07915     2.0220363  ... -0.43214503 -0.44123125
  -0.2807089 ]
 [-0.1252409  -0.32536268 -0.48823696 ...  0.5557605   0.574451
   0.573116  ]]
[0 1 0 ... 0 1 0]


7. **Mini-batch learning on the larger dataset.**

In [13]:
from sklearn.utils import gen_batches

batch_size = 100
classes = 2

model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=500, lstm_layers=8, classes=classes)

for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    y_train_batch = np.where(y_train_batch == -1, 0, y_train_batch)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.6982[0m       [32m0.5500[0m        [35m0.6924[0m  4.4546
      2        [36m0.6889[0m       0.5500        0.6929  3.8726
      3        0.6917       0.5000        [35m0.6922[0m  3.9328
      4        0.6917       0.5000        0.6931  3.7746
      5        [36m0.6751[0m       0.5000        [35m0.6911[0m  3.7757
      6        0.6799       0.5500        0.6915  3.7699
      7        0.6829       0.4500        0.6934  3.8896
      8        0.6826       0.5500        0.6922  3.7851
      9        0.6768       0.4500        0.6944  3.8494
     10        [36m0.6645[0m       0.5000        0.6927  3.7519
     11        0.6667       0.5500        [35m0.6888[0m  3.8673
     12        [36m0.6641[0m       0.4500        0.6931  3.8072
     13        [36m0.6578[0m       0.5000        0.6920  3.8309
     14        0.6604       [32m0.7000[0m   

8. **Mini-batch learning on the larger dataset without LSTM**

In [17]:
batch_size = 100
classes = 2

#model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)
model_skorch = FCN_Classifier(in_channels=1,input_size=945, lstm_layers=8, classes=classes)
    
for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    y_train_batch = np.where(y_train_batch == -1, 0, y_train_batch)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.7348[0m       [32m0.5500[0m        [35m0.6886[0m  0.5065
      2        0.7622       0.5500        0.6889  0.5163
      3        [36m0.7322[0m       0.5000        0.6937  0.5396
      4        0.7387       0.5000        0.6937  0.5183
      5        [36m0.6921[0m       0.5000        0.6931  0.4586
      6        [36m0.6592[0m       0.5500        [35m0.6885[0m  0.5328
      7        0.7031       0.4500        0.6951  0.4973
      8        0.6777       0.5500        0.6896  0.5201
      9        0.7038       0.4000        0.6950  0.4917
     10        [36m0.6569[0m       0.4500        0.6913  0.5304
     11        [36m0.6344[0m       [32m0.6500[0m        [35m0.6873[0m  0.5186
     12        0.6473       [32m0.7000[0m        0.6892  0.5178
     13        [36m0.6092[0m       0.6000        0.6883  0.4884
     14        0.6390       

9. Both models perform better on the larger dataset.