In [1]:
# PyTorch implementation for LSTM FCN for Time Series Classification
# Original code in TensorFlow https://github.com/titu1994/LSTM-FCN
# Paper https://arxiv.org/abs/1709.05206
#
# By David Campos and Teodor Vernica

In [2]:
from sail.models.torch.lstm_fcn import _LSTM_FCN, LSTM_FCN_Classifier
from sail.models.torch.fcn import FCN_Classifier # An optional model without LSTM

1. Importing and checking that the model works.

In [3]:
# Model works
import torch
input = torch.randn(5, 10)

model = _LSTM_FCN(in_channels=1,input_size=input.size()[1],classes=3)
output = model(input)
print(output)

tensor([[0.3576, 0.3481, 0.2942],
        [0.3753, 0.3179, 0.3068],
        [0.3621, 0.3147, 0.3232],
        [0.3349, 0.3488, 0.3163],
        [0.3572, 0.3358, 0.3070]], grad_fn=<SoftmaxBackward0>)


In [4]:
# Skorch works
import numpy as np
from sklearn.datasets import make_classification

X = torch.randn(5, 10)
y = np.random.randint(3, size=10)

X, y = make_classification(30, 10, n_informative=5, random_state=0)

X = X.astype(np.float32)
y = y.astype(np.int64)

model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=10, lstm_layers=8, classes=3)

partial_fit = model_skorch.partial_fit(X,y)
print(partial_fit)
predict = model_skorch.predict(X)
print(predict)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m1.1064[0m       [32m0.5000[0m        [35m1.0625[0m  0.1369
<class 'sail.models.torch.lstm_fcn.LSTM_FCN_Classifier'>[initialized](
  module_=_LSTM_FCN(
    (lstm): LSTM(1, 128, num_layers=8)
    (drop): Dropout(p=0.8, inplace=False)
    (conv_layers): Sequential(
      (0): ConvBlock(
        (conv_layers): Sequential(
          (0): Conv1dSamePadding(1, 128, kernel_size=(8,), stride=(1,))
          (1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU()
        )
      )
      (1): ConvBlock(
        (conv_layers): Sequential(
          (0): Conv1dSamePadding(128, 256, kernel_size=(5,), stride=(1,))
          (1): BatchNorm1d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
          (2): ReLU()
        )
      )
      (2): ConvBlock(
        (conv_layers): Sequential(
         

2. Loading a time-series dataset [(ACSF1)](http://timeseriesclassification.com/description.php?Dataset=ACSF1), from [timeseriesclassification.com](http://timeseriesclassification.com/dataset.php) to test the model.

In [2]:
import requests, zipfile, io
r = requests.get("http://timeseriesclassification.com/Downloads/ACSF1.zip", stream=True)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall("data")

In [5]:
import arff # pip install liac-arff

train_dataset = arff.load(open('data/ACSF1_TRAIN.arff'))
train_data = np.array(train_dataset['data'])

X_train = train_data[:,0:-1]
y_train = train_data[:,-1]

X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.int64)

print(X_train)
print(y_train)

test_dataset = arff.load(open('data/ACSF1_TEST.arff'))

test_data = np.array(test_dataset['data'])

X_test = test_data[:,0:-1]
y_test = test_data[:,-1]

X_test = X_test.astype(np.float32)
y_test = y_test.astype(np.int64)

[[-0.58475375 -0.58475375  1.730991   ... -0.5786034   1.7327257
  -0.584734  ]
 [-0.59143436 -0.51110417  1.7268198  ... -0.5807305   1.7273961
  -0.5807305 ]
 [-0.57794535 -0.57794535  1.7307931  ... -0.5497977   1.7347268
  -0.5777511 ]
 ...
 [-0.99827707  0.10246194  1.6069248  ...  0.09938861  1.5636905
  -0.69265294]
 [-0.9414731   0.58721364  1.5236441  ...  0.5822302   1.5482239
  -0.645292  ]
 [-0.6615355  -0.6615355   1.5103272  ... -0.6605395   1.5101048
  -0.6606845 ]]
[9 9 9 9 9 9 9 9 9 9 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4 4 0 0 0 0 0 0 0
 0 0 0 6 6 6 6 6 6 6 6 6 6 5 5 5 5 5 5 5 5 5 5 2 2 2 2 2 2 2 2 2 2 8 8 8 8
 8 8 8 8 8 8 7 7 7 7 7 7 7 7 7 7 1 1 1 1 1 1 1 1 1 1]


3. **Batch training.** Testing the model on the time-series data with batch training. The model learns, given the entire data-set and enough epochs.


In [6]:
# Test on time series with all data at once
classes = 10

model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)
#model_skorch = FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)

#good results around 50 epochs
for i in range(0,25):
    partial_fit = model_skorch.partial_fit(X_train, y_train)

print(partial_fit.score(X_test, y_test))

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m2.3128[0m       [32m0.1000[0m        [35m2.3020[0m  5.9298
      2        [36m2.2872[0m       0.1000        [35m2.3013[0m  5.8644
      3        [36m2.2614[0m       0.1000        [35m2.3006[0m  6.9832
      4        [36m2.2501[0m       0.1000        [35m2.2995[0m  6.5666
      5        [36m2.2326[0m       0.1000        [35m2.2984[0m  5.9988
      6        [36m2.2215[0m       0.1000        [35m2.2973[0m  6.9001
      7        [36m2.2004[0m       0.1000        [35m2.2960[0m  6.4184
      8        [36m2.1893[0m       0.1000        [35m2.2945[0m  6.0221
      9        [36m2.1773[0m       0.1000        [35m2.2929[0m  5.9667
     10        [36m2.1658[0m       0.1000        [35m2.2911[0m  6.5615
     11        [36m2.1554[0m       0.1000        [35m2.2890[0m  6.6791
     12        [36m2.1380[0m       [32m0.1500[0m

4. **Mini-batch training.** In an online environment, we might not have access to all data at once or might not afford to re-train the model with all data for multiple epochs. So we test the model with mini-batch training.

In [7]:
# Test on time series data in mini-batches
from sklearn.utils import gen_batches

batch_size = 10

model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)

# We can not use epochs because it is online learning
# for i in range(0,10): 
#     partial_fit = model_skorch.partial_fit(X_train, y_train)

# Batch processing, we have 100 time series samples, so the model trains with 10 examples every time
for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m2.2672[0m       [32m0.0000[0m        [35m2.2882[0m  0.6118
      2        [36m2.2516[0m       0.0000        [35m2.2663[0m  0.5760
      3        2.6063       0.0000        2.3101  0.5379
      4        2.2838       0.0000        [35m2.2313[0m  0.5332
      5        2.2740       0.0000        [35m2.2259[0m  0.5363
      6        2.4328       0.0000        2.3124  0.6128
      7        2.5614       0.0000        2.3237  0.5423
      8        2.3683       0.0000        [35m2.2130[0m  0.5439
      9        2.7027       0.0000        2.3540  0.5344
     10        2.4794       0.0000        2.3149  0.5381
[8 8 8 8 8 8 8 8 8 8 8 6 8 6 6 6 6 8 6 6 8 8 8 8 8 8 8 8 6 8 8 8 8 8 8 8 8
 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 6 8 8 8 6 8 8 8 8 8 8 8 8 8 8 8 6 6 6 6
 6 6 6 8 6 6 8 8 8 6 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8 8]
[9 9 9 9 9 9 9 9 9 9 3 3 3 3 3 3 3 3 3 3

5. **Mini-batch training without LSTM.** The model does not do as well in an on-line setting. That could be attributed to the LSTM component requiring more training, which depends on the batch. To compare, we test a version of the model without the LSTM component on the same dataset dataset, which is faster and sometimes gives better results.

In [8]:
batch_size = 10

model_skorch = FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)
    
# Batch processing, we have 100 time series samples, so the model trains with 10 examples every time
for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)
print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m2.3419[0m       [32m1.0000[0m        [35m2.2270[0m  0.1362
      2        [36m2.0523[0m       0.0000        2.2424  0.1517
      3        2.4576       1.0000        [35m2.2160[0m  0.1398
      4        2.2452       0.0000        2.2598  0.1262
      5        2.4985       0.0000        2.2694  0.1393
      6        2.9719       0.0000        2.3904  0.1710
      7        2.6679       0.0000        2.3867  0.2245
      8        2.5224       0.0000        2.3298  0.1556
      9        2.2429       1.0000        [35m2.2023[0m  0.1264
     10        2.5001       0.0000        2.3692  0.1340
[7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7]
[9 9 9 9 9 9 9 9 9 9 3 3 3 3 3 3 3 3 3 3 4 4 4 4 4 4 4 4 4

6. **Loading a larger dataset.** To test this more, we can try the two incremental versions of the model on a larger time-series dataset, such as [FordA](http://timeseriesclassification.com/description.php?Dataset=FordA). 

In [11]:
import requests, zipfile, io
r = requests.get("http://timeseriesclassification.com/Downloads/FordA.zip", stream=True)
z = zipfile.ZipFile(io.BytesIO(r.content))
z.extractall("data")

In [12]:
train_dataset = arff.load(open('data/FordA_TRAIN.arff'))
train_data = np.array(train_dataset['data'])

X_train = train_data[:,0:-1]
y_train = train_data[:,-1]

X_train = X_train.astype(np.float32)
y_train = y_train.astype(np.int64)

print(X_train.shape)
print(y_train.shape)

print(X_train)
        
y_train = np.where(y_train == -1, 0, y_train)
        
print(y_train)
    

test_dataset = arff.load(open('data/FordA_TEST.arff'))

test_data = np.array(test_dataset['data'])

X_test = test_data[:,0:-1]
y_test = test_data[:,-1]

X_test = X_test.astype(np.float32)
y_test = y_test.astype(np.int64)

y_test = np.where(y_test == -1, 0, y_test)


(3601, 500)
(3601,)
[[-0.79717165 -0.66439205 -0.37301463 ... -0.66439205 -1.0737958
  -1.5643427 ]
 [ 0.8048547   0.6346286   0.37347448 ... -0.71488506 -0.5604429
  -0.31908643]
 [ 0.7279851   0.11128392 -0.49912438 ...  0.39446303  0.3394004
   0.2553906 ]
 ...
 [-0.5700543  -0.33316523 -0.29351854 ... -1.3937145  -0.9427333
  -0.27072167]
 [ 2.006732    2.07915     2.0220363  ... -0.43214503 -0.44123125
  -0.2807089 ]
 [-0.1252409  -0.32536268 -0.48823696 ...  0.5557605   0.574451
   0.573116  ]]
[0 1 0 ... 0 1 0]


7. **Mini-batch learning on the larger dataset.**

In [13]:
from sklearn.utils import gen_batches

batch_size = 100
classes = 2

model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=500, lstm_layers=8, classes=classes)

for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    y_train_batch = np.where(y_train_batch == -1, 0, y_train_batch)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.7030[0m       [32m0.4500[0m        [35m0.6958[0m  2.6309
      2        [36m0.6998[0m       0.4500        [35m0.6957[0m  2.0366
      3        [36m0.6979[0m       [32m0.5000[0m        [35m0.6933[0m  2.1787
      4        [36m0.6971[0m       0.5000        [35m0.6932[0m  1.9748
      5        0.7001       0.5000        [35m0.6927[0m  1.9154
      6        [36m0.6848[0m       0.4500        0.6932  1.8258
      7        0.6893       [32m0.6000[0m        [35m0.6922[0m  2.0832
      8        [36m0.6832[0m       0.5500        0.6933  2.1145
      9        [36m0.6760[0m       0.6000        [35m0.6916[0m  2.1590
     10        [36m0.6712[0m       0.5000        0.6925  2.0586
     11        [36m0.6649[0m       [32m0.7000[0m        [35m0.6901[0m  2.0862
     12        0.6683       0.5500        0.6919  2.3860
     13      

8. **Mini-batch learning on the larger dataset without LSTM**

In [14]:
batch_size = 100
classes = 2

#model_skorch = LSTM_FCN_Classifier(in_channels=1,input_size=1460, lstm_layers=8, classes=classes)
model_skorch = FCN_Classifier(in_channels=1,input_size=945, lstm_layers=8, classes=classes)
    
for batch in gen_batches(train_data.shape[0], batch_size, min_batch_size=batch_size):
    current_batch = train_data[batch]
    
    X_train_batch = current_batch[:,0:-1]
    y_train_batch = current_batch[:,-1]

    X_train_batch = X_train_batch.astype(np.float32)
    y_train_batch = y_train_batch.astype(np.int64)
    
    y_train_batch = np.where(y_train_batch == -1, 0, y_train_batch)
    
    partial_fit = model_skorch.partial_fit(X_train_batch, y_train_batch)

predict = model_skorch.predict(X_test)

print(predict)
print(y_test)

print(partial_fit.score(X_test, y_test))

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.7002[0m       [32m0.4500[0m        [35m0.6971[0m  0.5402
      2        [36m0.6939[0m       0.4500        0.6972  0.5470
      3        [36m0.6900[0m       [32m0.5000[0m        [35m0.6935[0m  0.5213
      4        [36m0.6822[0m       0.5000        [35m0.6933[0m  0.5610
      5        [36m0.6699[0m       0.5000        [35m0.6931[0m  0.5475
      6        [36m0.6634[0m       0.4500        0.6958  0.5646
      7        0.6718       [32m0.5500[0m        [35m0.6888[0m  0.4333
      8        0.6684       0.4500        0.6963  0.4213
      9        [36m0.6546[0m       [32m0.6000[0m        [35m0.6840[0m  0.4056
     10        [36m0.6412[0m       0.5000        0.6927  0.4186
     11        0.6436       0.5000        0.6896  0.4066
     12        0.6429       0.5000        0.6906  0.4211
     13        [36m0.6213[0m       0.4