In [1]:
import numpy as np
import torch
from sklearn import preprocessing

from sail.models.torch.os_cnn import OSCNNCLassifier

In [2]:
# methods for preprocessing data 
def set_nan_to_zero(a):
    where_are_NaNs = np.isnan(a)
    a[where_are_NaNs] = 0
    return a

def TSC_data_loader(dataset_path,dataset_name):
    Train_dataset = np.loadtxt(
        dataset_path + '/' + dataset_name + '/' + dataset_name + '_TRAIN.tsv')
    Test_dataset = np.loadtxt(
        dataset_path + '/' + dataset_name + '/' + dataset_name + '_TEST.tsv')
    Train_dataset = Train_dataset.astype(np.float32)
    Test_dataset = Test_dataset.astype(np.float32)

    X_train = Train_dataset[:, 1:]
    y_train = Train_dataset[:, 0:1]

    X_test = Test_dataset[:, 1:]
    y_test = Test_dataset[:, 0:1]
    le = preprocessing.LabelEncoder()
    le.fit(np.squeeze(y_train, axis=1))
    y_train = le.transform(np.squeeze(y_train, axis=1))
    y_test = le.transform(np.squeeze(y_test, axis=1))
    return set_nan_to_zero(X_train), y_train, set_nan_to_zero(X_test), y_test


def load_to_torch(X_train, y_train, X_test, y_test, device):
    X_train = torch.from_numpy(X_train)
    X_train.requires_grad = False
    X_train = X_train.to(device)
    y_train = torch.from_numpy(y_train).to(device)

    X_test = torch.from_numpy(X_test)
    X_test.requires_grad = False
    X_test = X_test.to(device)
    y_test = torch.from_numpy(y_test).to(device)


    if len(X_train.shape) == 2:
        X_train = X_train.unsqueeze_(1)
        X_test = X_test.unsqueeze_(1)
    return X_train, y_train, X_test, y_test

##### Download Dataset


In [3]:
import gdown
url = "https://drive.google.com/drive/folders/1SyX8ylC6TbwJPnLKPP_o4qdm03r-0Yby"
gdown.download_folder(url, quiet=True, use_cookies=False)

['/Users/dhaval/Projects/SAIL/sail/notebooks/UCRArchive_2018/FiftyWords/desktop.ini',
 '/Users/dhaval/Projects/SAIL/sail/notebooks/UCRArchive_2018/FiftyWords/FiftyWords_TEST.tsv',
 '/Users/dhaval/Projects/SAIL/sail/notebooks/UCRArchive_2018/FiftyWords/FiftyWords_TRAIN.tsv',
 '/Users/dhaval/Projects/SAIL/sail/notebooks/UCRArchive_2018/FiftyWords/README.md']

In [4]:
dataset_path = "UCRArchive_2018"
dataset_name = "FiftyWords"

# load data,
X_train, y_train, X_test, y_test = TSC_data_loader(dataset_path, dataset_name)
print('train data shape', X_train.shape)
print()
print('train label shape',y_train.shape)
print('test data shape',X_test.shape)
print('test label shape',y_test.shape)
print('unique train label',np.unique(y_train))
print('unique test label',np.unique(y_test))
device = "cpu"
X_train, y_train, X_test, y_test = load_to_torch(X_train, y_train, X_test, y_test, device)

# the model prints out the result every epoch
# defaul epoch size = 20
Max_kernel_size = 89
start_kernel_size = 1
# loss, optimizer, scheduler
input_channel = X_train.shape[1] # input channel size
n_class = max(y_train) + 1 # output class number
receptive_field_shape= min(int(X_train.shape[-1]/4),Max_kernel_size)

model = OSCNNCLassifier(n_class.item(), input_channel, receptive_field_shape)
model.fit(X_train, y_train)

train data shape (450, 270)

train label shape (450,)
test data shape (455, 270)
test label shape (455,)
unique train label [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49]
unique test label [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49]
  epoch    train_loss     dur
-------  ------------  ------
      1        [36m3.5016[0m  2.7310
      2        [36m2.9053[0m  2.8249
      3        [36m2.5445[0m  2.8687
      4        [36m2.2457[0m  2.8680
      5        [36m1.9874[0m  2.8601
      6        [36m1.7832[0m  3.5804
      7        [36m1.5714[0m  2.9875
      8        [36m1.3965[0m  2.9456
      9        [36m1.2541[0m  2.7899
     10        [36m1.1163[0m  2.7804
     11        [36m0.9997[0m  2.7760
     12        [36m0.8810[0m  2.9273
     13        [36m0.

<class 'sail.models.torch.os_cnn.OSCNNCLassifier'>[initialized](
  module_=_OS_CNN(
    (net): Sequential(
      (0): build_layer_with_layer_parameter(
        (padding): ConstantPad1d(padding=(33, 33), value=0)
        (conv1d): Conv1d(1, 20, kernel_size=(67,), stride=(1,))
        (bn): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): build_layer_with_layer_parameter(
        (padding): ConstantPad1d(padding=(33, 33), value=0)
        (conv1d): Conv1d(20, 400, kernel_size=(67,), stride=(1,))
        (bn): BatchNorm1d(400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (2): build_layer_with_layer_parameter(
        (padding): ConstantPad1d(padding=(0, 1), value=0)
        (conv1d): Conv1d(400, 40, kernel_size=(2,), stride=(1,))
        (bn): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (averagepool): AdaptiveAvgPool1d(output_size=1)
    (hidden): Linear

In [5]:
correct = 0
N_test=X_test.shape[0]
yhat = model.predict(X_test)
correct += (torch.tensor(yhat) == y_test).sum().item()
accuracy = correct / N_test
accuracy

0.7406593406593407