In [1]:
import sys
import os
os.chdir("..")

from os.path import dirname

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn import preprocessing

from sail.models.torch.os_cnn import OS_CNN_CLassifier

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# methods for preprocessing data 
def set_nan_to_zero(a):
    """Overwrite every NaN entry of ``a`` with 0 and return ``a``.

    The array is modified in place; the returned object is the same array
    that was passed in, not a copy. Infinite values are left untouched.
    """
    np.putmask(a, np.isnan(a), 0)
    return a

def TSC_data_loader(dataset_path,dataset_name):
    """Read the ``_TRAIN.tsv`` / ``_TEST.tsv`` pair of one dataset.

    Both files are looked up under ``dataset_path/dataset_name/`` and are
    named ``<dataset_name>_TRAIN.tsv`` and ``<dataset_name>_TEST.tsv``.
    In each file the first column is taken as the class label and the
    remaining columns as the feature values.

    Returns:
        ``(X_train, y_train, X_test, y_test)``. The feature arrays are
        float32 with NaNs replaced by 0. The labels are encoded with a
        ``LabelEncoder`` that is fitted on the training labels only.
    """
    def read_split(suffix):
        # Files live at <dataset_path>/<dataset_name>/<dataset_name><suffix>.
        location = '/'.join((dataset_path, dataset_name, dataset_name + suffix))
        return np.loadtxt(location).astype(np.float32)

    train_table = read_split('_TRAIN.tsv')
    test_table = read_split('_TEST.tsv')

    # Column 0 carries the raw label; everything after it is the series.
    raw_train_labels = train_table[:, 0]
    raw_test_labels = test_table[:, 0]

    # The encoder only ever sees the training labels when fitting.
    encoder = preprocessing.LabelEncoder()
    encoder.fit(raw_train_labels)
    y_train = encoder.transform(raw_train_labels)
    y_test = encoder.transform(raw_test_labels)

    X_train = set_nan_to_zero(train_table[:, 1:])
    X_test = set_nan_to_zero(test_table[:, 1:])
    return X_train, y_train, X_test, y_test


def load_to_torch(X_train, y_train, X_test, y_test, device):
    """Turn the four NumPy arrays into tensors placed on ``device``.

    If the training features are 2-D, a singleton axis is inserted at
    position 1 of both feature tensors. The decision is made from the
    training tensor's rank alone and then applied to the test tensor too.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` as tensors.
    """
    def as_feature_tensor(values):
        tensor = torch.from_numpy(values)
        tensor.requires_grad = False  # features are data, not trainable
        return tensor.to(device)

    train_x = as_feature_tensor(X_train)
    train_y = torch.from_numpy(y_train).to(device)
    test_x = as_feature_tensor(X_test)
    test_y = torch.from_numpy(y_test).to(device)

    # Anything that is not 2-D is handed back with its shape unchanged.
    if train_x.dim() != 2:
        return train_x, train_y, test_x, test_y

    return train_x.unsqueeze_(1), train_y, test_x.unsqueeze_(1), test_y

In [3]:
# download data from https://drive.google.com/file/d/1H6IRgk99P0QSi9yLBcZwa9uoBKhg0ZCA/view?usp=sharing
# These two names must be defined in this cell: leaving them commented out
# makes the loader call below fail with NameError on a fresh kernel.
# The relative path is resolved against the working directory set by the
# os.chdir("..") call in the first cell.
dataset_path = dirname("../UCRArchive_2018/")
dataset_name = "FiftyWords"

# load data
X_train, y_train, X_test, y_test = TSC_data_loader(dataset_path, dataset_name)
print('train data shape', X_train.shape)
print()
print('train label shape',y_train.shape)
print('test data shape',X_test.shape)
print('test label shape',y_test.shape)
print('unique train label',np.unique(y_train))
print('unique test label',np.unique(y_test))
device = "cpu"
X_train, y_train, X_test, y_test = load_to_torch(X_train, y_train, X_test, y_test, device)

# the model prints out the result every epoch
# default epoch size = 20
Max_kernel_size = 89
start_kernel_size = 1

# model hyper-parameters derived from the loaded data
input_channel = X_train.shape[1] # input channel size
# Labels are encoded as 0..K-1, so the class count is the largest label + 1.
# Tensor.max() reduces inside torch instead of iterating the tensor element
# by element with Python's built-in max(); the result is still a 0-d tensor.
n_class = y_train.max() + 1 # output class number
# A quarter of the series length, capped at Max_kernel_size.
receptive_field_shape = min(X_train.shape[-1] // 4, Max_kernel_size)

model = OS_CNN_CLassifier(n_class.item(), input_channel, receptive_field_shape)
model.fit(X_train, y_train)

train data shape (450, 270)

train label shape (450,)
test data shape (455, 270)
test label shape (455,)
unique train label [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49]
unique test label [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
 48 49]




  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m3.7022[0m       [32m0.2222[0m        [35m3.3004[0m  1.4693
      2        [36m3.1146[0m       [32m0.2333[0m        [35m3.0906[0m  1.4875
      3        [36m2.7727[0m       [32m0.2556[0m        [35m2.6256[0m  1.4676
      4        [36m2.4804[0m       [32m0.3111[0m        [35m2.3502[0m  1.4532
      5        [36m2.2411[0m       [32m0.3889[0m        [35m2.0951[0m  1.4549
      6        [36m2.0516[0m       0.3778        2.1018  1.4673
      7        [36m1.8672[0m       [32m0.4556[0m        [35m1.9292[0m  1.4843
      8        [36m1.6836[0m       [32m0.4778[0m        [35m1.8325[0m  1.5251
      9        [36m1.5213[0m       0.4444        1.9530  1.5033
     10        [36m1.3636[0m       [32m0.5889[0m        [35m1.5287[0m  1.6161
     11        [36m1.1743[0m       0.5111        1.6169  1.5336
     12        

<class 'sail.models.torch.os_cnn.OS_CNN_CLassifier'>[initialized](
  module_=_OS_CNN(
    (net): Sequential(
      (0): build_layer_with_layer_parameter(
        (padding): ConstantPad1d(padding=(33, 33), value=0)
        (conv1d): Conv1d(1, 20, kernel_size=(67,), stride=(1,))
        (bn): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (1): build_layer_with_layer_parameter(
        (padding): ConstantPad1d(padding=(33, 33), value=0)
        (conv1d): Conv1d(20, 400, kernel_size=(67,), stride=(1,))
        (bn): BatchNorm1d(400, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (2): build_layer_with_layer_parameter(
        (padding): ConstantPad1d(padding=(0, 1), value=0)
        (conv1d): Conv1d(400, 40, kernel_size=(2,), stride=(1,))
        (bn): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
    )
    (averagepool): AdaptiveAvgPool1d(output_size=1)
    (hidden): Line

In [7]:
# Score the fitted model on the test split: fraction of exact label matches.
accuracy_list = []
yhat = model.predict(X_test)
N_test = X_test.shape[0]
# Element-wise comparison against the encoded test labels, then count the hits.
correct = (torch.tensor(yhat) == y_test).sum().item()
accuracy = correct / N_test
accuracy

0.6615384615384615

In [None]:
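# # Plot the loss and accuracy
# # (disabled: `cost_list` is not defined and `accuracy_list` is never filled in the cells shown above)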
# import matplotlib.pyplot as plt

# fig, ax1 = plt.subplots()
# color = 'tab:red'
# ax1.plot(cost_list, color=color)
# ax1.set_xlabel('epoch', color=color)
# ax1.set_ylabel('Cost', color=color)
# ax1.tick_params(axis='y', color=color)
    
# ax2 = ax1.twinx()  
# color = 'tab:blue'
# ax2.set_ylabel('accuracy', color=color) 
# ax2.set_xlabel('epoch', color=color)
# ax2.plot( accuracy_list, color=color)
# ax2.tick_params(axis='y', color=color)
# fig.tight_layout()