In [1]:
import sys

from os.path import dirname

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn import preprocessing

from sail.models.torch.os_cnn import OS_CNN_CLassifier

In [2]:
# methods for preprocessing data 
def set_nan_to_zero(a):
    """Overwrite every NaN entry of ``a`` with 0 and hand the array back.

    The replacement happens in place, so the caller's array (and any array
    that shares memory with it) is modified.

    Args:
        a: NumPy array to clean.

    Returns:
        The same array object that was passed in, with NaNs replaced by 0.
    """
    a[np.isnan(a)] = 0
    return a

def TSC_data_loader(dataset_path, dataset_name):
    """Read one dataset's TRAIN/TEST ``.tsv`` files into feature and label arrays.

    The files are expected at
    ``<dataset_path>/<dataset_name>/<dataset_name>_TRAIN.tsv`` and
    ``<dataset_path>/<dataset_name>/<dataset_name>_TEST.tsv``. In each file the
    first column is taken as the class label and the remaining columns as the
    feature values.

    Args:
        dataset_path: Directory that contains one sub-directory per dataset.
        dataset_name: Name of the dataset sub-directory and file prefix.

    Returns:
        A tuple ``(X_train, y_train, X_test, y_test)``. The feature arrays are
        float32 with NaNs replaced by 0; the label arrays are the outputs of a
        ``LabelEncoder`` that was fit on the training labels only.
    """
    file_prefix = dataset_path + '/' + dataset_name + '/' + dataset_name
    train_table = np.loadtxt(file_prefix + '_TRAIN.tsv').astype(np.float32)
    test_table = np.loadtxt(file_prefix + '_TEST.tsv').astype(np.float32)

    # Column 0 holds the label; everything after it is the feature row.
    train_labels, X_train = train_table[:, 0], train_table[:, 1:]
    test_labels, X_test = test_table[:, 0], test_table[:, 1:]

    # The encoder only ever sees the training labels when it is fit.
    encoder = preprocessing.LabelEncoder()
    encoder.fit(train_labels)
    y_train = encoder.transform(train_labels)
    y_test = encoder.transform(test_labels)

    return set_nan_to_zero(X_train), y_train, set_nan_to_zero(X_test), y_test


def load_to_torch(X_train, y_train, X_test, y_test, device):
    """Convert the NumPy train/test splits to torch tensors on ``device``.

    Args:
        X_train: Training features as a NumPy array.
        y_train: Training labels as a NumPy array.
        X_test: Test features as a NumPy array.
        y_test: Test labels as a NumPy array.
        device: Target passed to ``Tensor.to`` for all four tensors.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` as tensors. When ``X_train`` is
        2-D, a size-1 axis is inserted at position 1 of both feature tensors.
    """
    def _features(array):
        # Feature tensors are explicitly marked as not requiring gradients.
        tensor = torch.from_numpy(array)
        tensor.requires_grad = False
        return tensor.to(device)

    def _labels(array):
        return torch.from_numpy(array).to(device)

    X_train, y_train = _features(X_train), _labels(y_train)
    X_test, y_test = _features(X_test), _labels(y_test)

    # Only the training tensor's rank is inspected; the test tensor follows it.
    if X_train.dim() == 2:
        X_train = X_train.unsqueeze_(1)
        X_test = X_test.unsqueeze_(1)
    return X_train, y_train, X_test, y_test

##### Download Dataset

In [1]:
# Fetch the Google Drive folder that holds the dataset files loaded further down.
import gdown
# Shared Drive folder URL handed to gdown.
url = "https://drive.google.com/drive/folders/1SyX8ylC6TbwJPnLKPP_o4qdm03r-0Yby"
# The call is the cell's last expression, so its return value is what the
# notebook displays (the recorded output is a list of downloaded file paths
# under UCRArchive_2018/FiftyWords).
gdown.download_folder(url, quiet=True, use_cookies=False)

['/Users/dhaval/Projects/MORE/sail-version-bump/notebooks/UCRArchive_2018/FiftyWords/desktop.ini',
 '/Users/dhaval/Projects/MORE/sail-version-bump/notebooks/UCRArchive_2018/FiftyWords/FiftyWords_TEST.tsv',
 '/Users/dhaval/Projects/MORE/sail-version-bump/notebooks/UCRArchive_2018/FiftyWords/FiftyWords_TRAIN.tsv',
 '/Users/dhaval/Projects/MORE/sail-version-bump/notebooks/UCRArchive_2018/FiftyWords/README.md']

In [2]:
dataset_path = "UCRArchive_2018"
dataset_name = "FiftyWords"

# Load the train/test splits as NumPy arrays and report their shapes and labels.
X_train, y_train, X_test, y_test = TSC_data_loader(dataset_path, dataset_name)
print('train data shape', X_train.shape)
print('train label shape', y_train.shape)
print('test data shape', X_test.shape)
print('test label shape', y_test.shape)
print('unique train label', np.unique(y_train))
print('unique test label', np.unique(y_test))

# Convert everything to torch tensors on the chosen device.
device = "cpu"
X_train, y_train, X_test, y_test = load_to_torch(X_train, y_train, X_test, y_test, device)

# Model configuration.
# NOTE(review): the original comment states that the model prints its result
# every epoch and that the default epoch count is 20 — confirm against
# OS_CNN_CLassifier, which is not visible here.
Max_kernel_size = 89
start_kernel_size = 1  # not referenced in this cell; kept in case other cells read it
input_channel = X_train.shape[1]  # input channel size
# Number of output classes = largest integer label + 1. Tensor.max() is used
# instead of the builtin max(), which would iterate the tensor element by
# element in Python; the result is still a 0-dim tensor, hence .item() below.
n_class = y_train.max() + 1
# A quarter of the series length, capped at Max_kernel_size.
receptive_field_shape = min(X_train.shape[-1] // 4, Max_kernel_size)

model = OS_CNN_CLassifier(n_class.item(), input_channel, receptive_field_shape)
model.fit(X_train, y_train)

NameError: name 'TSC_data_loader' is not defined

In [5]:
# Score the fitted model on the held-out test split.
N_test = X_test.shape[0]
yhat = model.predict(X_test)
# as_tensor reuses the prediction buffer where it can (torch.tensor always
# copies and warns when given a tensor) and places the predictions on the same
# device as y_test, so the comparison also works when device is not "cpu".
correct = (torch.as_tensor(yhat, device=y_test.device) == y_test).sum().item()
accuracy = correct / N_test
accuracy

0.6791208791208792