In [1]:
import pandas as pd
import numpy as np
from scipy.io import arff
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn import preprocessing

from tqdm import tqdm
import os
from datetime import datetime
from sktime.utils.load_data import load_from_tsfile_to_dataframe
from torchvision import models

## Data loading: [samples, features, timesteps]

In [2]:
dataset_name = 'ERing'
# dataset_name = 'Cricket'

datasets_directory = "/root/data/Multivariate_ts/"

X_train, y_train = load_from_tsfile_to_dataframe(datasets_directory + dataset_name + f'/{dataset_name}_TRAIN.ts')
X_test, y_test = load_from_tsfile_to_dataframe(datasets_directory + dataset_name + f'/{dataset_name}_TEST.ts')

In [3]:
def features_to_torch(X):
    X = X.applymap(np.array)
    
#     sc = preprocessing.MinMaxScaler()
#     X = X.applymap(lambda x: sc.fit_transform(x.reshape(-1,1)).flatten())

    dimensions_lst = []

    for dim in X.columns:
        dimensions_lst.append(np.dstack(list(X[dim].values))[0])

    dimensions_lst = np.array(dimensions_lst)
    X = torch.from_numpy(np.array(dimensions_lst, dtype=np.float64))
    X = X.transpose(0, 2)
    X = X.transpose(1, 2)
    X = F.normalize(X, dim=1)
    return X.float()


def answers_to_torch(y):
    le = preprocessing.LabelEncoder()
    y = le.fit_transform(y)
    y = torch.from_numpy(np.array(y, dtype=np.int32))
    y = y.long()
    return y

In [4]:
X_train = features_to_torch(X_train)
X_test = features_to_torch(X_test)

y_train = answers_to_torch(y_train)
y_test = answers_to_torch(y_test)

samples_num = X_train.shape[0]
time_steps = X_train.shape[2]
dims_num = X_train.shape[1]
num_classes = len(np.unique(y_test))

print('time_steps:', time_steps)
print('train samples_num:', samples_num)
print('dims_num:', dims_num)
print('num_classes:', num_classes)

time_steps: 65
train samples_num: 30
dims_num: 4
num_classes: 6


In [7]:
X_train.size()

torch.Size([30, 4, 65])

# VGG-1D

In [5]:
def make_vgg_1d():
    
#     fc_hidden_dim = 4096
    fc_hidden_dim = 50

    model = nn.Sequential(
                nn.Conv1d(dims_num, 64, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                nn.Conv1d(64, 128, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                nn.Conv1d(128, 256, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv1d(256, 256, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                nn.Conv1d(256, 512, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),
                nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.Conv1d(512, 512, kernel_size=3, stride=1, padding=1),
                nn.ReLU(inplace=True),
                nn.MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False),

                nn.Flatten(),
                nn.Linear(in_features=512 * (time_steps // 2**5), out_features=fc_hidden_dim, bias=True),

                nn.ReLU(inplace=True),
                nn.Dropout(p=0.5, inplace=False),
                nn.Linear(in_features=fc_hidden_dim, out_features=fc_hidden_dim, bias=True),
                nn.ReLU(inplace=True),
                nn.Dropout(p=0.5, inplace=False),
                nn.Linear(in_features=fc_hidden_dim, out_features=num_classes, bias=True),
                nn.Softmax()
                        )
        
    return model

In [6]:
# models.vgg11()

## Training

In [7]:
def iterate_minibatches(inputs, targets, batchsize, shuffle=False):
    assert len(inputs) == len(targets)
    if shuffle:
        indices = np.random.permutation(len(inputs))
    for start_idx in range(0, len(inputs) - batchsize + 1, batchsize):
        if shuffle:
            excerpt = indices[start_idx:start_idx + batchsize]
        else:
            excerpt = slice(start_idx, start_idx + batchsize)
        yield inputs[excerpt], targets[excerpt]
        
    
def compute_accuracy(logits, y_true, device='cuda:0'):
    y_pred = torch.argmax(logits, dim=1)
    y_true_on_device = y_true.to(device)
    accuracy = (y_pred == y_true_on_device).float().mean()
    return accuracy

In [8]:
def train(model, num_epochs, batch_size, opt, criterion):

    train_loss = []
    train_accuracy = []
    val_accuracy = []

    for epoch in range(num_epochs):

        model.train(True)

        train_accuracy_batch = []

        for batch_no, (X_batch, y_batch) in enumerate(iterate_minibatches(X_train, y_train, 
                                                                  batchsize=batch_size, 
                                                                  shuffle=True)):
            model.zero_grad()
            X_batch_gpu = X_batch.to(device)
            y_batch = y_batch.to(device)

            logits = model(X_batch_gpu)
            loss = criterion(logits, y_batch)
            loss.backward()
            opt.step()

            if batch_no % 10 == 0:
                train_loss.append(loss.item())
                accuracy = compute_accuracy(logits, y_batch, device=device)
                train_accuracy_batch.append(accuracy.item())

        train_accuracy_overall = np.mean(train_accuracy_batch) * 100
        train_accuracy.append(train_accuracy_overall.item())

    
        model.train(False)
        val_accuracy_batch = []
        for X_batch, y_batch in iterate_minibatches(X_test, y_test, 
                                                    batchsize=batch_size, 
                                                    shuffle=True):
            X_batch_gpu = X_batch.to(device)
            logits = model(X_batch_gpu)

            accuracy = compute_accuracy(logits, y_batch, device=device)
            val_accuracy_batch.append(accuracy.item())

        val_accuracy_overall = np.mean(val_accuracy_batch) * 100
        val_accuracy.append(val_accuracy_overall.item())

            
        print(f'\nEpoch {epoch+1}/{num_epochs}')
        print(f'Train accuracy = {train_accuracy_overall}')
        print(f'Validation accuracy = {val_accuracy_overall}')

    return model

In [9]:
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

model = make_vgg_1d()
model = model.to(device)

batch_size = 5
num_epochs = 50

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
# optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

In [10]:
model = train(model, num_epochs, batch_size, optimizer, criterion)

  input = module(input)



Epoch 1/50
Train accuracy = 0.0
Validation accuracy = 16.66666697020884

Epoch 2/50
Train accuracy = 20.000000298023224
Validation accuracy = 16.66666697020884

Epoch 3/50
Train accuracy = 20.000000298023224
Validation accuracy = 16.666666942614096

Epoch 4/50
Train accuracy = 0.0
Validation accuracy = 16.666666915019356

Epoch 5/50
Train accuracy = 20.000000298023224
Validation accuracy = 16.666666942614096

Epoch 6/50
Train accuracy = 20.000000298023224
Validation accuracy = 16.66666699780358

Epoch 7/50
Train accuracy = 0.0
Validation accuracy = 16.666666915019356

Epoch 8/50
Train accuracy = 0.0
Validation accuracy = 16.666666915019356

Epoch 9/50
Train accuracy = 0.0
Validation accuracy = 16.66666697020884

Epoch 10/50
Train accuracy = 20.000000298023224
Validation accuracy = 16.66666699780358

Epoch 11/50
Train accuracy = 0.0
Validation accuracy = 16.66666699780358

Epoch 12/50
Train accuracy = 20.000000298023224
Validation accuracy = 16.66666697020884

Epoch 13/50
Train accurac

KeyboardInterrupt: 