In [1]:
%matplotlib inline

In [2]:
from xgboost import XGBClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

from sklearn.model_selection import GridSearchCV

from sklearn.metrics import accuracy_score

import torch
from torch import nn
from torch.autograd import Variable
from torch.utils.data import DataLoader, Dataset
### our imports

from src.utils import get_data_from_directory, get_files_directory_list
from src.TFE import *

In [3]:
#!wget -nc "http://www.timeseriesclassification.com/Downloads/Archives/Univariate2018_arff.zip"
#!unzip -q -n "Univariate2018_arff.zip"

In [7]:
directory_list = get_files_directory_list()
directory_list = sorted(directory_list)

random_index = 5
random_path = directory_list[random_index]

X_train, X_test, y_train, y_test = get_data_from_directory(random_path)
X_train = X_train.squeeze()
y_train = y_train.squeeze()
X_test = X_test.squeeze()
y_test = y_test.squeeze()

print('Dataset: ', random_path)
print('X_train shape: ', X_train.shape)
print('y_train shape: ', y_train.shape)
print('X_test shape:  ', X_test.shape)
print('y_test shape:  ', y_test.shape)

Dataset:  ArrowHead
X_train shape:  (36, 251)
y_train shape:  (36,)
X_test shape:   (175, 251)
y_test shape:   (175,)


In [8]:
feature_extractor = TopologicalFeaturesExtractor(
    persistence_diagram_extractor=PersistenceDiagramsExtractor(takens_embedding_dim=3, 
                                                               takens_embedding_delay=10,
                                                               homology_dimensions=(0, 1, 2)),
    persistence_diagram_features=[HolesNumberFeature(),
                                  MaxHoleLifeTimeFeature(),
                                  RelevantHolesNumber(),
                                  AverageHoleLifetimeFeature(),
                                  SumHoleLifetimeFeature(),
                                  PersistenceEntropyFeature(),
                                  SimultaneousAliveHolesFeatue()])

X_train_transformed = feature_extractor.fit_transform(X_train)
X_test_transformed = feature_extractor.fit_transform(X_test)

In [9]:
print('X_train_transformed shape: ', X_train_transformed.shape)
print('X_test_transformed shape:  ', X_test_transformed.shape)

X_train_transformed shape:  (36, 21)
X_test_transformed shape:   (175, 21)


In [10]:
parameters = {"C": [10**i for i in range(-2, 5)],
              "kernel": ["linear", "rbf", "sigmoid", "poly"]}

svc_cv = GridSearchCV(SVC(random_state=42), 
                      param_grid=parameters,
                      cv=5,
                      scoring='accuracy', 
                      n_jobs=-1)
svc_cv.fit(X_train_transformed, y_train)

print("Train accuracy: ", accuracy_score(y_train, svc_cv.best_estimator_.predict(X_train_transformed)))
print("Test accuracy: ", accuracy_score(y_test, svc_cv.best_estimator_.predict(X_test_transformed)))

Train accuracy:  0.7222222222222222
Test accuracy:  0.4857142857142857


In [11]:
parameters = {"max_depth": [2, 10, 15, 20, 25, 30, 35, 40, 45, 50, 70, 100, 120, 150],
              "n_estimators": [20, 50, 100, 150, 200, 250]}
svc_cv = GridSearchCV(XGBClassifier(n_jobs=-1, random_state=42), 
                      param_grid=parameters,
                      cv=2,
                      scoring='accuracy', 
                      n_jobs=-1)
svc_cv.fit(X_train_transformed, y_train)

print("Train accuracy: ", accuracy_score(y_train, svc_cv.best_estimator_.predict(X_train_transformed)))
print("Test accuracy: ", accuracy_score(y_test, svc_cv.best_estimator_.predict(X_test_transformed)))

Train accuracy:  1.0
Test accuracy:  0.5028571428571429


In [12]:
parameters = {"n_neighbors": [3, 5, 7, 11,]}

knn_cv = GridSearchCV(KNeighborsClassifier(n_jobs=-1), 
                      param_grid=parameters,
                      cv=5,
                      scoring='accuracy', 
                      n_jobs=-1)
knn_cv.fit(X_train_transformed, y_train)

print("Train accuracy: ", accuracy_score(y_train, knn_cv.best_estimator_.predict(X_train_transformed)))
print("Test accuracy: ", accuracy_score(y_test, knn_cv.best_estimator_.predict(X_test_transformed)))

Train accuracy:  0.75
Test accuracy:  0.5485714285714286


In [17]:
device = torch.device('cuda')

In [18]:
batch_size = 32

D_in = X_train_transformed.shape[1]
D_out = np.unique(y_test).shape[0]
H = 100

D_in, D_out

(21, 3)

In [19]:
model = nn.Sequential(nn.Conv1d(1, 32, 3),
                      nn.MaxPool1d(2),
                      nn.ReLU(),
                      
                      nn.Conv1d(32, 32, 4),
                      nn.MaxPool1d(2),
                      nn.ReLU(),
                      
                      nn.Conv1d(32, 16, 3, 2),
                      nn.ReLU(),
                      
                      nn.Flatten(),
                      
                      nn.Linear(16, 128),
                      nn.ReLU(),
                      nn.Linear(128, 32),
                      nn.Sigmoid(),
                      nn.Linear(32, D_out),
                      nn.Softmax(dim=0))

In [20]:
def init_weights(m):
    if type(m) == nn.Linear:
        torch.nn.init.xavier_normal_(m.weight)

In [21]:
#model = ANN(D_in, D_out)

model = model.apply(init_weights).to(device)
model

Sequential(
  (0): Conv1d(1, 32, kernel_size=(3,), stride=(1,))
  (1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (2): ReLU()
  (3): Conv1d(32, 32, kernel_size=(4,), stride=(1,))
  (4): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (5): ReLU()
  (6): Conv1d(32, 16, kernel_size=(3,), stride=(2,))
  (7): ReLU()
  (8): Flatten()
  (9): Linear(in_features=16, out_features=128, bias=True)
  (10): ReLU()
  (11): Linear(in_features=128, out_features=32, bias=True)
  (12): Sigmoid()
  (13): Linear(in_features=32, out_features=3, bias=True)
  (14): Softmax(dim=0)
)

In [22]:
def one_hot_encoding(x, dtype=float):
    x = np.asarray(x).astype(int)-1
    n = np.unique(x).shape[0]
    return np.eye(int(n), dtype=dtype)[x]

In [23]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

class TimeSeriesDataset(Dataset):
    def __init__(self, X, y, device=None,):
        super(TimeSeriesDataset, self).__init__()
        
        X = X[..., np.newaxis]
        self.X = torch.tensor(X, dtype=torch.float32).permute(0,2,1)
        self.y = torch.tensor(y, dtype=torch.float32)
        
        device_str = 'cuda' if torch.cuda.is_available() else 'cpu'
        self.device = torch.device(device_str) if device is None else device
        
    def __len__(self):
        return self.X.shape[0]
    
    def __getitem__(self, idx):
        return self.X[idx].to(self.device), self.y[idx].to(self.device)

In [24]:
y_hot_train = one_hot_encoding(y_train)
y_hot_test = one_hot_encoding(y_test)

In [25]:
dataset_train = TimeSeriesDataset(X_train_transformed, y_hot_train)
dataset_test  = TimeSeriesDataset(X_test_transformed, y_hot_test)

loader_train = DataLoader(dataset_train, batch_size=batch_size)
loader_test = DataLoader(dataset_test, batch_size=batch_size)

In [26]:
for x, y in loader_train:
    print(x.size())

torch.Size([32, 1, 21])
torch.Size([4, 1, 21])


In [27]:
num_epoch = 1000
loss_function = torch.nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-2,)

for epoch in range(num_epoch):
    total_loss, total_val_loss = list(), list()
    
    model.train()
    for x, y in loader_train:
        # forward
        optimizer.zero_grad()
        output = model(x)
        loss = loss_function(output, y)
        
        loss.backward()
        optimizer.step()
        
        total_loss.append(loss.item())
    
    model.eval()
    best_val_loss = np.inf
    for x, y in loader_test:
        # Compute the validation loss
        output = model(x)
        val_loss = loss_function(output, y)
        total_val_loss.append(val_loss.item())

        #if best_val_loss >= val_loss.item() and save_dir:
        #    best_val_loss = val_loss.item()
        #    torch.save(model.state_dict(), save_dir)
    
    freq = max(num_epoch//20, 1)
    if epoch%freq==0:
        print('Epoch {}/{} \t\t || Loss:  Train {:.4f} | Validation {:.4f}'.format(epoch, 
                                                                              num_epoch, 
                                                                              sum(total_loss), 
                                                                              sum(total_val_loss)))

Epoch 0/1000 		 || Loss:  Train 0.5332 | Validation 1.8605
Epoch 50/1000 		 || Loss:  Train 0.4453 | Validation 1.8707
Epoch 100/1000 		 || Loss:  Train 0.3749 | Validation 1.8698
Epoch 150/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 200/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 250/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 300/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 350/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 400/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 450/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 500/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 550/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 600/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 650/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 700/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 750/1000 		 || Loss:  Train 0.5426 | Validation 1.8607
Epoch 800/1000 		 || Loss: 

In [28]:
model_pred = lambda x: np.concatenate([(model(x_i[0]).argmax(dim=1)+1).cpu().detach().numpy() for x_i in x])

In [29]:
print("Train accuracy: ", accuracy_score(y_train, model_pred(loader_train)))
print("Test accuracy: ", accuracy_score(y_test, model_pred(loader_test)))

Train accuracy:  0.3333333333333333
Test accuracy:  0.3028571428571429


In [885]:
model(x)

tensor([[0.2524, 0.2615],
        [0.2444, 0.2421],
        [0.2474, 0.2345],
        [0.2559, 0.2620]], device='cuda:0', grad_fn=<SoftmaxBackward>)