## Import

In [2]:
import os
import copy
import time
import sys
import random
import timeit

import numpy as np
import scipy.signal
import scipy.io

import pandas as pd

import itertools
from itertools import product

import matplotlib.pyplot as plt
%matplotlib inline
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old aliases, so use whichever spelling this installation
# provides (falling back to the default style if neither is present).
_WHITEGRID_STYLE = next(
    (name for name in ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
     if name in plt.style.available),
    'default',
)
plt.style.use(_WHITEGRID_STYLE)
plt.rcParams.update({'font.size': 32})
plt.rcParams["figure.figsize"] = (12,8)
# import seaborn as sns

In [3]:
try:
    import tqdm
except:
    !pip install tqdm
finally:
    from tqdm.notebook import tqdm, trange

In [5]:
import torch
import torchvision

import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data


from torchvision import transforms, datasets
import torchvision.transforms as T

In [6]:
try:
    import torchsummary
except:
    !pip install torchsummary
finally:
    import torchsummary

In [8]:
import warnings
# Silence every warning for the rest of the session (this also hides deprecation notices).
warnings.filterwarnings('ignore')

In [9]:
try:
    import h5py
except:
    !pip install h5py
finally:    
    import h5py

In [10]:
def torch_stats():
    """Print a summary of the PyTorch runtime and configure global tensor defaults.

    Side effects:
        * When CUDA is available, ``torch.cuda.FloatTensor`` becomes the default
          tensor type.
        * The default floating-point dtype is set to ``torch.float32``.

    Returns:
        tuple: ``(device, dtype, num_workers)`` where ``device`` is the selected
        ``torch.device`` ('cuda' if available, else 'cpu'), ``dtype`` is
        ``torch.float32`` and ``num_workers`` is ``os.cpu_count()``.
    """
    torch_version = ".".join(torch.__version__.split(".")[:2])
    print('torch version:',torch_version)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print('Using device:', device)
    dtype = torch.float32

    if device.type == 'cuda':
        # torch.version.cuda reports the CUDA toolkit the build was compiled
        # against. Parsing the "+cuXYZ" suffix of torch.__version__ prints the
        # whole torch version on builds that carry no local version tag.
        cuda_version  = torch.version.cuda
        print("cuda: ", cuda_version)

        # NOTE(review): set_default_tensor_type is deprecated in recent PyTorch
        # releases; it is kept because it changes where every later cell
        # allocates tensors, and replacing it is not a local change.
        torch.set_default_tensor_type(torch.cuda.FloatTensor)
        print('Cuda is available:',torch.cuda.is_available())

        n_devices = torch.cuda.device_count()
        print('number of devices: %d'%(n_devices))

        for cnt_device in range(n_devices):
            print(torch.cuda.get_device_name(cnt_device))
            print('Memory Usage:')
            print('Allocated:', round(torch.cuda.memory_allocated(cnt_device)/1024**3,1), 'GB')
            print('Cached:   ', round(torch.cuda.memory_reserved(cnt_device)/1024**3,1), 'GB')

    torch.set_default_dtype(dtype) # float32
    print('default data type:',dtype)

    num_workers=os.cpu_count()
    print ('available number of workers (CPU cores):',num_workers)

    return device, dtype, num_workers
#-------------------------------
def torch_seed(seed = 42, deterministic = True):
    """Seed every random number generator this notebook draws from.

    Args:
        seed: value handed to Python's ``random``, NumPy's global RNG and
            PyTorch (``torch.manual_seed`` and ``torch.cuda.manual_seed``).
        deterministic: value written to ``torch.backends.cudnn.deterministic``.
    """
    seeders = (
        random.seed,             # Python stdlib RNG
        np.random.seed,          # NumPy global RNG
        torch.manual_seed,       # PyTorch generator
        torch.cuda.manual_seed,  # CUDA generator
    )
    for apply_seed in seeders:
        apply_seed(seed)
    torch.backends.cudnn.deterministic = deterministic
    
# Report the runtime and capture the device / dtype / worker count used by later cells.
device, dtype, num_workers = torch_stats()
# Fix all RNG seeds for this run.
torch_seed(seed = 2, deterministic = True)

In [11]:
# Override the worker count returned by torch_stats() with 0.
# NOTE(review): num_workers is not passed to any DataLoader in the visible cells.
num_workers = 0

In [12]:
def autocor(x, n_fft = None):
    """Autocorrelation of each row of ``x`` computed through the FFT.

    The signal is transformed with an ``n_fft``-point FFT, multiplied by its
    complex conjugate, and transformed back.

    Args:
        x: tensor or array-like; a 1-D input is treated as a single row.
        n_fft: FFT length. Defaults to twice the signal length.

    Returns:
        Real tensor holding the first ``n_fft // 2`` lags of every row.
    """
    # Normalise the input *before* reading its shape so that lists and 1-D
    # signals work (the shape used to be read from the raw argument).
    x = torch.atleast_2d(torch.as_tensor(x))
    if n_fft is None:
        n_fft = 2 * x.shape[-1]
    spectrum = torch.fft.fft(x, n=n_fft, dim=-1)
    power = (spectrum * spectrum.conj()).real
    return torch.fft.ifft(power, n=n_fft, dim=-1)[..., :n_fft//2].real

def square_filter(x, fp, lp, real_val = True):
    """Rectangular (brick-wall) band-pass filter applied in the frequency domain.

    Args:
        x: tensor or array-like; a 1-D input is treated as a single row.
        fp: index of the first FFT bin that is kept.
        lp: index of the first FFT bin above the pass band that is zeroed.
        real_val: when True the mirrored (upper-half) bins are masked
            symmetrically and the real part of the result is returned; when
            False every bin from ``n_fft // 2`` upwards is zeroed and the
            complex result is returned.

    Returns:
        Tensor with the same trailing length as ``x``; real if ``real_val``
        is True, complex otherwise.
    """
    x     = torch.atleast_2d(torch.as_tensor(x))
    n_fft = x.shape[1]
    sp    = torch.fft.fft(x, n=n_fft, dim=-1)
    # Forward the caller's real_val. It used to be hard-coded to True here, so
    # the one-sided branch of the mask helper was unreachable through this
    # function and hilbert() returned a two-sided spectrum.
    sp    = _square_filter_(sp, fp, lp, n_fft, real_val=real_val)
    out   = torch.fft.ifft(sp, n=n_fft, dim=-1)
    if real_val:
        out = out.real
    return out

def hilbert(x):
    """Complex signal built from the lower half of the spectrum of ``x``.

    Bins ``0 .. n_fft//2 - 1`` are kept unchanged and every bin from
    ``n_fft // 2`` upwards is zeroed; no amplitude rescaling is applied.

    Args:
        x: tensor or array-like; a 1-D input is treated as a single row.

    Returns:
        Complex tensor with the same trailing length as ``x``.
    """
    # Normalise first so that 1-D inputs and plain sequences are accepted.
    x = torch.atleast_2d(torch.as_tensor(x))
    n_fft = x.shape[1]
    return square_filter(x, 0, n_fft//2, real_val = False)

def _square_filter_(sp, fp, lp, n_fft, real_val = True):
    """Zero the spectrum bins outside ``[fp, lp)``; modifies ``sp`` in place.

    Args:
        sp: 2-D complex spectrum with bins along dim 1.
        fp: first kept bin; bins ``0 .. fp-1`` are zeroed.
        lp: bins ``lp .. n_fft//2 - 1`` are zeroed.
        n_fft: FFT length used to produce ``sp``.
        real_val: if True, also zero the mirrored bins in the upper half so
            the mask stays symmetric; if False, zero the whole upper half.

    Returns:
        The same tensor object ``sp`` after masking.
    """
    sp[:,:fp] = 0
    sp[:,lp:n_fft//2] = 0
    if real_val:
        # Mirror of the low stop band (bins 1 .. fp-1).
        sp[:,n_fft - fp +1:] = 0
        # Bin n_fft//2 and the mirror of the high stop band.
        sp[:,n_fft//2:n_fft//2+(n_fft//2-lp) +1 ] = 0
    else:
        sp[:,n_fft//2:] = 0
    return sp

def transform_autocor(x, fp, lp):
    """Band-limit ``x`` and return the autocorrelation of the filtered signal.

    This variant used to be defined under the name ``transform`` and was
    immediately overwritten by the definition below, which made it
    unreachable. It is kept under its own name so both stay callable.

    Args:
        x: tensor or array-like; a 1-D input is treated as a single row.
        fp: first kept FFT bin (of the zero-padded, double-length spectrum).
        lp: first zeroed FFT bin above the pass band.

    Returns:
        Real tensor with the same trailing length as ``x``.
    """
    # Normalise before reading the shape so 1-D inputs and sequences work.
    x     = torch.atleast_2d(torch.as_tensor(x))
    n_fft = 2*x.shape[1]
    sp    = torch.fft.fft(x, n=n_fft, dim=-1)
    sp    = _square_filter_(sp, fp, lp, n_fft, real_val=True)
    sp    = (sp*sp.conj()).real
    return torch.fft.ifft(sp, n=n_fft, dim=-1)[:,:x.shape[1]].real

def transform(x, fp, lp):
    """Band-pass ``x`` between bins ``fp`` and ``lp`` and return the real result."""
    return square_filter(x, fp, lp, real_val = True)

## Data import

In [13]:
# from google.colab import drive
# drive.mount('/content/drive')
# dataset = h5py.File('/content/drive/MyDrive/dataset_.hdf5', 'r')

In [14]:
try:
    import gdown
except:
    !pip install gdown
finally:
    import gdown

# Google Drive download link for the HDF5 dataset.
url = 'https://drive.google.com/uc?export=download&id=1IaFJfZewg8JkfU8034gDiog0oP9mEHI3'
# Local filename the download is written to.
output = 'dataset_.hdf5'
gdown.download(url, output, quiet=False)
# Open the downloaded file read-only; the handle is not closed in the visible cells.
dataset = h5py.File('dataset_.hdf5', 'r')

In [15]:
# presumably the sampling rate in Hz — TODO confirm; fs is not referenced in the visible cells.
fs = 50e3


In [16]:
class HD5Dataset(torch.utils.data.Dataset):
    """Map-style dataset that cuts fixed-length segments out of 2-D arrays.

    ``ds[part]`` must expose ``.shape`` and 2-D slicing for every name in
    ``parts``. Each row of a part is treated as one class ("source") and is
    cut into ``shape[1] // signal_size`` non-overlapping segments; samples
    left over at the end of a row are not used. Class labels are numbered
    consecutively across parts, in the order the parts are listed.

    The class is a ``torch.utils.data.Dataset`` rather than an ``nn.Module``:
    it holds no parameters, and the previous ``nn.Module`` base was never
    initialised.

    Args:
        ds: mapping from part name to a 2-D array-like (e.g. an open HDF5 file).
        parts: sequence of part names to read from ``ds``.
        signal_size: number of samples in each returned segment.
        real_size: stored as ``int(real_size)``; not used by this class itself.
        transform: optional callable applied to each raw segment.
        device: device the returned tensors are moved to; defaults to ``'cpu'``.
    """

    def __init__(self, ds, parts, signal_size, real_size = 1e3, transform=None, device = None):
        super().__init__()
        self.ds = ds
        self.parts = parts
        # Coerce so that float sizes such as 50 * 10e3 still give integer slices.
        self.signal_size = int(signal_size)
        if self.signal_size <= 0:
            raise ValueError('signal_size must be a positive integer')
        self.transform = transform
        self._len = 0
        self.ncls  = 0
        self.signals_in_class = []
        self.signals_in_part  = []
        self.classes_in_part  = []
        # Global index of the first signal of every part; lets idx_2_position
        # handle parts of different sizes.
        self._part_offsets = []
        self.real_size = int(real_size)
        self.n_parts = len(self.parts)
        for part in self.parts:
            shape = self.ds[part].shape
            n_sources = shape[0]
            segments = shape[1] // self.signal_size
            self._part_offsets.append(self._len)
            self.signals_in_class.append(segments)
            self.signals_in_part.append(n_sources * segments)
            self.classes_in_part.append(n_sources)
            self.ncls += n_sources
            self._len += n_sources * segments
        self.device = device if device is not None else 'cpu'

    def incpect(self):
        """Print the bookkeeping computed in ``__init__``."""
        print('signals_in_part',self.signals_in_part)
        print('signals_in_class',self.signals_in_class)
        print('n cls',self.ncls)
        print('len',self.__len__())
        print('n_parts',self.n_parts)
        print('classes_in_part',self.classes_in_part)

    # Correctly spelled alias; the original name is kept for existing callers.
    inspect = incpect

    def idx_2_position(self, idx):
        """Translate a flat sample index into ``(part, source, fp, lp)``.

        ``part`` indexes ``self.parts``, ``source`` is the row inside that
        part and ``fp:lp`` is the sample range of the segment.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        idx = int(idx)
        if idx < 0:
            idx += self._len
        if not 0 <= idx < self._len:
            raise IndexError('HD5Dataset index out of range')
        # Pick the last part whose first global index is <= idx. Dividing by
        # the size of part 0 is only right when all parts are equally large.
        part = 0
        while part + 1 < self.n_parts and idx >= self._part_offsets[part + 1]:
            part += 1
        local = idx - self._part_offsets[part]
        source, segment = divmod(local, self.signals_in_class[part])
        fp = segment*self.signal_size
        lp = fp + self.signal_size
        return part, source, fp, lp

    def position_2_class(self, part, source):
        """Global class label: rows of all earlier parts plus ``source``."""
        return sum(self.classes_in_part[:part])+source

    def __len__(self):
        return self._len

    def __getitem__(self, idx):
        """Return ``(signal, label)`` tensors on ``self.device``.

        ``signal`` gets a leading dimension of size 1 via ``unsqueeze(0)``.
        """
        part, source, fp, lp = self.idx_2_position(idx)

        signal = self.ds[self.parts[part]][source,fp:lp]
        label  = self.position_2_class(part, source)

        if self.transform:
            signal = self.transform(signal)

        signal = torch.as_tensor(signal).to(self.device).unsqueeze(0)
        label = torch.as_tensor(label).to(self.device)
        return signal, label

In [17]:
# One generator shared by random_split and the DataLoaders: created on the GPU
# when CUDA is available, on the CPU otherwise.
kwarg = {'generator': torch.Generator(device='cuda' if torch.cuda.is_available() else 'cpu')}
    
def collate_fn(batch, real_size = int(10e3), device=device):
    """Stack a batch and cut every signal into ``real_size``-sample windows.

    Args:
        batch: sequence of ``(signal, label)`` tensor pairs.
        real_size: window length; the number of windows per signal is the
            stacked tensor's size along dim 2 floor-divided by ``real_size``.
        device: device the stacked signals are moved to.

    Returns:
        ``(x, y)`` where ``x`` is the stacked signals reshaped to
        ``(batch * windows, -1, real_size)`` and ``y`` repeats every label
        once per window.
    """
    signals = torch.stack([sample[0] for sample in batch]).to(device)
    windows_per_signal = signals.shape[2] // real_size
    signals = signals.reshape(signals.shape[0] * windows_per_signal, -1, real_size)
    labels = torch.stack([sample[1] for sample in batch])
    return signals, labels.repeat_interleave(windows_per_signal)

In [18]:
# 
# # bs = [4,8,16,32,64,128]
# bs = [32]
# scales = [10,25,50,75,100,125,150,175,200]
# for scale in scales:
#   test_set   = HD5Dataset(dataset,parts = ['x_test_1','x_test_2'], signal_size=int(scale*10e3))
#   for BATCH_SIZE in bs:
#     testloader = torch.utils.data.DataLoader(test_set,  batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn, **kwarg)
#     start = timeit.default_timer()
#     for x,y in testloader:
#       x.to(device)
#       y.to(device)
#     stop = timeit.default_timer()
#     print(f'BATCH_SIZE {BATCH_SIZE}, scale {scale}, Time: ', stop - start) 




In [19]:
# bs = [32,64,128,256]
# scales = [125,250]
# for scale in scales:
#   test_set   = HD5Dataset(dataset,parts = ['x_test_1','x_test_2'], signal_size=int(scale*10e3))
#   for BATCH_SIZE in bs:
#     testloader = torch.utils.data.DataLoader(test_set,  batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn, **kwarg)
#     start = timeit.default_timer()
#     for x,y in testloader:
#       x.to(device)
#       y.to(device)
#     stop = timeit.default_timer()
#     print(f'BATCH_SIZE {BATCH_SIZE}, scale {scale}, Time: ', stop - start) 

In [20]:
# Every dataset item spans SCALE * 10e3 samples; collate_fn (default
# real_size=int(10e3)) later cuts an item back into windows of 10e3 samples.
SCALE = 50
# Number of dataset items per DataLoader batch.
BATCH_SIZE = 1
# Test parts of the HDF5 file.
test_set = HD5Dataset(dataset,parts = ['x_test_1','x_test_2'],   signal_size=int(SCALE*10e3))
# Training parts; split into train / validation subsets by random_split below.
ds       = HD5Dataset(dataset,parts = ['x_train_1','x_train_2'], signal_size=int(SCALE*10e3))

In [21]:
# 70 % / 30 % random split of `ds` into train and validation subsets, drawn with the generator in `kwarg`.
train_set, val_set = torch.utils.data.random_split(ds, [int(len(ds)*0.7), len(ds)-int(len(ds)*0.7)], **kwarg)

In [22]:
# Only the training loader shuffles; all three loaders share collate_fn and the generator in `kwarg`.
trainloader = torch.utils.data.DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True,  collate_fn=collate_fn, **kwarg)
valloader   = torch.utils.data.DataLoader(val_set,   batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn, **kwarg)
testloader  = torch.utils.data.DataLoader(test_set,  batch_size=BATCH_SIZE, shuffle=False, collate_fn=collate_fn, **kwarg)

In [1]:
import torch
import torch.nn as nn

def correct_sizes(sizes):
    """Return a new list in which every even entry of ``sizes`` is lowered by one.

    Odd entries are copied unchanged, so the result contains only odd kernel
    sizes.
    """
    corrected_sizes = []
    for size in sizes:
        corrected_sizes.append(size - 1 if size % 2 == 0 else size)
    return corrected_sizes

def pass_through(X):
    """Identity function: return ``X`` unchanged (stands in for the bottleneck convolution)."""
    return X

class InceptionBlock(nn.Module):
    """Single inception layer: three parallel convolutions plus a max-pool branch.

    The input optionally passes through a 1x1 bottleneck convolution and is
    then fed to three convolutions with different kernel sizes. In parallel
    the raw input goes through a max-pool followed by a 1x1 convolution. The
    four results are concatenated along the channel axis, batch-normalised and
    activated, giving ``4 * n_filters`` output channels.

    This class used to share the name ``InceptionModule`` with the wrapper
    class defined after it, so it was overwritten, while the wrapper
    instantiates ``InceptionBlock``. It is named ``InceptionBlock`` to match
    that usage.
    """
    def __init__(self, in_channels, n_filters, kernel_sizes=(9, 19, 39), bottleneck_channels=32, activation=nn.ReLU(), return_indices=False):
        """
        : param in_channels				Number of input channels (input features)
        : param n_filters				Number of filters per convolution layer => out_channels = 4*n_filters
        : param kernel_sizes			Sequence of kernel sizes, one per convolution (the first three are used).
                                        Each kernel size must be an odd number, i.e. "kernel_size % 2 != 0".
                                        This is necessary because of the padding size.
                                        To correct kernel_sizes use the function "correct_sizes".
        : param bottleneck_channels		Number of output channels of the bottleneck.
                                        The bottleneck is not used if in_channels is equal to 1.
        : param activation				Activation function for the output tensor (nn.ReLU()).
        : param return_indices			If True, forward returns (output, max-pool indices).
                                        Indices are needed only to build a decoder with MaxUnpool1d.
        : raises ValueError				If fewer than three kernel sizes are given or one of them is even.
        """
        super().__init__()
        kernel_sizes = list(kernel_sizes)
        if len(kernel_sizes) < 3:
            raise ValueError('kernel_sizes must contain at least three entries')
        # An even kernel with padding k//2 lengthens the output by one sample,
        # which would only surface later as a size mismatch in torch.cat.
        if any(k % 2 == 0 for k in kernel_sizes[:3]):
            raise ValueError('kernel_sizes must be odd, got %r' % (kernel_sizes,))
        self.return_indices=return_indices
        if in_channels > 1:
            self.bottleneck = nn.Conv1d(
                                in_channels=in_channels,
                                out_channels=bottleneck_channels,
                                kernel_size=1,
                                stride=1,
                                bias=False
                                )
        else:
            # Parameter-free identity; keeps the block self-contained.
            self.bottleneck = nn.Identity()
            bottleneck_channels = 1

        self.conv_from_bottleneck_1 = nn.Conv1d(
                                        in_channels=bottleneck_channels,
                                        out_channels=n_filters,
                                        kernel_size=kernel_sizes[0],
                                        stride=1,
                                        padding=kernel_sizes[0]//2,
                                        bias=False
                                        )
        self.conv_from_bottleneck_2 = nn.Conv1d(
                                        in_channels=bottleneck_channels,
                                        out_channels=n_filters,
                                        kernel_size=kernel_sizes[1],
                                        stride=1,
                                        padding=kernel_sizes[1]//2,
                                        bias=False
                                        )
        self.conv_from_bottleneck_3 = nn.Conv1d(
                                        in_channels=bottleneck_channels,
                                        out_channels=n_filters,
                                        kernel_size=kernel_sizes[2],
                                        stride=1,
                                        padding=kernel_sizes[2]//2,
                                        bias=False
                                        )
        self.max_pool = nn.MaxPool1d(kernel_size=3, stride=1, padding=1, return_indices=return_indices)
        self.conv_from_maxpool = nn.Conv1d(
                                    in_channels=in_channels,
                                    out_channels=n_filters,
                                    kernel_size=1,
                                    stride=1,
                                    padding=0,
                                    bias=False
                                    )
        self.batch_norm = nn.BatchNorm1d(num_features=4*n_filters)
        self.activation = activation

    def forward(self, X):
        # step 1: bottleneck and pooling branches
        Z_bottleneck = self.bottleneck(X)
        if self.return_indices:
            # MaxPool1d returns (values, indices) in this mode; only the
            # values may be fed to the convolution.
            Z_maxpool, indices = self.max_pool(X)
        else:
            Z_maxpool = self.max_pool(X)
        # step 2: parallel convolutions
        Z1 = self.conv_from_bottleneck_1(Z_bottleneck)
        Z2 = self.conv_from_bottleneck_2(Z_bottleneck)
        Z3 = self.conv_from_bottleneck_3(Z_bottleneck)
        Z4 = self.conv_from_maxpool(Z_maxpool)
        # step 3: concatenate along channels, normalise, activate
        Z = torch.cat([Z1, Z2, Z3, Z4], dim=1)
        Z = self.activation(self.batch_norm(Z))
        if self.return_indices:
            return Z, indices
        return Z


class InceptionModule(nn.Module):
    """Three stacked ``InceptionBlock`` layers with an optional residual shortcut.

    Args:
        in_channels: number of channels of the input tensor.
        n_filters: filters per convolution inside each block; the module
            outputs ``4 * n_filters`` channels.
        kernel_sizes: kernel sizes forwarded to every block.
        bottleneck_channels: bottleneck width forwarded to every block.
        use_residual: if True, a 1x1 convolution + batch norm of the input is
            added to the output of the third block and the sum is activated.
        activation: activation shared by the blocks and the residual sum.
        return_indices: forwarded to every block; if True, ``forward`` returns
            ``(output, indices)`` where ``indices`` lists whatever index
            tensors the blocks returned.
    """
    def __init__(self, in_channels, n_filters=32, kernel_sizes=(9,19,39), bottleneck_channels=32, use_residual=True, activation=nn.ReLU(), return_indices=False):
        super().__init__()
        self.use_residual = use_residual
        self.return_indices = return_indices
        self.activation = activation
        self.inception_1 = InceptionBlock(
                            in_channels=in_channels,
                            n_filters=n_filters,
                            kernel_sizes=kernel_sizes,
                            bottleneck_channels=bottleneck_channels,
                            activation=activation,
                            return_indices=return_indices
                            )
        self.inception_2 = InceptionBlock(
                            in_channels=4*n_filters,
                            n_filters=n_filters,
                            kernel_sizes=kernel_sizes,
                            bottleneck_channels=bottleneck_channels,
                            activation=activation,
                            return_indices=return_indices
                            )
        self.inception_3 = InceptionBlock(
                            in_channels=4*n_filters,
                            n_filters=n_filters,
                            kernel_sizes=kernel_sizes,
                            bottleneck_channels=bottleneck_channels,
                            activation=activation,
                            return_indices=return_indices
                            )
        if self.use_residual:
            self.residual = nn.Sequential(
                                nn.Conv1d(
                                    in_channels=in_channels,
                                    out_channels=4*n_filters,
                                    kernel_size=1,
                                    stride=1,
                                    padding=0
                                    ),
                                nn.BatchNorm1d(
                                    num_features=4*n_filters
                                    )
                                )

    def forward(self, X):
        Z = X
        collected_indices = []
        for block in (self.inception_1, self.inception_2, self.inception_3):
            out = block(Z)
            # A block may hand back (activations, pool_indices); only the
            # activations can be chained into the next block.
            if isinstance(out, tuple):
                Z, indices = out
                collected_indices.append(indices)
            else:
                Z = out
        if self.use_residual:
            Z = Z + self.residual(X)
            Z = self.activation(Z)
        if self.return_indices:
            return Z, collected_indices
        return Z



In [25]:
class Flatten(nn.Module):
    """Reshape the input to two dimensions: ``(-1, out_features)``."""

    def __init__(self, out_features):
        super().__init__()
        self.output_dim = out_features

    def forward(self, x):
        target_shape = (-1, self.output_dim)
        return x.view(target_shape)

class Reshape(nn.Module):
    """Reshape the input to ``(-1, *shape)``, inferring the leading dimension."""

    def __init__(self, shape):
        super().__init__()
        self.out_shape = shape

    def forward(self, x):
        target_shape = (-1,) + tuple(self.out_shape)
        return x.view(target_shape)

# Classifier: reshape to (N, 1, 10_000) -> two InceptionModule stages
# (1 -> 128 -> 128 channels) -> global average pool -> flatten -> linear layer
# with 40 outputs.
model = nn.Sequential(
                    Reshape(shape=(1,10_000)),
                    InceptionModule(
                        in_channels=1, 
                        n_filters=32, 
                        kernel_sizes=[9,19,39],
                        bottleneck_channels=32,
                        use_residual=True,
                        activation=nn.ReLU()
                    ),
                    # Input width equals the 4 * n_filters channels produced by the stage above.
                    InceptionModule(
                        in_channels=32*4, 
                        n_filters=32, 
                        kernel_sizes=[9,19,39],
                        bottleneck_channels=32,
                        use_residual=True,
                        activation=nn.ReLU()
                    ),
                    # Average over the time axis, leaving one value per channel.
                    nn.AdaptiveAvgPool1d(output_size=1),
                    Flatten(out_features=32*4*1),
                    nn.Linear(in_features=4*32*1, out_features=40)
        )

In [26]:
# Count the parameters that receive gradients.
num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print('Number of trainable parameters for the model: %d' % (num_params))

# Count every parameter, trainable or not.
num_params = sum(p.numel() for p in model.parameters() )
print('Number of all parameters for the model: %d' % (num_params))

# Layer-by-layer summary for an input of shape (1, 10000).
from torchsummary import summary
summary(model,(1,10000))

In [27]:
# Learning rate passed to the SGD optimizer below.
LR = 0.0009

model = model.to(device)

# Cross-entropy loss on the raw model outputs.
criterion = nn.CrossEntropyLoss()
criterion = criterion.to(device)


# trainable_parameters = filter(lambda p: p.requires_grad, model.parameters())
# SGD with momentum over all model parameters.
optimizer = optim.SGD(model.parameters(), lr=LR, momentum=0.9)

def initialize_weights(m):
    """Initialise one module in place; intended for ``model.apply``.

    * ``nn.Conv1d``: Kaiming-uniform weights (ReLU gain), zero bias.
    * ``nn.BatchNorm1d``: weight 1 and bias 0 (skipped for ``affine=False``).
    * ``nn.Linear``: Xavier-uniform weights, zero bias.

    Any other module type is left untouched.
    """
    if isinstance(m, nn.Conv1d):
        nn.init.kaiming_uniform_(m.weight.data, nonlinearity='relu')
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.BatchNorm1d):
        # Layers built with affine=False carry neither weight nor bias.
        if m.weight is not None:
            nn.init.constant_(m.weight.data, 1)
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)
    elif isinstance(m, nn.Linear):
        # xavier_uniform_ is the in-place initialiser; the name without the
        # trailing underscore is a deprecated alias.
        nn.init.xavier_uniform_(m.weight)
        if m.bias is not None:
            nn.init.constant_(m.bias.data, 0)

# Run initialize_weights on the model and each of its submodules.
model.apply(initialize_weights)

In [28]:
def accuracy(y_pred, y):
    """Fraction of rows of ``y_pred`` whose arg-max along dim 1 equals the label.

    Args:
        y_pred: score tensor with classes along dim 1.
        y: labels with as many elements as ``y_pred`` has rows.

    Returns:
        0-d float tensor: correct predictions divided by ``y.shape[0]``.
    """
    predicted = y_pred.argmax(dim=1, keepdim=True)
    hits = (predicted == y.view_as(predicted)).sum()
    return hits.float() / y.shape[0]
    
#-----------------------------
def train(model, dataloader, optimizer, criterion, metric, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: module to train; switched to training mode.
        dataloader: iterable of ``(x, y)`` batches that supports ``len``.
        optimizer: optimizer stepping the model parameters.
        criterion: loss called as ``criterion(y_pred, y)``.
        metric: callable ``metric(y_pred, y)`` returning a tensor with ``.item()``.
        device: device every batch is moved to.

    Returns:
        ``(mean_loss, mean_metric)``: per-batch values summed and divided by
        ``len(dataloader)`` (every batch weighs the same).
    """
    epoch_loss = 0
    epoch_acc  = 0

    model.train()

    for (x, y) in dataloader:#tqdm(dataloader, desc="Training", leave=False):

        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()

        # Call the module itself rather than .forward() so that registered
        # forward hooks are executed.
        y_pred = model(x)

        loss = criterion(y_pred, y)
        acc  = metric( y_pred, y)

        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        epoch_acc  += acc.item()

    return epoch_loss / len(dataloader), epoch_acc / len(dataloader)


#--------------------------    
def asserts(model, dataloader, optimizer, criterion, metric, device):
    """Walk through the whole ``dataloader`` and return its final batch on ``device``.

    ``model``, ``optimizer``, ``criterion`` and ``metric`` are accepted but not
    used. Every batch is moved to ``device`` while iterating; only the last
    one is returned.
    """
    for inputs, targets in dataloader:#tqdm(dataloader, desc="Training", leave=False):
        inputs, targets = inputs.to(device), targets.to(device)

    return inputs, targets
#--------------------------
def evaluate(model, dataloader, criterion, metric, device):
    """Score ``model`` on ``dataloader`` without updating it.

    The model is switched to evaluation mode and gradients are disabled.

    Args:
        model: module to evaluate.
        dataloader: iterable of ``(x, y)`` batches that supports ``len``.
        criterion: loss called as ``criterion(y_pred, y)``.
        metric: callable ``metric(y_pred, y)`` returning a tensor with ``.item()``.
        device: device every batch is moved to.

    Returns:
        ``(mean_loss, mean_metric)``: per-batch values summed and divided by
        ``len(dataloader)`` (every batch weighs the same).
    """
    epoch_loss = 0
    epoch_acc  = 0

    model.eval()

    with torch.no_grad():

        for (x, y) in dataloader:#tqdm(dataloader, desc="Evaluating", leave=False):

            x = x.to(device)
            y = y.to(device)

            # Call the module itself rather than .forward() so that
            # registered forward hooks are executed.
            y_pred = model(x)

            loss = criterion(y_pred, y)
            acc  = metric( y_pred, y)

            epoch_loss += loss.item()
            epoch_acc  += acc.item()

    return epoch_loss / len(dataloader), epoch_acc / len(dataloader)
#-------------------
def predicts(model, dataloader, device):
    """Collect the labels and the raw model outputs for every batch.

    NOTE: a later cell of this notebook defines ``predicts`` again (returning
    arg-max class indices); once that cell has run, it replaces this version.

    Args:
        model: module to run; switched to evaluation mode.
        dataloader: iterable of ``(x, y)`` batches.
        device: device the inputs are moved to.

    Returns:
        ``(labels, outputs)`` as NumPy arrays, concatenated over all batches
        along the first axis. Both are empty when ``dataloader`` yields nothing.
    """
    model.eval()

    y_s = []
    y_preds = []
    with torch.no_grad():

        for (x, y) in dataloader:#tqdm(dataloader, desc="Evaluating", leave=False):

            x = x.to(device)
            # Append to the accumulators. The previous code rebound `y` and
            # applied unary plus to a list, which raises TypeError and never
            # stored anything.
            y_s.append(np.atleast_1d(y.detach().cpu().numpy()))
            y_preds.append(np.atleast_1d(model(x).detach().cpu().numpy()))

    if not y_s:
        return np.asarray(y_s), np.asarray(y_preds)
    return np.concatenate(y_s), np.concatenate(y_preds)
#-------------------
def epoch_time(start_time, end_time):
    """Split the span between two timestamps (seconds) into whole minutes and seconds.

    Returns:
        ``(minutes, seconds)``, both truncated to ``int``.
    """
    duration = end_time - start_time
    whole_minutes = int(duration / 60)
    leftover_seconds = int(duration - (whole_minutes * 60))
    return whole_minutes, leftover_seconds

In [None]:
# Number of passes over the training set.
EPOCHS = 200

# Per-epoch history of loss and accuracy for the train and validation sets.
train_loss = torch.zeros(EPOCHS)
valid_loss = torch.zeros(EPOCHS)
train_acc  = torch.zeros(EPOCHS)
valid_acc  = torch.zeros(EPOCHS)

# Lowest validation loss seen so far and the (0-based) epoch that produced it.
best_valid_loss = float('inf')
best_epoch = 0

for epoch in range(EPOCHS):#trange(EPOCHS, desc="Epochs"):

    start_time = time.monotonic()

    train_loss[epoch], train_acc[epoch] = train(model, 
                                                trainloader, 
                                                optimizer, 
                                                criterion,
                                                accuracy,
                                                device)
    
    valid_loss[epoch], valid_acc[epoch] = evaluate(model, 
                                                   valloader, 
                                                   criterion, 
                                                   accuracy,
                                                   device)

    # Checkpoint the weights whenever the validation loss improves.
    if valid_loss[epoch] < best_valid_loss:
        best_valid_loss = valid_loss[epoch]
        best_epoch = epoch
        torch.save(model.state_dict(), 'best_model.pt')

    epoch_mins, epoch_secs = epoch_time(start_time, time.monotonic())
    # if epoch%2 == 1:    # print every 2 epochs:
    # The epoch is printed 1-based here, while best_epoch above is stored 0-based.
    print(f'Epoch: {epoch+1:02} | Epoch Time: {epoch_mins}m {epoch_secs}s')
    print(f'\tTrain Loss: {train_loss[epoch]:.3f} | Train Acc: {train_acc[epoch]*100:.2f}%')
    print(f'\t Val. Loss: {valid_loss[epoch]:.3f} |  Val. Acc: {valid_acc[epoch]*100:.2f}%')

# Reload the checkpoint with the lowest validation loss and score it again.
# This pass runs on `valloader`, so the result is a validation metric and is
# named and labelled as such (it used to be printed as "Test"). The test set
# is evaluated separately with `testloader`.
model.load_state_dict(torch.load('best_model.pt'))
val_loss, val_acc = evaluate(model, valloader, criterion, accuracy, device)
print(10*'--',f'\nbest epoch {best_epoch}: Val. Loss: {val_loss:.3f} | Val. Acc: {val_acc*100:.2f}%')

In [None]:
# Fetch the last batch produced by the training loader to inspect its shapes.
x,y = asserts(model, 
              trainloader, 
              optimizer, 
              criterion,
              accuracy,
              device)

In [None]:
# Shapes of the batch returned by asserts().
print(x.shape,y.shape)

In [None]:
# Reload the best checkpoint and evaluate it on the test loader.
model.load_state_dict(torch.load('best_model.pt'))
test_loss, test_acc = evaluate(model, testloader, criterion, accuracy, device)
print(10*'--',f'\nbest epoch {best_epoch}: Test Loss: {test_loss:.3f} | Test Acc: {test_acc*100:.2f}%')

In [None]:
def predicts(model, dataloader, device):
    """Predict a class index for every sample produced by ``dataloader``.

    Args:
        model: module to run; switched to evaluation mode.
        dataloader: iterable of ``(x, y)`` batches.
        device: device the inputs are moved to.

    Returns:
        ``(labels, predicted_classes)``: two flat float64 NumPy arrays holding
        the labels and the arg-max (last dim) of the model output, in
        iteration order.
    """
    model.eval()

    # Gather per-batch chunks and join them once at the end; growing an array
    # with np.append inside the loop copies everything on every iteration.
    label_chunks = []
    pred_chunks = []
    with torch.no_grad():

        for (x, y) in tqdm(dataloader, desc="Evaluating", leave=False):

            x = x.to(device)
            label_chunks.append(y.detach().cpu().numpy().ravel())

            # Call the module itself so that registered forward hooks run.
            outs = torch.argmax(model(x), dim = -1)

            pred_chunks.append(outs.detach().cpu().numpy().ravel())

    # Leading with an empty float64 array keeps the previous result dtype
    # (float64) and still returns an empty array for an empty dataloader.
    y_s = np.concatenate([np.array([])] + label_chunks)
    y_preds = np.concatenate([np.array([])] + pred_chunks)
    return y_s, y_preds

In [None]:
# Labels and predictions for the test loader; this rebinds `y` from the earlier inspection cell.
y, y_pred = predicts(model, testloader, device)

In [None]:
# Labels returned by predicts(), in loader order.
plt.plot(y)

In [None]:
# Predictions returned by predicts(), in loader order.
plt.plot((y_pred))

In [None]:
# Write the predictions to CSV; to_csv is called with its defaults, so the index column is included.
df = pd.DataFrame(y_pred)
df.to_csv('inceptiontime.csv')