In [None]:
#default_exp datablock

# DataBlock

> process timeseries datasets with DataBlock

In [None]:
#using the NumpyDataLoader in tsai is significantly faster than using the fastaiv2 DataLoader, 
#maybe build a custom dataloader to optimize for odds data
import tsai
from tsai.all import *

In [None]:
# Report library versions and hardware so the recorded outputs can be tied to an environment.
# NOTE(review): fastai2, fastcore, torch, sp, np, pd, psutil, bytes2GB, cpus and device are not
# imported explicitly before this cell; presumably they come from `from tsai.all import *` -- confirm.
print('tsai       :', tsai.__version__)
print('fastai2    :', fastai2.__version__)
print('fastcore   :', fastcore.__version__)
print('torch      :', torch.__version__)
print('scipy      :', sp.__version__)
print('numpy      :', np.__version__)
print('pandas     :', pd.__version__)
print(f'Total RAM  : {bytes2GB(psutil.virtual_memory().total):5.2f} GB')
print(f'Used RAM   : {bytes2GB(psutil.virtual_memory().used):5.2f} GB')
print('n_cpus     :', cpus)
# the GPU name is only queried when CUDA is available
iscuda = torch.cuda.is_available()
if iscuda: print('device     : {} ({})'.format(device, torch.cuda.get_device_name(0)))
else: print('device     :', device)

tsai       : 0.1.0
fastai2    : 0.0.18
fastcore   : 0.1.18
torch      : 1.3.1
scipy      : 1.4.1
numpy      : 1.18.1
pandas     : 0.25.3
Total RAM  : 15.56 GB
Used RAM   :  7.07 GB
n_cpus     : 4
device     : cuda (GeForce GTX 950)


In [None]:
#export
from torchtools.data import *
from torchtools.datasets import *
from torchtools.augmentations import *

from torchtools.datablock import *

In [None]:
from torchtools.models import *
from torchtools.core import *

In [None]:
import pandas as pd
import numpy as np
from functools import partial

In [None]:
_verbose=True

### Get Data

In [None]:
df_main = pd.read_csv('./data/custom/bi_sample_anon.csv', nrows=200000)

In [None]:
## simple config
x_cols = [[f'x{i}_{j}' for j in range(10)] for i in range(6)]
dep = 'y0'
n_train = 8000

items is a list of (x,y) tuples  
methods called when passing items to DataBlock:
- DataBlock.datasets(items, ....)
- Dataset(items, ...)
- TfmdList(items, ...)
- L(items, ...)
- CollBase.__init__(items, ...)



### TSAI 

In [None]:
items_0 = df_to_items(df_main, x_cols, dep, n_train)[0]

(200000, 6, 10)


In [None]:
#export
def _items_to_arrays(items):
#     return np.stack([x[0] for x in items]), np.stack([x[1] for x in items])
    return tuple(np.stack([x[i] for x in items]) for i in range(len(items[0])))

In [None]:
X,y = _items_to_arrays(items_0)

In [None]:
X.shape, y.shape

((200000, 6, 10), (200000,))

In [None]:
## use FixedSplitter
# Manual alternatives, kept for reference only. The first one used to be executed and was then
# immediately overwritten by the FixedSplitter result, so it never had any effect:
# splits = [L(range(160000)), L(range(160000,185000))]
# splits = [L(range(1600)), L(range(1600,1850))]
splits = FixedSplitter()(df_main)

tfms  = [None, None]  # one entry for X and one for y: no item-level transforms
dsets = TSDatasets(X, y, tfms=tfms, splits=splits, inplace=True)

In [None]:
splits

((#10000) [0,1,2,3,4,5,6,7,8,9...],
 (#5000) [10000,10001,10002,10003,10004,10005,10006,10007,10008,10009...])

In [None]:
# this is still a hack to transform the tsai tensor types to torchtool tensor types
# not needed when using adapted TSDataloader
@Transform
def ToTT(x:tsai.data.core.TSTensor):
    "Re-type a tsai `TSTensor` as the `TSTensor` class that is in scope in this notebook"
    print('ToTT')  # trace output: one line per call
    retyped = cast(x, TSTensor)
    return retyped

# order 150; split_idx=None so the cast is not restricted to a single split
ToTT.order, ToTT.split_idx = 150, None

In [None]:
#int
@Transform
def ToTTInt(x:tsai.data.core.TSTensor):
    "Cast a tsai `TSTensor` to `TSIntTensor`"
    print('ToTTInt')  # trace output; the label used to read 'ToTT' (copy-paste slip)
    return cast(x, TSIntTensor)

# These attributes belong on ToTTInt. They used to be assigned to ToTT, which silently reset
# ToTT.split_idx to 0 and left ToTTInt on the Transform defaults.
ToTTInt.order = 150
# NOTE(review): 0 is the value the original cell assigned; presumably it restricts the cast to the
# training split -- use None (as ToTT does) if validation batches need the cast as well.
ToTTInt.split_idx = 0

In [None]:
# dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[256, 512], num_workers=0)
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[256, 512], num_workers=0, 
#                                after_batch=[TSNormalize(by_var=False)
                               batch_tfms=[TSNormalize(by_sample=False, by_var=True), ToTT])

In [None]:
dls[0].one_batch()

ToTT


(TSTensor([[[ 1.0000,  1.0000,  0.5974,  ...,  0.5205,  0.3609,  1.0000],
          [ 1.0000,  1.0000,  0.3072,  ...,  0.3072, -0.3464,  1.0000],
          [ 0.0769,  0.0769, -0.0769,  ..., -0.2308, -0.0769,  0.0769],
          [ 1.0000,  0.6395, -0.0909,  ...,  0.5763,  0.3912,  0.6203],
          [ 1.0000, -0.6987, -0.4774,  ...,  1.0000, -0.4533,  0.2195],
          [ 0.1667, -0.5000,  0.0000,  ...,  0.0000,  0.0000, -0.3333]],
 
         [[ 0.3768,  0.5521,  1.0000,  ...,  1.0000,  1.0000,  0.5102],
          [-0.4891, -0.5481,  1.0000,  ...,  0.3072,  0.3072,  0.3072],
          [-0.2308, -0.2308,  0.2308,  ...,  0.0769,  0.3846, -0.0769],
          [ 0.3082,  0.3522,  0.4303,  ...,  0.5289,  1.0000,  1.0000],
          [-0.6197, -0.5168,  0.2195,  ...,  0.2195,  0.2195,  0.2195],
          [ 0.0000,  0.0000,  0.0000,  ..., -0.1667,  0.1667,  0.3333]],
 
         [[ 0.5771,  1.0000,  1.0000,  ...,  1.0000,  1.0000,  0.6624],
          [-0.1003,  1.0000,  1.0000,  ...,  1.0000,  0.

In [None]:
# #export
# _batch_tfms = ('after_item','before_batch','after_batch')

# class NumpyDataLoaders(DataLoaders):
#     _xblock = NumpyTensorBlock
#     _dl_type = NumpyDataLoader 
#     def __init__(self, *loaders, path='.', device=default_device()):
#         self.loaders,self.path = list(loaders),Path(path)
#         self.device = device
        
#     @classmethod
#     @delegates(DataLoaders.from_dblock)
#     def from_numpy(cls, X, y=None, splitter=None, valid_pct=0.2, seed=0, item_tfms=None, batch_tfms=None, **kwargs):
#         "Create timeseries dataloaders from arrays (X and y, unless unlabeled)"
#         if splitter is None: splitter = RandomSplitter(valid_pct=valid_pct, seed=seed)
#         getters = [ItemGetter(0), ItemGetter(1)] if y is not None else [ItemGetter(0)]
#         dblock = DataBlock(blocks=(cls._xblock, CategoryBlock),
#                            getters=getters,
#                            splitter=splitter,
#                            item_tfms=item_tfms,
#                            batch_tfms=batch_tfms)

#         source = itemify(X) if y is None else itemify(X,y)
#         return cls.from_dblock(dblock, source, **kwargs)

In [None]:
norm = TSNormalize(by_sample=False, by_var=True)
tszero = TimestepZero(verbose=True)

In [None]:
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[256, 512], num_workers=0, 
                              after_batch=Pipeline(noop),
                            batch_tfms=[norm, ToTT, tszero])

In [None]:
dls[1].one_batch()

(TSTensor(samples:512, vars:6, len:10),
 tensor([ -41.6667,  100.0000, -131.5789,  100.0000,  100.0000,  -25.6410,
          100.0000,  -25.3165,  100.0000,  -32.7869,  100.0000, -256.4102,
          -35.7143,  100.0000,  100.0000,  -21.2766,  100.0000,   -6.7797,
         -153.8462,  100.0000,  100.0000,  -34.6021,  100.0000,  -25.7732,
          100.0000,  -18.1159,  100.0000,  -64.9351, -163.9344,  100.0000,
          100.0000,  -16.5563,  -96.1538,  100.0000,  -94.3396,  -29.6736,
          -72.4638,  100.0000,  100.0000,  -77.5194, -131.5789,  100.0000,
          -64.1026,  100.0000,  100.0000,  -36.1011,  -63.6943,  -49.7512,
          100.0000,  -41.8410,  -34.6021,  100.0000,  -15.1515,  100.0000,
          100.0000,  -21.6920,  -76.3359,  -35.2113,  100.0000,  -41.1523,
          100.0000,  -32.7869,  100.0000,  -28.0112, -322.5807,   -9.0909,
          -56.1798,  -53.4759,  -66.6667,  -50.2513,  -48.3092,  -54.3478,
          100.0000,  -27.0270,  100.0000,  -20.9644,  100.00

In [None]:
model = InceptionTimeSgm(6,1)
loss_fn = get_loss_fn('leaky_loss', alpha=0.5)
learn_small = Learner(model=model, loss_func=loss_fn, dls=dls, metrics=unweighted_profit)

In [None]:
learn_small.fit_one_cycle(5)

epoch,train_loss,valid_loss,unweighted_profit,time
0,0.093532,-0.12388,-0.23944,00:02
1,-0.903068,0.453862,-0.504787,00:02
2,-1.856171,0.208542,-0.520914,00:01
3,-3.293418,-0.51357,0.087065,00:02
4,-5.731512,-0.208174,-0.307485,00:02


ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timest

ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])
ToTT
timestepzero
torch.Size([256, 6, 10])


In [None]:
preds = learn_small.get_preds(1)

In [None]:
unweighted_profit(*preds)

tensor(-0.3075)

#### Modelling with RandAugment

In [None]:
tfms = all_erasing_augs(magnitude=0.4) + all_noise_augs(magnitude=0.4) + all_zoom_augs(magnitude=0.4)

In [None]:
# Fixed index ranges: 0-159999 for the first split, 160000-184999 for the second.
splits = [L(range(160000)), L(range(160000,185000))]
# splits = [L(range(1600)), L(range(1600,1850))]

# NOTE(review): with the line below commented out, `tfms` is still the augmentation list built in
# the previous cell, so the dataset receives that list as its `tfms` in addition to RandAugment
# further down -- confirm this is intended.
# tfms  = [None, None]
dsets = TSDatasets(X, y, tfms=tfms, splits=splits, inplace=True)

# dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[256, 512], num_workers=0)
# Batch pipeline: normalisation, ToTT cast, then RandAugment built from the same `tfms` list.
dls = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[256, 256], num_workers=0, 
#                                after_batch=[TSNormalize(by_var=False)
                               batch_tfms=[TSNormalize(by_sample=False, by_var=True), ToTT, 
                                           RandAugment(N=3, magnitude=0.4, tfms=tfms)
                                           ])

tfms [Dimout: (TSTensor,object) -> encodes , Cutout: (TSTensor,object) -> encodes , TimestepZero: (TSTensor,object) -> encodes , Crop: (TSTensor,object) -> encodes , RandomCrop: (TSTensor,object) -> encodes , Maskout: (TSTensor,object) -> encodes , YWarp: (TSTensor,object) -> encodes , YNormal: (TSTensor,object) -> encodes , YScale: (TSTensor,object) -> encodes , TimeWarp: (TSTensor,object) -> encodes , TimeNormal: (TSTensor,object) -> encodes , Zoomin: (TSTensor,object) -> encodes , Zoomout: (TSTensor,object) -> encodes , RandZoom: (TSTensor,object) -> encodes , RandTimesteps: (TSTensor,object) -> encodes ]


In [None]:
xb, yb = dls[0].one_batch()

ToTT
[0 2 5 6 7 8]
torch.Size([256, 6, 10])


In [None]:
dls.after_batch.tfms[3].magnitude

0.4

In [None]:
# dls.show_batch()

In [None]:
set_seed(1234)

In [None]:
## for tsai
model = InceptionTimeSgm(6,1)
learn = Learner(dls, model, loss_fn, metrics=[unweighted_profit, partial(unweighted_profit, threshold=0.5)])

In [None]:
learn.fit_one_cycle(2, lr_max=1e-5, wd=0.03)

epoch,train_loss,valid_loss,unweighted_profit,unweighted_profit.1,time
0,-0.058872,-0.079185,-0.137525,0.0,00:41
1,-0.208804,-0.106575,-0.03804,0.0,00:39


ToTT
[4 5 6 7 8 9]
[3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[1 2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[0 1 3 6 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
ToTT
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[0 1 2 3 6 8]
ToTT
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
[3 4 5 6 7 8 9]
ToTT
[0 1 2 3 4 5 6 7 8]
[2 3 4 5 6 7 8 9]
ToTT
[0 1 2 3 4 6]
[1 2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
[1 2 3 4 5 6 7 8 9]
ToTT
[5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
[3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
torch.

ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
[1 2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[1 2 3 5 8 9]
ToTT
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8]
ToTT
torch.Size([256, 6, 10])
ToTT
[0 1 3 4 6 8]
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[1 2 3 4 5 6 7 8 9]
ToTT
[1 4 5 7 8 9]
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[3 4 5 6 7]
ToTT
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[2 3 4 5 7 9]
ToTT
[5 6 7 8 9]
torch.Size([256, 6, 10])
[0 1 3 4 5 6]
ToTT
[2 3 4 5 6]
[5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[2 3 4 6 7 8]
ToTT
[1 2 3 4 5 6 7 8 9]
[0 4 5 7 8 9]
ToTT
[0 1 4 6 7 8]
torch.Size([256, 6, 10])
ToTT
[3 4 5 6 7 8 9]
[2 3 4 5 6 

ToTT
[2 3 4 5 6 7 8 9]
[4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[2 3 4 5 6]
ToTT
torch.Size([256, 6, 10])
[3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
[0 1 2 3 4 5 6 7 8]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
[1 2 3 4 6 9]
ToTT
torch.Size([256, 6, 10])
[0 1 2 3 4 5 6 7 8]
ToTT
[0 1 2 3 4 5 6 7 8]
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
[4 5 6 7 8 9]
[3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[0 1 2 3 4 5 6 7 8]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
[3 4 5 6 7 8 

torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
ToTT
[2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[0 1 2 3 4 5 6 7]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
[0 1 2 3 4 5 6 7 8]
ToTT
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[1 3 5 6 7 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6]
[1 3 4 6 7 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[2 3 4 5 6 7 8 9]
[2 4 5 6 7 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[5 6 7 8 9]
torch.Size([256, 

ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
[1 2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
[0 1 2 3 4 5 6 7 8]
ToTT
torch.Size([256, 6, 10])
[5 6 7 8 9]
ToTT
[2 3 4 5 6 7 8]
ToTT
torch.Size([256, 6, 10])
[0 1 2 3 4 5 6 7 8]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
[0 1 4 5 7 9]
ToTT
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
[0 3 4 5 6 8]
ToTT
torch.Size([256, 6, 10])
[0 1 2 3 4 5]
ToTT
[5 6 7 8 9]
[0 1 2 3 4 5 6]
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[3 4 5 6 7 8 9]
ToTT
[1 2 4 6 7 8]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[0 1 2 3 4 5]
ToTT
[0 1 2 3 4 5 6 7 8]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[2 3 4 5 6 7 8 9]
ToTT
[0 1 2 3 4 5 6 7]
[0 3 5 7 8 9]
ToTT
[0 2 3 6 8 9]
torch.Size([256, 6, 10])
ToTT
t

ToTT
torch.Size([256, 6, 10])
[3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
[2 3 4 5 6 7 8 9]
ToTT
[1 2 3 4 5 6 7 8 9]
[0 1 2 3 5 8]
ToTT
torch.Size([256, 6, 10])
[0 1 3 5 7 8]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
[2 3 4 5 6 7 8 9]
ToTT
[3 4 5 6 7 8]
ToTT
[0 1 2 3 4 5 6 7]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[0 1 3 5 7 8]
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
[2 3 5 7 8 9]
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
[4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[0 1 2 3 4 5 6 7]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
[1 2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[2 3 4 5 6 7 8]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[2 3 4 5 6 7 8 9]
[1 2 3 4 7 8]
ToTT
torch.Size([256

ToTT
torch.Size([256, 6, 10])
[0 3 6 7 8 9]
[2 3 4 5 6 7]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
[3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[0 2 4 6 7 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
[0 1 2 4 6 8]
ToTT
[1 4 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
[1 3 5 6 8 9]
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
[2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
[2 3 4 5 6 7]
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[4 5 6 7 8 9]
[0 1 2 3 4 5 6 7

ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
[2 3 4 5 6 7 8 9]
[0 1 5 6 7 8]
ToTT
[0 1 2 3 4 5]
[2 4 5 6 7 8]
ToTT
[4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[1 2 3 4 5 6 7 8 9]
ToTT
[0 2 4 5 6 8]
[0 1 2 3 4 5 6 7 8]
ToTT
[0 1 2 3 4 5 6 7 8]
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
[2 3 4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
[0 1 4 6 8 9]
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
[4 5 6 7 8 9]
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
[1 2 3 4 5 6 7 8 9]
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
torch.Size([256, 6, 10])
ToTT
torch.Size([256, 6, 10])
ToTT
[0 1 2 6 7 9]
[1 2 3 4 5 6 7 8 

In [None]:
preds = learn.get_preds()

In [None]:
(preds[0]>-0.5).sum()

tensor(24632)

In [None]:
unweighted_profit(*preds, threshold=0)

tensor(-0.0380)

### Discrete

oguiza TSDatasets, possible modification
- allow to directly use torchtools tensor types
- allow more than one X

- toarray, to3darray, totensor in tsai.utils useful for handling cpu, cuda, 2d, 3d cases, also for augmentations
- to3darray: a[:, None] assumes that the 2d array has batch, sequence dimensions, i.e. channel dimension missing

In [None]:
#int
x_cont, x_dis, dep, n_train = get_discrete_config()

In [None]:
#int
items_d, _ = df_to_items_discrete(df_main, [x_cont, x_dis], dep, n_train)

#int
X_cont, X_dis, y = _items_to_arrays(items_d)
X_cont.shape, X_dis.shape, y.shape

assert np.issubdtype(X_dis.dtype, np.integer), 'discrete array dtype should be an np.integer subtype'

(200000, 4, 10) (200000, 2, 10)


In [None]:
a = np.random.randn(2,10)

In [None]:
a.shape, a[None].shape, a[:, None].shape

((2, 10), (1, 2, 10), (2, 1, 10))

In [None]:
type(dsets.items[0][0])

numpy.ndarray

NOTE: slicing into the torchtools tensor types does not preserve the type but returns plain PyTorch tensors.
The tsai subtypes define `__getitem__` as shown below for that reason:
```
def __getitem__(self, idx):
        res = super().__getitem__(idx)
        return res.as_subclass(type(self))
```

In [None]:
class TSDatasets(NumpyDatasets):
    "A dataset that creates tuples from X (optionally X_dis and y) and applies `item_tfms`"
    # Expected X, X_dis and y output types (torch.Tensor - default - or subclass)
    _xtype, _xdistype, _ytype = TSTensor, TSIntTensor, None
    def __init__(self, X=None, X_dis=None, y=None, items=None, sel_vars=None, sel_steps=None, tfms=None, tls=None, n_inp=None, dl_type=None,
                 inplace=True, **kwargs):
        """
        Build the dataset either from arrays (`X`, `X_dis`, `y`) or from ready-made `tls`.

        X:         first input array; passed through `to3darray` and split into items.
        X_dis:     optional second input array, handled like `X`.
        y:         optional targets.
        items:     overwritten when the dataset is built from arrays; kept for signature compatibility.
        sel_vars, sel_steps: stored (default `slice(None)`); not applied while slicing is disabled
                   in `__getitem__`.
        tfms:      one entry per array; defaults to `None` for each of them.
        tls:       existing lists (this is what `subset` passes); when given, the arrays are not used.
        n_inp:     number of inputs; defaults to all tuple elements but the last (1 for a single element).
        dl_type:   accepted but not used in this method.
        inplace:   if True the items are stacked into tensors once (memmap items are only sliced).
        kwargs:    forwarded to `TfmdLists` (e.g. `splits`).
        """
        self.inplace = inplace
        if tls is None:
            X = itemify(to3darray(X), tup_id=0)
            if X_dis is not None: X_dis = itemify(to3darray(X_dis), tup_id=0)
            if y is not None: y = itemify(y, tup_id=0)
            # One entry per array that was passed, always in (X, X_dis, y) order. The previous
            # `tuple(X, X_dis,)` raised a TypeError whenever X_dis was given without y.
            items = tuple(o for o in (X, X_dis, y) if o is not None)
            self.tfms = L(ifnone(tfms,[None]*len(items)))

        self.sel_vars = ifnone(sel_vars, slice(None))
        self.sel_steps = ifnone(sel_steps,slice(None))
        self.tls = L(tls if tls else [TfmdLists(item, t, **kwargs) for item,t in zip(items,self.tfms)])
        # Instances built from `tls` used to end up without a `tfms` attribute, although `subset`
        # reads `self.tfms` -- taking a subset of a subset failed with an AttributeError.
        if tls is not None: self.tfms = L(ifnone(tfms,[None]*len(self.tls)))
        self.n_inp = (1 if len(self.tls)==1 else len(self.tls)-1) if n_inp is None else n_inp
        if len(self.tls[0]) > 0:
            # NOTE(review): two lists are always read as (X, y). An (X, X_dis) dataset without y
            # therefore gets `_ytype` for X_dis, and X_dis is counted as the target by the default
            # `n_inp` -- confirm whether that combination needs dedicated handling.
            _tls_types = [self._xtype, self._ytype] if len(self.tls)==2 else [self._xtype, self._xdistype, self._ytype]
            # a `None` expected type falls back to the item's own tensor type, or to `tensor`
            self.types = L([ifnone(_typ, type(tl[0]) if isinstance(tl[0], torch.Tensor) else tensor) for
                            tl,_typ in zip(self.tls, _tls_types)])
            # inplace: stack every list into one tensor up front, except memmap items which are only sliced
            self.ptls = L([tl if not self.inplace else tl[:] if type(tl[0]).__name__ == 'memmap' else
                           tensor(stack(tl[:])) for tl in self.tls])

    def __getitem__(self, it):
        "Return one element per list, each indexed with `it` and wrapped in its output type"
        # selecting variables / steps through `sel_vars` and `sel_steps` is not enabled for now
        return tuple(typ(ptl[it]) for ptl,typ in zip(self.ptls,self.types))

    def subset(self, i):
        "New dataset of the same class built from `tl.subset(i)` of every list, with the same settings"
        return type(self)(tls=L(tl.subset(i) for tl in self.tls), n_inp=self.n_inp,
                          inplace=self.inplace, tfms=self.tfms,
                          sel_vars=self.sel_vars, sel_steps=self.sel_steps)

    @property
    def vars(self):
        "Size of the second-to-last dimension of the first element of the first item"
        return self[0][0].shape[-2]

    @property
    def len(self):
        "Size of the last dimension of the first element of the first item"
        return self[0][0].shape[-1]


In [None]:
splits = FixedSplitter()(df_main)
list(map(len,splits))

[10000, 5000]

In [None]:
## smaller ground dataset
x1, x2, y2 = X_cont[:20000], X_dis[:20000], y[:20000]

In [None]:
tfms=None
dsets_d = TSDatasets(X=x1, X_dis=x2, y=y2, tfms=tfms, splits=FixedSplitter()(df_main), inplace=True)

In [None]:
dsets_d.tfms

(#3) [None,None,None]

In [None]:
norm = TSNormalize(by_sample=False, by_var=True)
tszero = TimestepZero(verbose=True)

In [None]:
dls_d = TSDataLoaders.from_dsets(dsets_d.train, dsets_d.valid, bs=[256, 512], num_workers=0, 
                              after_batch=Pipeline(noop),
                            batch_tfms=[norm, ToTT, tszero])

In [None]:
xc, xd, y = dls_d[0].one_batch()

timestepzero
torch.Size([256, 4, 10])


In [None]:
xc.shape, xd.shape, y.shape

(torch.Size([256, 4, 10]), torch.Size([256, 2, 10]), torch.Size([256]))

In [None]:
#export
class InceptionTimeD(nn.Module):
    '''
    InceptionTime for two inputs `xc` and `xd`: both are cast to float and concatenated along
    dim -2 before the network; a sigmoid layer is added to get the output in a certain range
    '''
    
    def __init__(self, n_in, n_out, verbose=False):
        '''
        n_in:    first argument of `InceptionTime`; `forward` feeds it the concatenation of `xc` and
                 `xd`, so presumably this has to be their combined size along dim -2 -- TODO confirm
        n_out:   second argument of `InceptionTime`
        verbose: if True, print the dtype of the concatenated input on every forward pass
                 (debugging aid; this print used to be unconditional)
        '''
        super().__init__()
        self.verbose = verbose
        self.mod = nn.Sequential(InceptionTime(n_in, n_out), Sigmoid(-1., 1.))
        
    def forward(self, xc, xd):
        # both parts are cast to float before concatenation, so no further cast of the result is needed
        x = torch.cat([xc.float(), xd.float()], dim=-2)
        if self.verbose: print(f'InceptionTimeD dtype {x.dtype}')
        return self.mod(x)

In [None]:
model = InceptionTimeD(6,1)
loss_fn = get_loss_fn('leaky_loss', alpha=0.5)

In [None]:
learn = Learner(dls_d, model, loss_fn, metrics=[unweighted_profit, partial(unweighted_profit, threshold=0.5)])

In [None]:

learn.fit_one_cycle(5)

epoch,train_loss,valid_loss,unweighted_profit,unweighted_profit.1,time
0,-0.169455,-0.461633,0.340326,0.243023,00:02
1,-0.226074,-0.585204,0.308796,0.214964,00:02
2,-0.313402,-0.615366,0.289434,0.230191,00:02
3,-0.403389,-0.668161,0.271083,0.229616,00:02
4,-0.507796,-0.660136,0.275134,0.229616,00:02


timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([

timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([256, 4, 10])
InceptionTimeSgm dtype torch.float32
timestepzero
torch.Size([