In [1]:
import pandas as pd
from signalai.config import CONFIG_DIR, PIPELINE_SAVE_PATH
from taskorganizer.pipeline import Pipeline
from signalai.signal_tools.signal import SignalDataset, Signal

import torch 
import torch.nn as nn
import torch.nn.functional as F
from inceptiontime import InceptionBlock
import torch.optim as optim
from tqdm import tqdm, trange
import numpy as np

%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'svg'

In [2]:
config_path = CONFIG_DIR / "processing" / "pipeline.yaml"
params_config_path = CONFIG_DIR / "data_preparation" / "diamond_noise_cropped.yaml"
generator_config_path = CONFIG_DIR / "generators" / "diamond.yaml"
pip = Pipeline(
    config_path,
    config_dir=CONFIG_DIR,
    save_folder=PIPELINE_SAVE_PATH,
    parameter_yamls=[params_config_path, generator_config_path]
)

In [3]:
df = pip.run("DatasetLoader")

In [4]:
df.groupby("interval_start").count()

Unnamed: 0_level_0,dataset,filename,filename_id,channel_id,split,interval_end,interval_length,values,frequency,big_endian,source_dtype,dtype_bytes,signed,op_dtype,to_ram,standardize,adjustment,dataset_id,dataset_total
interval_start,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1
9500,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000,8000


In [5]:
df.query("filename_id=='AEforANN-tube1-diamond-loc05_ch_032.bin-0-train'")

Unnamed: 0,dataset,filename,filename_id,channel_id,split,interval_start,interval_end,interval_length,values,frequency,big_endian,source_dtype,dtype_bytes,signed,op_dtype,to_ram,standardize,adjustment,dataset_id,dataset_total
4124,diamond_loc05,/mnt/AXAGO/Martin/signalai/diamond/AEforANN-tu...,AEforANN-tube1-diamond-loc05_ch_032.bin-0-train,0,train,9500,25884,16384,100000,1562500,True,float32,4,True,float32,True,True,0,5,10
4125,diamond_loc05,/mnt/AXAGO/Martin/signalai/diamond/AEforANN-tu...,AEforANN-tube1-diamond-loc05_ch_032.bin-0-train,1,train,9500,25884,16384,100000,1562500,True,float32,4,True,float32,True,True,0,5,10
4126,diamond_loc05,/mnt/AXAGO/Martin/signalai/diamond/AEforANN-tu...,AEforANN-tube1-diamond-loc05_ch_032.bin-0-train,2,train,9500,25884,16384,100000,1562500,True,float32,4,True,float32,True,True,0,5,10
4127,diamond_loc05,/mnt/AXAGO/Martin/signalai/diamond/AEforANN-tu...,AEforANN-tube1-diamond-loc05_ch_032.bin-0-train,3,train,9500,25884,16384,100000,1562500,True,float32,4,True,float32,True,True,0,5,10


In [6]:
gen_gen = pip.run("data_generator")

Loading datasets ['diamond_loc00', 'diamond_loc01', 'diamond_loc02', 'diamond_loc03', 'diamond_loc04', 'diamond_loc05', 'diamond_loc06', 'diamond_loc07', 'diamond_loc08', 'diamond_loc09'] to RAM: 100%|██████████| 2000/2000 [00:49<00:00, 40.67it/s]


In [24]:
gen_gen.signal_loader.loaded_signals["AEforANN-tube1-diamond-loc00_ch_001.bin-0-valid"]

                                                               0
dataset                                            diamond_loc00
filename_id      AEforANN-tube1-diamond-loc00_ch_001.bin-0-valid
split                                                      valid
interval_start                                              9500
interval_end                                               25884
interval_length                                            16384
values                                                    100000
frequency                                                1562500
big_endian                                                  True
source_dtype                                             float32
dtype_bytes                                                    4
signed                                                      True
op_dtype                                                 float32
to_ram                                                      True
standardize              

In [14]:
gen = gen_gen.get_generator("train", log=0, batch_size=16)
#val_gen = gen_gen.get_generator("train", log=0)

In [15]:
#for i, j in enumerate(gen.tracks['t0']['datasets']['diamond_loc01'].loaded_signals.values()):
    #print(j.next(gen)signal)
    #if i == 200:
        #break

In [17]:
%%time
for i in tqdm(range(2000)):
    X, Y = next(gen)
    _=torch.from_numpy(np.array(X)).to("cuda")
    _=torch.from_numpy(np.array(Y)).to("cuda")
        

100%|██████████| 2000/2000 [00:05<00:00, 381.27it/s]

CPU times: user 5.22 s, sys: 27.4 ms, total: 5.25 s
Wall time: 5.25 s





In [10]:
class Flatten(nn.Module):
	def __init__(self, out_features):
		super(Flatten, self).__init__()
		self.output_dim = out_features

	def forward(self, x):
		return x.view(-1, self.output_dim)

In [11]:
device = "cuda"

In [12]:
net = nn.Sequential(
    InceptionBlock(
        in_channels=4,
        n_filters=32,
        kernel_sizes=[5, 11, 23],
        bottleneck_channels=32,
        use_residual=True,
        activation=nn.SELU()
    ),
    InceptionBlock(
        in_channels=32 * 4,
        n_filters=32,
        kernel_sizes=[5, 11, 23],
        bottleneck_channels=32,
        use_residual=True,
        activation=nn.SELU()
    ),
    nn.AdaptiveAvgPool1d(output_size=1),
    Flatten(out_features=32 * 4 * 1),
    nn.Linear(in_features=4 * 32 * 1, out_features=11),
    nn.Softmax(dim=-1)
).to(device)

In [13]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

running_loss = 0.0

In [14]:
echo_step = 400
batches_id = trange(4000)
for train_batch in batches_id:  # loop over the dataset multiple times

    X, Y = next(gen)
    inputs = torch.from_numpy(np.array(X)).to(device)
    labels = torch.from_numpy(np.array(Y)).to(device)
    # print(labels)
    # zero the parameter gradients
    optimizer.zero_grad()

    # forward + backward + optimize
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()

    # print statistics
    running_loss += loss.item()
    batches_id.set_description(f"Loss: {loss.item(): .04f}")
    if train_batch % echo_step == 0:
        print()

print('Finished Training')

  return torch.max_pool1d(input, kernel_size, stride, padding, dilation, ceil_mode)
Loss:  2.3968:   0%|          | 1/4000 [00:00<25:43,  2.59it/s]




Loss:  2.3483:  10%|█         | 401/4000 [01:34<14:04,  4.26it/s]




Loss:  2.2613:  20%|██        | 801/4000 [03:08<12:26,  4.29it/s]




Loss:  2.2127:  30%|███       | 1201/4000 [04:45<10:55,  4.27it/s]




Loss:  2.1360:  40%|████      | 1601/4000 [06:19<09:25,  4.25it/s]




Loss:  2.0759:  50%|█████     | 2001/4000 [07:53<07:54,  4.22it/s]




Loss:  2.0857:  60%|██████    | 2401/4000 [09:28<06:16,  4.25it/s]




Loss:  2.0274:  70%|███████   | 2801/4000 [11:02<04:37,  4.31it/s]




Loss:  1.8874:  80%|████████  | 3201/4000 [12:36<03:10,  4.18it/s]




Loss:  1.8857:  90%|█████████ | 3601/4000 [14:10<01:34,  4.24it/s]




Loss:  1.8550: 100%|██████████| 4000/4000 [15:44<00:00,  4.24it/s]

Finished Training





In [15]:
with torch.no_grad():
    inputs, labels = next(val_gen)
    inputs = torch.tensor(inputs, device="cuda", dtype=torch.float32).unsqueeze(0)
    labels = torch.tensor(labels, device="cuda", dtype=torch.long).unsqueeze(0)
    print(net(inputs))

NameError: name 'val_gen' is not defined

In [None]:
s,_=next(gen)
s=Signal(s)

In [None]:
128**2

In [None]:
s.show()

In [None]:
s.signal