In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from inceptiontime import InceptionBlock, InceptionModule
from matplotlib import pyplot as plt
from taskorganizer.pipeline import Pipeline
from tqdm import tqdm, trange

from signalai.config import CONFIG_DIR, PIPELINE_SAVE_PATH
from signalai.signal_tools.signal import SignalDataset, Signal

%load_ext autoreload
%autoreload 2
%config InlineBackend.figure_format = 'svg'

In [2]:
from scipy import signal
def gauss_convolve(arr, window_length, rel_std):
    """Smooth every row of a 2-D array with a unit-sum Gaussian kernel.

    Args:
        arr: 2-D array; each row ``arr[i, :]`` is filtered independently.
        window_length: Number of taps in the Gaussian window.
        rel_std: Standard deviation of the Gaussian expressed as a fraction of
            ``window_length`` (the absolute std is ``window_length * rel_std``).

    Returns:
        A NumPy array assembled from the per-row
        ``np.convolve(row, kernel, mode='same')`` results, one output row per
        input row.
    """
    kernel = signal.windows.gaussian(window_length, std=window_length * rel_std)
    # Rescale the taps so that they sum to one.
    kernel = kernel / np.sum(kernel)

    smoothed_rows = []
    for row_idx in range(arr.shape[0]):
        smoothed_rows.append(np.convolve(arr[row_idx, :], kernel, mode='same'))

    return np.array(smoothed_rows)

In [3]:
# Pipeline definition and the dataset-specific parameter file, both resolved
# relative to the project's configuration directory.
config_path = CONFIG_DIR / "processing" / "pipeline.yaml"
params_config_path = CONFIG_DIR / "data_preparation" / "tahovka0.yaml"

# Build the pipeline from the two YAML files; its steps are executed later via
# `pip.run(...)`.
pip = Pipeline(
    config_path,
    config_dir=CONFIG_DIR,
    save_folder=PIPELINE_SAVE_PATH,
    parameter_yamls=[params_config_path]
)

# Prefer the GPU, but fall back to the CPU so that every later `.to(device)`
# call still works on a machine without CUDA instead of raising.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [4]:
# Alternative pipeline step kept from the original notebook for reference:
# df = pip.run("DatasetLoader")
# df.sample(4)

# Run the "data_generator" pipeline step, then request its "train" generator
# with batch_size=16.
gen_gen = pip.run("data_generator")
train_gen = gen_gen.get_generator(
    "train",
    log=0,
    batch_size=16,
)

Loading datasets ['noise', 'tahovka_normal', 'tahovka_plastized'] to RAM: 100%|██████████| 15/15 [00:01<00:00,  8.68it/s]


In [5]:
%%time
for i in trange(1000):
    X, Y = next(train_gen)
    _=torch.from_numpy(np.array(X)).to(device)
    _=torch.from_numpy(np.array(Y)).to(device)

100%|██████████| 1000/1000 [00:03<00:00, 256.05it/s]

CPU times: user 3.46 s, sys: 414 ms, total: 3.88 s
Wall time: 3.91 s





In [6]:
class Net(nn.Module):
    """Binary classifier: InceptionBlock -> global average pool -> linear -> sigmoid.

    Args:
        in_channels: ``in_channels`` passed to the InceptionBlock (default 1).
        n_filters: ``n_filters`` passed to the InceptionBlock (default 32). The
            linear head expects ``4 * n_filters`` pooled features, mirroring the
            ``4 * 32`` the original code hard-coded (assumes the block emits
            four filter groups per sample -- TODO confirm against inceptiontime).
    """

    def __init__(self, in_channels=1, n_filters=32):
        super().__init__()
        self.inception_block1 = InceptionBlock(
            in_channels=in_channels,
            n_filters=n_filters,
            kernel_sizes=[11, 21, 41],
            bottleneck_channels=32,
            use_residual=True,
            activation=nn.SELU()
        )
        # Collapse the last axis to length 1 so the head is independent of the
        # input length.
        self.adaptive_pool = nn.AdaptiveAvgPool1d(output_size=1)
        self.linear1 = nn.Linear(in_features=4 * n_filters, out_features=1)
        self.out_activation = nn.Sigmoid()

    def forward(self, x):
        """Return one sigmoid output per input sample, shaped ``(batch, 1)``."""
        x = self.inception_block1(x)
        x = self.adaptive_pool(x)
        # Flatten everything except the batch axis. Unlike `view(-1, 128)`,
        # this never folds samples together when the channel count is off;
        # the linear layer raises a shape error instead.
        x = x.flatten(start_dim=1)
        x = self.linear1(x)
        return self.out_activation(x)

    
# Build the network, then move its parameters to the configured device.
# `nn.Module.to` returns the module itself, so rebinding keeps the same object.
net = Net()
net = net.to(device)

In [7]:
# Per-batch loss history; the training loop appends one value per step.
losses = []

# Binary cross-entropy on probabilities (the network ends in a sigmoid).
criterion = nn.BCELoss()

# Adam over all parameters of `net`.
optimizer = optim.Adam(
    net.parameters(),
    lr=0.01,
)
# Alternative optimiser kept from the original notebook for reference:
# optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [8]:
def evaluate(model, output_name, batch_size=32, n_batches=None):
    """Checkpoint ``model`` and plot its smoothed predictions over the "X_all" data.

    The state dict is written to ``output_name + ".pth"`` and the prediction
    curve (smoothed with ``gauss_convolve(..., 30, 0.8)``) to
    ``output_name + ".svg"``.

    Args:
        model: Network to checkpoint and run; it is called on batches moved to
            the notebook-level ``device``.
        output_name: Path prefix (without extension) shared by both output
            files. Missing parent directories are created.
        batch_size: Batch size requested from the generator (default 32, the
            value the notebook originally hard-coded).
        n_batches: Number of batches to evaluate. Defaults to
            ``1072656250 // batch_size // 32734``, the count the notebook
            originally hard-coded (presumably total samples / batch size /
            window length -- TODO confirm).

    Side effects:
        Writes two files and leaves the matplotlib figure open (it is not
        closed here). The model's train/eval mode is restored to what it was
        on entry, even if evaluation raises.
    """
    # Create the target folder up front so a missing directory cannot abort a
    # long training run at checkpoint time.
    Path(output_name).parent.mkdir(parents=True, exist_ok=True)
    torch.save(model.state_dict(), output_name + ".pth")

    if n_batches is None:
        n_batches = 1072656250 // batch_size // 32734

    was_training = model.training
    model.eval()
    batch_outputs = []
    try:
        with torch.no_grad():
            all_gen = gen_gen.get_generator(
                split=None, log=0, batch_size=batch_size, x="X_all", y="Y_all"
            )
            for _batch_idx in range(n_batches):
                X, _labels = next(all_gen)
                inputs = torch.from_numpy(np.array(X)).to(device)
                batch_outputs.append(model(inputs).cpu())
    finally:
        # Restore the caller's mode instead of unconditionally forcing train().
        model.train(was_training)

    if not batch_outputs:
        # Nothing was evaluated, so there is nothing to plot.
        return

    # Concatenate once instead of building a Python list of per-sample tensors;
    # column 0 holds the single output unit.
    np_results = torch.cat(batch_outputs).numpy()[:, 0]

    fig, ax = plt.subplots(figsize=(16, 9))
    sns.lineplot(
        y=gauss_convolve(np.expand_dims(np_results, 0), 30, 0.8)[0],
        x=range(len(np_results)),
        ax=ax,
    )
    fig.savefig(output_name + ".svg")

In [None]:
# Training-loop cadence, all counted in batches:
#   echo_step      - emit a blank line every this many steps
#   save_step      - checkpoint + plot via evaluate() every this many steps
#   average_losses - window length of the running mean shown in the progress bar
echo_step = 300
save_step = 1500
average_losses = 300
batches_id = trange(3000)

for train_batch in batches_id:  # one optimisation step per generated batch (at most 3000)
    
    # Draw the next training batch and move it to the configured device;
    # labels are cast to float32 and given a trailing axis of size 1.
    X, Y = next(train_gen)
    inputs = torch.from_numpy(np.array(X)).to(device)
    labels = torch.from_numpy(np.array(Y)).type(torch.float32).unsqueeze(1).to(device)
    # Clear gradients accumulated by the previous step.
    optimizer.zero_grad()

    # Forward pass, loss, backward pass, parameter update.
    outputs = net(inputs)
    loss = criterion(outputs, labels)
    loss.backward()
    optimizer.step()
    # Record the scalar loss; `losses` lives at notebook level, so re-running
    # this cell keeps extending the same history.
    losses.append(loss.item())
    
    # Running mean over the most recent `average_losses` steps (fewer at the
    # start); stop early once it drops below 1e-5.
    mean_loss = np.mean(losses[-average_losses:])
    if mean_loss < 1e-5:
        break
        
    batches_id.set_description(f"Loss: {mean_loss: .08f}")
    if train_batch % echo_step == 0:
        # Emit a newline -- apparently so the progress bar state at this step
        # stays visible in the cell output.
        print()
    
    # Periodic checkpoint + prediction plot (skipped at step 0).
    if train_batch % save_step == 0 and train_batch > 0:
        evaluate(net, f"tahovka/{train_batch}-noise-adam0.01")
        
# Final checkpoint + plot at the last executed step.
# NOTE(review): this file name uses "without_noise-adam_0.01" while the periodic
# checkpoints use "noise-adam0.01" -- confirm which label is intended.
evaluate(net, f"tahovka/{train_batch}-without_noise-adam_0.01")
print('Finished Training')

Loss:  0.67706859:   0%|          | 1/3000 [00:00<19:55,  2.51it/s]




Loss:  0.01696497:  10%|█         | 301/3000 [01:15<11:42,  3.84it/s]




Loss:  0.00295352:  20%|██        | 601/3000 [02:33<10:10,  3.93it/s]




Loss:  0.00155896:  30%|███       | 901/3000 [03:50<08:51,  3.95it/s]




Loss:  0.01259910:  40%|████      | 1201/3000 [05:06<07:43,  3.89it/s]




Loss:  0.01000556:  50%|█████     | 1500/3000 [06:22<06:27,  3.87it/s]




Loss:  0.00039937:  60%|██████    | 1801/3000 [11:11<05:00,  4.00it/s]   




Loss:  0.00078201:  65%|██████▍   | 1949/3000 [11:48<04:22,  4.00it/s]