In [1]:
import pandas as pd
from google.colab import drive
import os
import torch
from torch import nn
import torch.optim as optim
from torch.utils.tensorboard import SummaryWriter
from torch.utils.data import DataLoader
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from sklearn.metrics import matthews_corrcoef # as mcc
from logging import logMultiprocessing
import torchvision
import numpy as np
import matplotlib.pyplot as plt

In [2]:
drive.mount("/content/Drive")
path_to_folder = os.path.join("Drive", "MyDrive", "data")

Mounted at /content/Drive


In [3]:
list_of_filenames = os.listdir(path_to_folder)

Откроем один файл

In [6]:
i = 0
path_to_file = os.path.join(path_to_folder, list_of_filenames[i])

In [7]:
path_to_file

'Drive/MyDrive/data/01122020_Block1_labels.txt'

In [9]:
data = pd.read_csv(path_to_file)
data.head()

Unnamed: 0,Time_ms,Voltage,labels
0,0.0,0.002348,0
1,0.1,0.002658,0
2,0.2,0.002348,0
3,0.3,0.001568,0
4,0.4,0.001408,0


In [12]:
voltage = data['Voltage'].to_numpy()
labels = data['labels'].to_numpy()
print(len(voltage), len(labels))

459776 459776


# Datasets and dataloaders

In [4]:
def make_patches(voltage, labels, patch_len = 100, stride = 10):
  voltage_patches = []
  label_patches = []
  length = len(voltage)
  length = length - (length % patch_len)
  for i in range(0, length, stride):
    v_p = voltage[i:i+patch_len]
    l_p = labels[i:i+patch_len]
    if sum(l_p) == 0 or l_p[0] == 1 or l_p[-1] == 1:
      continue
    voltage_patches.append(v_p)
    label_patches.append(l_p)
  return voltage_patches, label_patches

In [5]:
def load_group(list_of_filenames, path_to_folder):
  all_voltage_patches = []
  all_label_patches = []
  for filename in list_of_filenames:
    path_to_file = os.path.join(path_to_folder, filename)
    data = pd.read_csv(path_to_file)
    voltage = torch.tensor(data['Voltage'].values, dtype=torch.float32)
    labels = data['labels'].to_numpy()
    voltage_patches, label_patches = make_patches(voltage, labels)
    all_voltage_patches += voltage_patches
    all_label_patches += label_patches
  return all_voltage_patches, all_label_patches

In [6]:
class MyDataset(Dataset):
  def __init__(self, Xs, ys):
    self.data = Xs
    self.labels = ys
  def __len__(self):
        return len(self.data)
  def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]

In [7]:
all_voltage_patches, all_label_patches = load_group(list_of_filenames, path_to_folder)
len(all_voltage_patches)

14092

In [8]:
X_train, X_test, y_train, y_test = train_test_split(all_voltage_patches, all_label_patches,
                                                    random_state=42, shuffle=True, test_size=0.2)

In [9]:
train_dataset = MyDataset(X_train, y_train)
val_dataset = MyDataset(X_test, y_test)

In [10]:
train_loader = DataLoader(train_dataset, batch_size = 50,shuffle =True)
val_loader = DataLoader(val_dataset, batch_size = 10,shuffle =False)

In [37]:
batch, labels = next(iter(train_loader))
labels[1]

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0])

# Models

N′=1+(N−F)/S.

In [11]:
class MiniUnet(nn.Module):

    def __init__(self):
        super(MiniUnet, self).__init__()
        self.maxpool = nn.MaxPool1d(kernel_size=2, stride=2)
        self.downblock = downblock()
        self.bottleneck_conv = nn.Conv1d(in_channels=50, out_channels=50, kernel_size=3, stride=1, padding=1, dilation=1)
        self.upblock = upblock()
        self.last_conv = nn.Conv1d(in_channels=10, out_channels=1, kernel_size=1, stride=1, padding=0, dilation=1)
        
    def forward(self, x):
        x = x.unsqueeze(1)
        #print(x.shape)
        layer1 = self.downblock.forward(x) #100*1 -> 100*50
        #print(x.shape)
        x = self.maxpool(layer1) #100*50 -> 50*50
        #print(x.shape)
        x = self.bottleneck_conv(x) #50*50 -> 50*50
        #print(x.shape)
        x = self.upblock(x, layer1) #50*50 -> 100*10
        #print(x.shape)
        out = self.last_conv(x) #100*10 -> 100*1
        #print(out.shape)

        return out #


class downblock(nn.Module):

    def __init__(self):
        super(downblock, self).__init__()
        self.conv01 = nn.Conv1d(in_channels=1, out_channels=50, kernel_size=11, stride=1, padding=5, dilation=1)
        self.bn01 = nn.BatchNorm1d(50)
        self.conv02 = nn.Conv1d(in_channels=50, out_channels=50, kernel_size=11, stride=1, padding=5, dilation=1) 
        self.bn02 = nn.BatchNorm1d(50)
        self.relu = nn.ReLU()
    
    def forward(self, x):
        x = self.conv01(x)
        x = self.bn01(x)
        x = self.relu(x)
        x = self.conv02(x)
        x = self.bn02(x)
        out = self.relu(x)

        return out

class upblock(nn.Module):

    def __init__(self):
      super(upblock, self).__init__()
      self.up = nn.Upsample(scale_factor=2, mode='linear')
      self.conv01 = nn.Conv1d(in_channels=100, out_channels=50, kernel_size=11, stride=1, padding=5, dilation=1)
      self.bn01 = nn.BatchNorm1d(50)
      self.conv02 = nn.Conv1d(in_channels=50, out_channels=10, kernel_size=11, stride=1, padding=5, dilation=1)
      self.bn02 = nn.BatchNorm1d(10)
      self.relu = nn.ReLU()
      

    def forward(self, x, layer):
      x = self.up(x)
      #print(x.shape)
      x = torch.cat((x, layer), dim=1)
      x = self.conv01(x)
      x = self.bn01(x)
      x = self.relu(x)
      x = self.conv02(x)
      x = self.bn02(x)
      out = self.relu(x)  

      return out 

In [None]:
# Fix https://stackoverflow.com/questions/60730544/tensorboard-colab-tensorflow-api-v1-io-gfile-has-no-attribute-get-filesystem
import tensorflow as tf
import tensorboard as tb
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile

import os
def reinit_tensorboard(clear_log = True):
  logs_base_dir = "runs"
  if clear_log:
    !rm -rfv {logs_base_dir}/*
    os.makedirs(logs_base_dir, exist_ok=True)
  %load_ext tensorboard
  %tensorboard --logdir {logs_base_dir}

In [12]:
def IoU_fun(outputs, labels):
  outputs = outputs.cpu().detach().numpy()
  outputs = outputs > 0.5
  labels = labels.cpu().detach().numpy().astype('int')
  #print(outputs, labels)
  #print((outputs & labels).sum())
  #print((outputs | labels).sum())
  if (outputs | labels).sum() == 0:
    IoU = 0
  else:
    IoU = (outputs & labels).sum() / (outputs | labels).sum()
  return IoU

In [16]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def show_results(model,epoch, criterion):
  '''
    Write obtained masks to tensorboard log
  '''
  losses = []
  IoUs = []
  for voltage, label in val_loader:
    outputs = model(voltage.to(device))
    label = label.to(device).unsqueeze(1)
    #print(label.size(), outputs.size())
    loss = criterion(outputs, label.float())
    losses.append(loss.item())
    IoU = IoU_fun(outputs, label)
    IoUs.append(IoU)
  #writer.add_image("Images", torchvision.utils.make_grid(im),epoch)
  #writer.add_image("Masks", torchvision.utils.make_grid(outputs),epoch)
  #writer.flush()
  #writer.close()
  losses = sum(losses)/len(losses)
  IoUs = sum(IoUs)/len(IoUs)
  print("loss for epoch", epoch, "is", losses)
  print("IoU is", IoUs)
  return losses, IoUs

def train(model):
 
  model.train()
  model.to(device)

  criterion = nn.BCEWithLogitsLoss()
  optimizer = optim.Adam(model.parameters(), lr=1e-5)
  best_accuracy = 0
  
  list_of_losses = []
  list_of_IoU = []

  for epoch in range(50):
    for voltage, label in train_loader:
      model.train()
      voltage = voltage.to(device)
      label = label.to(device).unsqueeze(1)
      optimizer.zero_grad()
      output = model(voltage)
      loss = criterion(output, label.float())
      loss.backward()
      optimizer.step()

    model.eval()
    losses, IoU = show_results(model,epoch, criterion)
    list_of_losses.append(losses)
    list_of_IoU.append(IoU)
  return(list_of_losses, list_of_IoU)

In [17]:
model = MiniUnet()
train(model)

loss for epoch 0 is 0.49640659332698117
IoU is 0.7542950540844237
loss for epoch 1 is 0.4744671425709488
IoU is 0.8183284924147575
loss for epoch 2 is 0.46471121886097794
IoU is 0.8375048480376933
loss for epoch 3 is 0.4571142480939838
IoU is 0.8458625273243664
loss for epoch 4 is 0.45320709884589444
IoU is 0.8561508240999901
loss for epoch 5 is 0.44770204376244377
IoU is 0.8560489842951174
loss for epoch 6 is 0.4445496293458533
IoU is 0.864691623297035
loss for epoch 7 is 0.43465584611639063
IoU is 0.8669212429705381
loss for epoch 8 is 0.4360389975791282
IoU is 0.8641347036992179
loss for epoch 9 is 0.4321307807315326
IoU is 0.8612685954831658
loss for epoch 10 is 0.4221947343214184
IoU is 0.8715614215404495
loss for epoch 11 is 0.42078879856048745
IoU is 0.8744113446302797
loss for epoch 12 is 0.4128914583236613
IoU is 0.8781005234583033
loss for epoch 13 is 0.4053884319802548
IoU is 0.8775685447505327
loss for epoch 14 is 0.40593474765195914
IoU is 0.8798953705925374
loss for epoch

([0.49640659332698117,
  0.4744671425709488,
  0.46471121886097794,
  0.4571142480939838,
  0.45320709884589444,
  0.44770204376244377,
  0.4445496293458533,
  0.43465584611639063,
  0.4360389975791282,
  0.4321307807315326,
  0.4221947343214184,
  0.42078879856048745,
  0.4128914583236613,
  0.4053884319802548,
  0.40593474765195914,
  0.4005045699523696,
  0.3991118553259694,
  0.3922876881580826,
  0.3899961775499033,
  0.389696737130483,
  0.387196642709962,
  0.38078730960264273,
  0.373157374296628,
  0.37068458736365567,
  0.3688809613809518,
  0.36264653387644613,
  0.3584478726835116,
  0.3601627210353283,
  0.35015756293391503,
  0.3555061175679484,
  0.3406515900336259,
  0.3461790995817658,
  0.33813060164874326,
  0.33681755623918896,
  0.3336431969564857,
  0.32628704829418914,
  0.3200967104722422,
  0.3270763395525885,
  0.3237378292895378,
  0.32116449788107093,
  0.31021296439018653,
  0.3113598084830223,
  0.30504315563127504,
  0.3015406160278523,
  0.30135835051959

In [43]:
model = MiniUnet()
list_of_losses, list_of_IoU = train(model)

[1;30;43mВыходные данные были обрезаны до нескольких последних строк (5000).[0m

 [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
   1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]

 [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1
   1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]

 [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1
   1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]

 [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
   0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1
   1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0]]

 [[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 

KeyboardInterrupt: ignored

In [45]:
def make_patches(voltage, labels, patch_len = 100, stride = 100):
  voltage_patches = []
  label_patches = []
  length = len(voltage)
  length = length - (length % patch_len)
  for i in range(0, length, stride):
    v_p = voltage[i:i+patch_len]
    l_p = labels[i:i+patch_len]
    if len(v_p) != 100:
      continue
    voltage_patches.append(v_p)
    label_patches.append(l_p)
  return voltage_patches, label_patches

In [48]:
list_of_filenames = list_of_filenames[:5]

In [49]:
list_of_filenames

['01122020_Block1_labels.txt',
 '01122020_Block2_labels.txt',
 '01122020_Block3_labels.txt',
 '01122020_Block4_labels.txt',
 '01122020_Block5_labels.txt']

In [50]:
all_voltage_patches, all_label_patches = load_group(list_of_filenames, path_to_folder)
len(all_voltage_patches)

30410

In [51]:
X_train, X_test, y_train, y_test = train_test_split(all_voltage_patches, all_label_patches,
                                                    random_state=42, shuffle=True, test_size=0.2)

In [52]:
train_dataset = MyDataset(X_train, y_train)
val_dataset = MyDataset(X_test, y_test)

In [53]:
train_loader = DataLoader(train_dataset, batch_size = 50,shuffle =True)
val_loader = DataLoader(val_dataset, batch_size = 10,shuffle =False)

In [54]:
model = MiniUnet()
list_of_losses, list_of_IoU = train(model)

loss for epoch 0 is 0.3830710134189117
IoU is 0.0
loss for epoch 1 is 0.2857027621006927
IoU is 0.0
loss for epoch 2 is 0.20924646256886092
IoU is 0.0
loss for epoch 3 is 0.1589877561640074
IoU is 0.0
loss for epoch 4 is 0.1121510333933658
IoU is 0.0
loss for epoch 5 is 0.07757979423443868
IoU is 0.0
loss for epoch 6 is 0.059037927128313406
IoU is 0.0
loss for epoch 7 is 0.04425014902903333
IoU is 0.0
loss for epoch 8 is 0.03541107742767322
IoU is 0.0
loss for epoch 9 is 0.024733167276251296
IoU is 0.0
loss for epoch 10 is 0.01947385267383604
IoU is 0.0
loss for epoch 11 is 0.015886119684611243
IoU is 0.0
loss for epoch 12 is 0.013140691014554211
IoU is 0.0
loss for epoch 13 is 0.010864094714060645
IoU is 0.0
loss for epoch 14 is 0.00907949814328248
IoU is 0.0
loss for epoch 15 is 0.007888934152179408
IoU is 0.0
loss for epoch 16 is 0.0065342553414110775
IoU is 0.0007980798621192711
loss for epoch 17 is 0.005697835668231772
IoU is 0.009792138124268669
loss for epoch 18 is 0.00504489898

test на другой серии экспериментов, все патчи подряд, IoU для всего трека.
Запустить на последовательности другой длины.
Скорость обучения большая, добавить schedular.
Печатать loss на train.
2 сверточных слоя в bottleneck
batch = 256/512
optuna !
Ещё 1 pool/unpool
Визуализировать маску и МПКП (test/val)
Сделать introduction
