In [1]:
import pandas as pd
import ast
import wfdb
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import io

from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score
from sklearn.neighbors import NearestNeighbors

import torch
from torchinfo import summary
import torchvision
import torcheval


import math
import random
from collections import Counter

sns.set_style('darkgrid')
pd.set_option('display.max_columns', None)

# Seed every random number generator the notebook draws from: `random` drives the
# oversampling routines, torch drives weight initialisation and DataLoader shuffling.
# Without this, Restart & Run All gives different balanced datasets and models each time.
# (GPU kernels may still introduce small non-determinism.)
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is visible to torch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)

KeyboardInterrupt: 

# Работа с исходными данными

## Обработка метаданных

Загрузим метаданные о датасете и данные о диагнозах

In [None]:
# Per-record metadata; the first CSV column becomes the index.
meta_df = pd.read_csv('ptbxl_database.csv', index_col=0)
# SCP statement table, read without an index column. The statement code then lives in
# the column that later cells access as 'Unnamed: 0'.
scp_df = pd.read_csv('scp_statements.csv')

# Last expression of the cell: display the metadata table.
meta_df

Отфильтруем данные.

В метаданных нужно оставить столбцы с диагнозом (`scp_codes`), путём к файлу с временным рядом (`filename_lr`) и номером группы (`strat_fold`) — одной из 10 групп разбиения. Столбец с заключениями хранится в виде строки, поэтому преобразуем его в словарь Python.

В данных о диагнозах оставим только диагностические заключения

In [None]:
# Keep only the columns used later and parse scp_codes from its string form into a dict.
# The isinstance guard makes the cell idempotent: re-running it on already-parsed dicts
# would otherwise make ast.literal_eval raise ValueError. Using .assign on the column
# subset also avoids writing into a slice of the original frame.
meta_df = meta_df[['scp_codes', 'strat_fold', 'filename_lr']].assign(
    scp_codes=lambda df: df.scp_codes.apply(
        lambda x: ast.literal_eval(x) if isinstance(x, str) else x
    )
)


# Keep only the statements flagged as diagnostic.
scp_df = scp_df[scp_df.diagnostic == 1]
# Sorted list of diagnostic statement codes. A raw set of strings iterates in an order
# that changes between interpreter sessions (string hash randomisation), which would
# shuffle the label-matrix columns from run to run. A sorted list keeps len(), list(),
# iteration and membership tests working while making the class order reproducible.
diagnose_set = sorted(set(scp_df['Unnamed: 0']))

meta_df

## Загрузка временных рядов

Для дальнейшего анализа будем использовать записи с частотой 100 измерений в секунду. Загрузим записи в numpy массив

In [None]:
def load_raw_data(df, sampling_rate, path):
    """Read the WFDB record of every row of ``df`` and stack the signals.

    Args:
        df: table with a ``filename_lr`` column (read when ``sampling_rate`` is 100)
            or a ``filename_hr`` column (read for any other value).
        sampling_rate: 100 selects ``filename_lr``; anything else selects ``filename_hr``.
        path: prefix joined to each file name with plain string concatenation.

    Returns:
        numpy array built from the first element (the signal) of every
        ``wfdb.rdsamp`` result, in row order.
    """
    filenames = df.filename_lr if sampling_rate == 100 else df.filename_hr
    records = [wfdb.rdsamp(path + name) for name in filenames]
    # rdsamp yields (signal, metadata) pairs; only the signal is kept.
    return np.array([signal for signal, _ in records])

# Prefix put in front of every record file name; empty, so the names are used as-is.
path = ''
sampling_rate=100

X = load_raw_data(meta_df, sampling_rate, path)
# Swap the last two axes of the loaded array.
# NOTE(review): presumably (records, samples, leads) -> (records, leads, samples); confirm against the wfdb output.
X = X.transpose(0,2,1)
X.shape

Размер полученных данных соответствует 21837 случаям, для каждого проведено 12 одновременных измерений по 10*100=1000 точек

Визуализируем одно из ЭКГ

In [None]:
# Pick one record (index 45) to visualise.
sample = X[45]

# Twelve stacked panels sharing the x-axis, one per row of `sample`.
fig, axes = plt.subplots(12, 1, figsize=(20,10), sharex=True)
for i in range(12):
    sns.lineplot(x=np.arange(sample.shape[1]), y=sample[i, :], ax=axes[i])
plt.show()

## Разметка данных

Разметим ЭКГ, будем использовать только диагностические заметки, уровень будет равен степени уверенности в диагнозе

In [None]:
def marking(labels, classes):
    """Build a label matrix holding the confidence of every known class per record.

    Args:
        labels: iterable of dicts mapping a statement code to a value on a 0-100 scale.
        classes: sized collection of the codes to keep; its iteration order fixes the
            column order of the result.

    Returns:
        Array of shape (len(labels), len(classes)). Entry [i, j] is the value of class j
        in record i divided by 100, or 0 when the record does not mention the class.
    """
    # Column index of every class.
    column_of = {name: column for column, name in enumerate(classes)}
    matrix = np.zeros([len(labels), len(classes)])
    for row, statements in enumerate(labels):
        # Codes outside `classes` are ignored.
        known = [(column_of[code], value) for code, value in statements.items() if code in column_of]
        for column, value in known:
            matrix[row, column] = value / 100
    return matrix
    

In [None]:
# Label matrix: one row per record, one column per diagnostic class.
Y = marking(meta_df.scp_codes, diagnose_set)

In [None]:
# Folds held out for testing; every other fold goes to training.
test_fold = [10]

# Vectorised split. The previous version looped over every record with
# meta_df.iloc[i]['strat_fold'] (a full row materialisation per lookup) and printed
# one progress line per record; boolean masks give the same selection in one step.
has_label = Y.sum(axis=1) != 0  # records without any diagnostic label are dropped
in_test = meta_df['strat_fold'].isin(test_fold).to_numpy()

X_test = X[has_label & in_test]
Y_test = Y[has_label & in_test]
X_train = X[has_label & ~in_test]
Y_train = Y[has_label & ~in_test]

## Анализ данных

#### Определение функций

In [None]:
def num_classes_row(data, name):
    """Plot and print how many records carry 0, 1, 2, ... positive labels.

    Args:
        data: 2-D label matrix (rows = records, columns = classes); an entry counts
            as a label when it is > 0.
        name: dataset name inserted into the plot title.
    """
    data = np.asarray(data)
    n_classes = data.shape[1]
    labels_per_row = (data > 0).sum(axis=1)
    # A row can hold anywhere from 0 to n_classes labels inclusive, so n_classes + 1
    # bins are required. The previous len(diagnose_set)-sized array raised IndexError
    # for a row with every class set. Kept as float to preserve the printed format.
    count = np.bincount(labels_per_row, minlength=n_classes + 1).astype(float)
    plt.figure(figsize=(20, 6))
    plt.bar(np.arange(len(count)), count)
    plt.title(f"Распределение количества диагнозов по ЭКГ в {name} датасете")
    plt.show()
    print({num: cnt for num, cnt in zip(range(len(count)), count)})


def num_classes_dataset(data, name):
    """Bar chart of how many rows have a positive (> 0) entry in each class column.

    Args:
        data: 2-D label matrix whose columns follow the order of ``diagnose_set``.
        name: dataset name inserted into the plot title.
    """
    positives_per_class = (data > 0).sum(axis=0)
    class_names = list(diagnose_set)
    fig, ax = plt.subplots(figsize=(20, 6))
    ax.bar(class_names, positives_per_class)
    plt.xticks(rotation=60)
    ax.set_title(f"Распределение частоты диагнозов в {name} датасете")
    plt.show()

def sum_coef_classes_dataset(data, name):
    """Bar chart of the summed label values (confidence mass) of each class column.

    Args:
        data: 2-D label matrix whose columns follow the order of ``diagnose_set``.
        name: dataset name inserted into the plot title.
    """
    confidence_per_class = data.sum(axis=0)
    class_names = list(diagnose_set)
    fig, ax = plt.subplots(figsize=(20, 6))
    ax.bar(class_names, confidence_per_class)
    plt.xticks(rotation=60)
    ax.set_title(f"Распределение диагнозов, учитывая стпень уверенности в {name} датасете")
    plt.show()


def avg_coef_classes_dataset(data, name):
    """Bar chart of the mean label value of each class over the rows where it is positive.

    Args:
        data: 2-D label matrix whose columns follow the order of ``diagnose_set``.
        name: dataset name inserted into the plot title.
    """
    count1 = data.sum(axis=0)
    count2 = (data > 0).sum(axis=0)
    # A class with no positive row would give 0/0 -> NaN plus a RuntimeWarning
    # (plausible for rare classes in a small split); report 0 for such classes instead.
    avg_count = np.divide(
        count1, count2,
        out=np.zeros_like(count1, dtype=float),
        where=count2 > 0,
    )
    plt.figure(figsize=(20, 6))
    plt.bar(list(diagnose_set), avg_count)
    plt.xticks(rotation=60)
    plt.title(f"Распределение средней уверенности в диагнозе в {name} датасете")
    plt.show()
    

#### Анализ обучающего и тестового датасетов

In [None]:
# Number of labels per record, for the training and the test split.
num_classes_row(Y_train, 'обучающем')
num_classes_row(Y_test, 'тестовом')

In [None]:
# Summed label values per class, for the training and the test split.
sum_coef_classes_dataset(Y_train, 'обучающем')
sum_coef_classes_dataset(Y_test, 'тестовом')

In [None]:
# Number of positive records per class, for the training and the test split.
num_classes_dataset(Y_train, 'обучающем')
num_classes_dataset(Y_test, 'тестовом')

In [None]:
# Mean label value per class over its positive records, for both splits.
avg_coef_classes_dataset(Y_train, 'обучающем')
avg_coef_classes_dataset(Y_test, 'тестовом')

## Балансировка обучающего датасета

#### Определение авторского метода балансировки

In [None]:
def Prob(x, threshold_inf):
    """Map an information score to a duplication probability.

    Returns 0 when ``x <= threshold_inf``; otherwise ``1 / (1 + exp(-ln x))``,
    which is algebraically ``x / (1 + x)``.
    """
    return 0 if x <= threshold_inf else 1 / (1 + math.exp(-math.log(x)))


def get_information(values, distribution_0):
    """Root-mean-square of the per-class terms ``-ln|values[i] - 1/distribution_0[i]|``.

    Args:
        values: label values of one record, indexable by position.
        distribution_0: per-class value whose reciprocal is subtracted from the label.

    Returns:
        ``sqrt(mean(term ** 2))`` over all positions of ``values``.
    """
    terms = np.array(
        [-math.log(abs(values[k] - 1/distribution_0[k])) for k in range(len(values))],
        dtype=float,
    )
    mean_square = np.mean(terms**2)
    return np.sqrt(mean_square)


def multiply(dataX, dataY, threshold_i, threshold_p):
    """Randomly duplicate records according to their information score.

    For every record the score from ``get_information`` is turned into a probability
    by ``Prob`` (with threshold ``threshold_i``). The record is appended once more when
    a uniform random draw falls below that probability and the probability exceeds
    ``threshold_p``. Only the records present at call time are considered.

    Args:
        dataX: array of records, aligned with ``dataY`` along the first axis.
        dataY: 2-D label matrix.
        threshold_i: information threshold passed to ``Prob``.
        threshold_p: minimal probability for a record to be eligible.

    Returns:
        Tuple ``(X, Y)`` of numpy arrays: the originals followed by the duplicates.
    """
    # Share of records in which each class is absent.
    absent_share = 1 - (dataY > 0).sum(axis=0)/len(dataY)
    scores = [get_information(row, absent_share) for row in dataY]
    probabilities = [Prob(score, threshold_i) for score in scores]

    # One random draw per record, taken before the threshold_p check (short-circuit order).
    chosen = [
        idx for idx, p in enumerate(probabilities)
        if random.random() < p and p > threshold_p
    ]

    records, labels = list(dataX), list(dataY)
    records += [records[idx] for idx in chosen]
    labels += [labels[idx] for idx in chosen]
    return (np.array(records), np.array(labels))

#### Определение MLSMOTE

https://www.kaggle.com/code/tolgadincer/upsampling-multilabel-data-with-mlsmote

In [None]:
def create_dataset(n_sample=1000):
    '''
    Create an unevenly distributed sample data set with one-hot encoded targets,
    generated by sklearn's make_classification.

    args
    n_sample: int, number of samples to create

    return
    X: pandas.DataFrame, feature vector dataframe with 10 features
    y: pandas.DataFrame, one-hot target dataframe with 5 label columns
    '''
    # Imported locally: the notebook's import cell does not bring make_classification
    # into scope, so the original body raised NameError when called.
    from sklearn.datasets import make_classification

    # n_samples now honours the n_sample argument (it was hard-coded to 1000).
    X, y = make_classification(n_classes=5, class_sep=2,
                               weights=[0.1,0.025, 0.205, 0.008, 0.9], n_informative=3, n_redundant=1, flip_y=0,
                               n_features=10, n_clusters_per_class=1, n_samples=n_sample, random_state=10)
    y = pd.get_dummies(y, prefix='class')
    return pd.DataFrame(X), y

def get_tail_label(df: pd.DataFrame, ql=[0.05, 1.]) -> list:
    """
    Find the underrepresented (tail) targets.

    Column sums are first restricted to those strictly between the ``ql[0]`` and
    ``ql[1]`` quantiles. Every remaining label gets an imbalance ratio
    (largest remaining sum / own sum); labels whose ratio is above the median
    ratio are returned.
    """
    label_counts = df.sum(axis=0)
    lower_bound = label_counts.quantile(ql[0])
    upper_bound = label_counts.quantile(ql[1])
    kept = label_counts[(label_counts > lower_bound) & (label_counts < upper_bound)]
    imbalance = kept.max() / kept
    is_tail = imbalance > imbalance.median()
    return imbalance[is_tail].index.tolist()

def get_minority_samples(X: pd.DataFrame, y: pd.DataFrame, ql=[0.05, 1.]):
    """
    Select the rows that carry at least one tail label (as found by get_tail_label).

    return
    X_sub: pandas.DataFrame, the feature vector minority dataframe (index reset)
    y_sub: pandas.DataFrame, the target vector minority dataframe (index reset)
    """
    tail_labels = get_tail_label(y, ql=ql)
    # True for rows where any tail-label column equals 1.
    has_tail_label = y[tail_labels].apply(lambda row: (row == 1).any(), axis=1)
    minority_index = y[has_tail_label].index.tolist()

    def select(frame):
        return frame[frame.index.isin(minority_index)].reset_index(drop = True)

    return select(X), select(y)

def nearest_neighbour(X: pd.DataFrame, neigh) -> list:
    """
    Give the indices of the `neigh` nearest neighbours of every instance.

    The model is fitted on X and queried with the same X, using Euclidean
    distance and a kd-tree.

    args
    X: feature table whose nearest neighbours have to be found
    neigh: int, number of neighbours returned per row

    return
    indices: index table with `neigh` entries per row of X, as returned by
    NearestNeighbors.kneighbors
    """
    model = NearestNeighbors(n_neighbors=neigh, metric='euclidean', algorithm='kd_tree')
    model.fit(X)
    _, indices = model.kneighbors(X)
    return indices

def MLSMOTE(X, y, n_sample, neigh=5):
    """
    Give the augmented data using the MLSMOTE algorithm.

    For each synthetic sample a random reference row is drawn, one of its nearest
    neighbours is picked at random, and the new feature vector is placed on the
    segment between the two. The synthetic label vector is the union of the labels
    of the reference row and all of its neighbours.

    args
    X: pandas.DataFrame, input (feature) vector DataFrame
    y: pandas.DataFrame, target vector DataFrame, row-aligned with X
    n_sample: int, number of newly generated samples
    neigh: int, size of the neighbourhood returned by nearest_neighbour for each
        row (must be >= 2 so that at least one candidate besides column 0 exists)

    return
    new_X: pandas.DataFrame, synthetic feature vectors
    target: pandas.DataFrame, synthetic target vectors

    raises
    ValueError: if neigh < 2
    """
    if neigh < 2:
        raise ValueError("neigh must be at least 2")

    # Fix: honour the caller's `neigh`; it used to be hard-coded to 5, so the
    # argument was silently ignored.
    indices2 = nearest_neighbour(X, neigh=neigh)
    n = len(indices2)

    # kneighbors returns positional indices, so index plain arrays positionally
    # instead of label-based .loc / index.isin (only equivalent for a RangeIndex).
    features = X.to_numpy()
    labels = y.to_numpy()

    new_X = np.zeros((n_sample, X.shape[1]))
    target = np.zeros((n_sample, y.shape[1]))
    for i in range(n_sample):
        reference = random.randint(0, n-1)
        # Column 0 is skipped when choosing the partner.
        neighbor = random.choice(indices2[reference, 1:])
        all_point = indices2[reference]
        # A label is set when any row of the neighbourhood carries it.
        target[i] = labels[all_point].sum(axis=0) > 0
        ratio = random.random()
        # Fix: SMOTE interpolates from the reference TOWARDS the neighbour,
        # x_new = x_ref + ratio * (x_neighbour - x_ref). The previous
        # (reference - neighbour) gap extrapolated away from the neighbour.
        gap = features[neighbor] - features[reference]
        new_X[i] = features[reference] + ratio * gap
    new_X = pd.DataFrame(new_X, columns=X.columns)
    target = pd.DataFrame(target, columns=y.columns)
    return new_X, target

#### Балансировка

#### Авторский метод

In [None]:
# Starting point for the custom oversampling: plain references to the training
# arrays (no copy is made here).
Y_train_balanced = Y_train
X_train_balanced = X_train

In [None]:
# Shape of the per-record label sums (one value per record).
Y_train_balanced.sum(axis=1).shape

In [None]:
# Histogram of per-record label sums before balancing.
print('изначальное распределение количества классов для примера %s' % Counter(Y_train_balanced.sum(axis=1)))
# Per-class summed label value divided by the number of records.
print('изначальное распределение вероятности классов', *Y_train_balanced.sum(axis=0)/len(Y_train_balanced))
# Per-class rarity: largest class sum divided by each class sum.
print('изначальное распределение редкости классов', *max(Y_train_balanced.sum(axis=0))/Y_train_balanced.sum(axis=0))


In [None]:
# Apply the custom oversampling 20 times; every pass works on the output of the
# previous one. NOTE: re-running this cell continues from the current arrays instead
# of restarting from the original training set.
for i in range(20):
    X_train_balanced, Y_train_balanced = multiply(X_train_balanced, Y_train_balanced, 0.9, 0.1)
    print('\n')
    # Dataset length after this pass.
    print(f"Длина датасета на шаге {i+1} {len(Y_train_balanced)}")
    # Class rarity (largest class sum / each class sum) after this pass.
    print(f'Распределение редкости классов на шаге {i+1} {max(Y_train_balanced.sum(axis=0))/Y_train_balanced.sum(axis=0)}')

#### MLSMOTE

In [None]:
# Flatten every training record (all dimensions from 1 onward) into a single
# feature vector, as MLSMOTE works on tabular rows.
ter = torch.tensor(X_train, dtype=torch.float32)
flat = torch.flatten(ter, 1)

In [None]:
# a: feature frame built from the flattened signals.
a = pd.DataFrame(flat)
# b: boolean label frame, True where the label value is above 0.
b = pd.DataFrame(Y_train > 0)

In [None]:
X_sub, y_sub = get_minority_samples(a, b)  # Getting minority samples of that dataframe
X_res, y_res = MLSMOTE(X_sub, y_sub, 10000, 10)  # Applying MLSMOTE to augment the dataframe

# Restore the per-record layout of X_train directly in numpy. The previous version
# round-tripped through a torch tensor only to reshape and hard-coded the
# (12, 1000) record shape; taking it from X_train keeps the cell valid if the
# number of leads or samples changes.
X_res = np.asarray(X_res).reshape(-1, *X_train.shape[1:])
y_res = np.asarray(y_res)

# Original training set followed by the synthetic samples.
X_train_balanced_MLSMOTE = np.concatenate((X_train, X_res), axis=0)
Y_train_balanced_MLSMOTE = np.concatenate((Y_train, y_res), axis=0)

#### Результат балансировки

In [None]:
# Class frequencies after the custom balancing, after MLSMOTE, and in the original
# training set, for side-by-side comparison.
num_classes_dataset(Y_train_balanced, 'сбалансированном авторском обучающем')
num_classes_dataset(Y_train_balanced_MLSMOTE, 'сбалансированном MLSMOTE обучающем')
num_classes_dataset(Y_train, 'исходном обучающем')

In [None]:
# Number of labels per record in the MLSMOTE-balanced training set.
num_classes_row(Y_train_balanced_MLSMOTE, 'сбалансированном MLSMOTE обучающем')

## Подготовка моделей классификатора

### Подготовка загрузчиков данных

In [None]:
# Binarise the labels: any value above 0 counts as a positive label.
Y_train1 = Y_train > 0
Y_train_balanced1 = Y_train_balanced > 0
Y_train_balanced_MLSMOTE1 = Y_train_balanced_MLSMOTE > 0
Y_test1 = Y_test > 0

In [None]:
# Swap the last two axes of every feature array for the recurrent model.
# NOTE(review): presumably (N, leads, samples) -> (N, samples, leads), matching the
# GRU's batch_first=True with input_size=12 — confirm.
X_train1 = X_train.transpose(0,2,1)
X_train_balanced1 = X_train_balanced.transpose(0,2,1)
X_train_balanced_MLSMOTE1 = X_train_balanced_MLSMOTE.transpose(0,2,1)
X_test1 = X_test.transpose(0,2,1)

In [None]:
# Convert features and binarised labels to float32 tensors.
X_train1 = torch.tensor(X_train1, dtype=torch.float32)
Y_train1 = torch.tensor(Y_train1, dtype=torch.float32)
X_train_balanced1 = torch.tensor(X_train_balanced1, dtype=torch.float32)
Y_train_balanced1 = torch.tensor(Y_train_balanced1, dtype=torch.float32)
X_train_balanced_MLSMOTE1 = torch.tensor(X_train_balanced_MLSMOTE1, dtype=torch.float32)
Y_train_balanced_MLSMOTE1 = torch.tensor(Y_train_balanced_MLSMOTE1, dtype=torch.float32)
X_test1 = torch.tensor(X_test1, dtype=torch.float32)
Y_test1 = torch.tensor(Y_test1, dtype=torch.float32)

batch = 256

# TensorDataset is the standard wrapper for aligned tensors and indexes them lazily.
# The previous tuple(zip(X, Y)) built one Python tuple per sample up front. Batches are
# still (X, Y) pairs. Training loaders are shuffled; the test loader keeps its order.
Train_basic = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_train1, Y_train1), batch_size=batch, shuffle=True)
Train_balanced = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_train_balanced1, Y_train_balanced1), batch_size=batch, shuffle=True)
Train_balanced_MLSMOTE = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_train_balanced_MLSMOTE1, Y_train_balanced_MLSMOTE1), batch_size=batch, shuffle=True)
Test = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_test1, Y_test1), batch_size=batch)



In [None]:
# Metric callables evaluated per class column inside train_model.
metrics_arr = [accuracy_score, precision_score, f1_score]

### Функция обучения

In [None]:
def _run_epoch(model, loader, metrics_threshold, optimizer=None, progress_label=None):
    """Run one pass over ``loader``; return (loss sum, thresholded predictions, targets).

    Trains (backward pass + optimizer step) when ``optimizer`` is given; otherwise
    evaluates with gradient tracking disabled. Uses the notebook globals ``device``
    and ``criterion``. ``progress_label`` (or None for silence) is the word inserted
    into the per-batch progress message.
    """
    training = optimizer is not None
    model.train(training)  # model.train(False) is equivalent to model.eval()
    loss_sum = 0
    pred_batches, real_batches = [], []
    # Evaluation previously built an autograd graph for every batch; disabling
    # gradients there saves memory and time without changing the outputs.
    with torch.set_grad_enabled(training):
        for j, (X, Y) in enumerate(loader):
            X = X.to(device)
            Y = Y.to(device)
            if training:
                optimizer.zero_grad()

            Y_pred = model(X)
            loss = criterion(Y_pred, Y)
            if training:
                loss.backward()
                optimizer.step()

            loss_sum += loss.item()
            # Model outputs are logits: apply the sigmoid before thresholding.
            Y_threshold = torch.sigmoid(Y_pred) >= metrics_threshold
            pred_batches.append(Y_threshold.cpu().detach().numpy())
            real_batches.append(Y.cpu().detach().numpy())

            if progress_label:
                print(f"Пройден {progress_label} батч {j} из {len(loader)}", end='\r')
    print('\n')
    # Collected in lists and concatenated once (instead of seeding with an
    # uninitialised np.empty row that had to be sliced off afterwards).
    predictions = np.concatenate(pred_batches).astype(float)
    targets = np.concatenate(real_batches).astype(float)
    return loss_sum, predictions, targets


def _per_class_metrics(metric_funcs, targets, predictions):
    """Evaluate every metric on every class column; returns (n_metrics, n_classes)."""
    scores = np.zeros((len(metric_funcs), targets.shape[1]))
    for j, metric in enumerate(metric_funcs):
        for k, (trg_col, pred_col) in enumerate(zip(targets.T, predictions.T)):
            # Fix: sklearn metrics take (y_true, y_pred). The arguments used to be
            # swapped, which made precision_score report recall.
            scores[j, k] = metric(trg_col, pred_col)
    return scores


def train_model(model, learning_rate, train, test, epoch_num, metrics_threshold, label,
                metric_funcs, verbose=False, plot=True):
    """Train ``model`` with Adam and track the loss and per-class metrics each epoch.

    Relies on notebook globals: ``device``, ``criterion`` (loss applied to the raw
    model outputs) and ``diagnose_set`` (its length is the number of classes).

    Args:
        model: torch module mapping a feature batch to one logit per class.
        learning_rate: Adam learning rate.
        train: iterable of (X, Y) batches used for optimisation.
        test: iterable of (X, Y) batches used for evaluation after each epoch.
        epoch_num: number of epochs.
        metrics_threshold: probability threshold applied to sigmoid(outputs).
        label: suffix for the legend entries of the loss plot.
        metric_funcs: callables with the sklearn signature ``metric(y_true, y_pred)``.
        verbose: falsy for silence; otherwise per-batch progress is printed and a
            summary is printed every ``verbose`` epochs.
        plot: when True, plot the train/test loss curves.

    Returns:
        Tuple ``(hist_train, hist_test, metrics_train, metrics_test)``: mean batch
        loss per epoch (shape ``(epoch_num,)``) and metric values of shape
        ``(epoch_num, len(metric_funcs), len(diagnose_set))`` for each split.
    """
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

    n_classes = len(diagnose_set)
    hist_train = np.zeros(epoch_num)
    hist_test = np.zeros(epoch_num)
    metrics_train = np.zeros((epoch_num, len(metric_funcs), n_classes))
    metrics_test = np.zeros((epoch_num, len(metric_funcs), n_classes))

    for i in range(epoch_num):
        hist_train_epoch, Pred_train, Real_train = _run_epoch(
            model, train, metrics_threshold, optimizer=optimizer,
            progress_label='обучающий' if verbose else None)
        hist_test_epoch, Pred_test, Real_test = _run_epoch(
            model, test, metrics_threshold,
            progress_label='тестовый' if verbose else None)

        metrics_train[i] = _per_class_metrics(metric_funcs, Real_train, Pred_train)
        metrics_test[i] = _per_class_metrics(metric_funcs, Real_test, Pred_test)

        # Mean loss per batch.
        hist_train[i] = hist_train_epoch/len(train)
        hist_test[i] = hist_test_epoch/len(test)

        if verbose:
            if (i) % verbose == 0:
                print(f"Пройдена эпоха {i+1} из {epoch_num}, результаты:")
                print(f"Loss Train / Test: {hist_train[i]} / {hist_test[i]}")
                for j, metric in enumerate(metric_funcs):
                    tmp_dict_train = {key: value for key, value in zip(diagnose_set, metrics_train[i, j, :])}
                    tmp_dict_test = {key: value for key, value in zip(diagnose_set, metrics_test[i, j, :])}
                    print(f"Обучающая выборка - значения метрики {metric.__name__} на итерации {i+1} = \n {tmp_dict_train}")
                    print(f"Тестовая выборка - значения метрики {metric.__name__} на итерации {i+1} = \n {tmp_dict_test}")

    if plot:
        plt.figure(figsize=(16, 6))
        plt.plot(hist_test, label=f"Loss test {label}")
        plt.plot(hist_train, label=f"Loss train {label}")
        # Explicit True: a bare plt.grid() toggles the grid, which switches it OFF
        # under the seaborn 'darkgrid' style where it is already enabled.
        plt.grid(True)
        plt.legend()

    return (hist_train, hist_test, metrics_train, metrics_test)

### Рекуррентная GRU

In [None]:
class Network_gru(torch.nn.Module):
    """Bidirectional multi-layer GRU classifier.

    The final hidden states of all layers and both directions are concatenated and
    passed to a linear head that emits one logit per class of ``diagnose_set``.

    Args:
        hidden: hidden size of each GRU direction.
        layers: number of stacked GRU layers.
    """
    def __init__(self, hidden, layers):
        super().__init__()
        directions = 2  # bidirectional=True below
        self.gru2 = torch.nn.GRU(
            input_size=12,
            hidden_size=hidden,
            num_layers=layers,
            batch_first=True,
            bidirectional=True,
            dropout=0.25,
        )

        self.flatten = torch.nn.Flatten()
        self.head = torch.nn.Linear(layers*directions*hidden, len(diagnose_set))

    def forward(self, X):
        # Only the final hidden states are used; the per-step outputs are discarded.
        _, final_states = self.gru2(X)
        # Bring the batch axis first, then merge the remaining axes per sample.
        batch_major = final_states.transpose(0, 1)
        return self.head(self.flatten(batch_major))

# Loss applied to raw logits; read as a global by train_model.
criterion = torch.nn.BCEWithLogitsLoss()

In [None]:
# GRU (hidden size 64, 4 layers) on the original training set:
# learning rate 0.001, 40 epochs, decision threshold 0.5, summary printed every epoch.
model_gru_basic = Network_gru(64, 4)
recurent_res_basic = train_model(model_gru_basic, 0.001, Train_basic, Test, 40, 0.5, 'recurrent basic', 
                  metrics_arr, verbose=1)

In [None]:
# Same GRU configuration on the training set balanced with the custom method; 25 epochs.
model_gru_balanced = Network_gru(64, 4)
recurent_res_balanced = train_model(model_gru_balanced, 0.001, Train_balanced, Test, 25, 0.5, 'recurrent balanced', 
                  metrics_arr, verbose=1)

In [None]:
# Same GRU configuration on the MLSMOTE-balanced training set; 25 epochs.
model_gru_balanced_MLSMOTE = Network_gru(64, 4)
recurent_res_balanced_MLSMOTE = train_model(model_gru_balanced_MLSMOTE, 0.001, Train_balanced_MLSMOTE, Test, 25, 0.5, 'recurrent balanced SMOTE', 
                  metrics_arr, verbose=1)

### Свёрточная

In [None]:

# The convolutional models take the arrays in their original axis order (not the
# transposed copies prepared for the GRU).
X_train2 = torch.tensor(X_train, dtype=torch.float32)
X_train_balanced2 = torch.tensor(X_train_balanced, dtype=torch.float32)
X_train_balanced_MLSMOTE2 = torch.tensor(X_train_balanced_MLSMOTE, dtype=torch.float32)
X_test2 = torch.tensor(X_test, dtype=torch.float32)


batch = 256

# TensorDataset is the standard wrapper for aligned tensors and indexes them lazily.
# The previous tuple(zip(X, Y)) built one Python tuple per sample up front. Batches are
# still (X, Y) pairs. Labels are the binarised tensors prepared for the GRU loaders.
Train_basic_conv = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_train2, Y_train1), batch_size=batch, shuffle=True)
Train_balanced_conv = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_train_balanced2, Y_train_balanced1), batch_size=batch, shuffle=True)
Train_balanced_MLSMOTE_conv = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_train_balanced_MLSMOTE2, Y_train_balanced_MLSMOTE1), batch_size=batch, shuffle=True)
Test_conv = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_test2, Y_test1), batch_size=batch)



In [None]:
# Plain 1-D convolutional classifier: five strided/pooled Conv1d stages followed by a
# linear head with one output per class.
# NOTE(review): Linear(1024, ...) matches 128 channels x 8 time steps, which is what the
# layer arithmetic gives for 1000-sample inputs — confirm if the input length changes.
Network_basic_conv = torch.nn.Sequential(
    torch.nn.Conv1d(12, 48, 7, padding=3, stride=3),
    torch.nn.ReLU(),
    torch.nn.Dropout1d(0.25),
    torch.nn.Conv1d(48, 128, 5, padding=2, stride=2),
    torch.nn.ReLU(),
    torch.nn.MaxPool1d(3, stride=2),
    torch.nn.Dropout1d(0.25),
    torch.nn.Conv1d(128, 256, 3, padding=1, stride=1),
    torch.nn.MaxPool1d(3, stride=2),
    torch.nn.Dropout1d(0.25),
    torch.nn.Conv1d(256, 512, 3, padding=1, stride=1),
    torch.nn.AvgPool1d(3, stride=2),
    torch.nn.Dropout1d(0.25),
    torch.nn.Conv1d(512, 128, 5, stride=2),
    torch.nn.ReLU(),
    torch.nn.Flatten(),
    torch.nn.Linear(1024, len(diagnose_set))
)

# Loss applied to raw logits; read as a global by train_model.
criterion = torch.nn.BCEWithLogitsLoss()

# Serialise the untrained network once into memory so that each experiment below can
# load its own copy with identical initial weights.
buffer = io.BytesIO()
torch.save(Network_basic_conv, buffer) 
# Size of the serialised model in bytes.
print(buffer.tell()) 

# Only the serialised template is needed from here on.
del Network_basic_conv

In [None]:
# Rewind and load a fresh, untrained copy of the network template.
buffer.seek(0)
# The buffer holds a whole pickled nn.Module written by this notebook (trusted), not
# a state_dict. torch.load defaults to weights_only=True since PyTorch 2.6, which
# refuses such payloads, so the full unpickler is requested explicitly.
model_conv_basic = torch.load(buffer, weights_only=False)

# Original training set: learning rate 0.001, 40 epochs, decision threshold 0.5.
conv_res_basic = train_model(model_conv_basic, 0.001, Train_basic_conv, Test_conv, 40, 0.5, 'convolutional basic', 
                  metrics_arr, verbose=1)

In [None]:
# Rewind and load a fresh, untrained copy of the network template.
buffer.seek(0)
# The buffer holds a whole pickled nn.Module written by this notebook (trusted), not
# a state_dict. torch.load defaults to weights_only=True since PyTorch 2.6, which
# refuses such payloads, so the full unpickler is requested explicitly.
model_conv_balanced = torch.load(buffer, weights_only=False)

# Training set balanced with the custom method: learning rate 0.001, 25 epochs.
conv_res_balanced = train_model(model_conv_balanced, 0.001, Train_balanced_conv, Test_conv, 25, 0.5, 'convolutional balanced', 
                  metrics_arr, verbose=1)

In [None]:
# Rewind and load a fresh, untrained copy of the network template.
buffer.seek(0)
# The buffer holds a whole pickled nn.Module written by this notebook (trusted), not
# a state_dict. torch.load defaults to weights_only=True since PyTorch 2.6, which
# refuses such payloads, so the full unpickler is requested explicitly.
model_conv_balanced_MLSMOTE = torch.load(buffer, weights_only=False)

# MLSMOTE-balanced training set: learning rate 0.001, 25 epochs.
conv_res_balanced_MLSMOTE = train_model(model_conv_balanced_MLSMOTE, 0.001, Train_balanced_MLSMOTE_conv, Test_conv, 25, 0.5, 'convolutional balanced SMOTE', 
                  metrics_arr, verbose=1)

In [None]:
# Release the in-memory serialised network template.
del buffer

### Свёрточная со skip connection 

In [None]:
class resnet_block_1D(torch.nn.Module):
    """1-D residual block: three Conv1d(kernel 3, padding 1) + BatchNorm1d stages.

    ReLU follows the first two stages only. The input is projected to the output
    channel count by a kernel-1 convolution and added to the main branch.

    Args:
        c1: input channels.
        c2: channels after the first convolution.
        c3: channels after the second convolution.
        c4: output channels (main branch and shortcut).
    """
    def __init__(self, c1, c2, c3, c4):
        super().__init__()

        def conv_bn(c_in, c_out):
            # Length-preserving convolution followed by batch normalisation.
            return [torch.nn.Conv1d(c_in, c_out, 3, padding=1), torch.nn.BatchNorm1d(c_out)]

        stages = (
            conv_bn(c1, c2) + [torch.nn.ReLU()]
            + conv_bn(c2, c3) + [torch.nn.ReLU()]
            + conv_bn(c3, c4)
        )
        self.forw = torch.nn.Sequential(*stages)
        # Shortcut: pointwise convolution matching the channel count of the main branch.
        self.residual = torch.nn.Conv1d(c1, c4, 1)

    def forward(self, x):
        return self.forw(x) + self.residual(x)

In [None]:
# Residual convolutional classifier: five residual blocks with growing channel counts.
# The first four are each followed by ReLU, max pooling and dropout; the last one by
# average pooling over the remaining time axis and a linear head.
# NOTE(review): AvgPool1d(61) collapses the time axis to length 1 only if 61 steps remain
# after the four MaxPool1d(3, stride=2) layers, which is what the arithmetic gives for
# 1000-sample inputs — confirm if the input length changes.
Network_residual_conv = torch.nn.Sequential(
    resnet_block_1D(12, 24, 36, 48),
    torch.nn.ReLU(),
    torch.nn.MaxPool1d(3, stride=2),
    torch.nn.Dropout1d(0.25),
    resnet_block_1D(48, 72, 100, 128),
    torch.nn.ReLU(),
    torch.nn.MaxPool1d(3, stride=2),
    torch.nn.Dropout1d(0.25),
    resnet_block_1D(128, 160, 200, 256),
    torch.nn.ReLU(),
    torch.nn.MaxPool1d(3, stride=2),
    torch.nn.Dropout1d(0.25),
    resnet_block_1D(256, 320, 400, 512),
    torch.nn.ReLU(),
    torch.nn.MaxPool1d(3, stride=2),
    torch.nn.Dropout1d(0.25),
    resnet_block_1D(512, 680, 850, 1024),
    torch.nn.ReLU(),
    torch.nn.AvgPool1d(61),
    torch.nn.ReLU(),
    torch.nn.Flatten(),
    torch.nn.Linear(1024, len(diagnose_set))
)

# Loss applied to raw logits; read as a global by train_model.
criterion = torch.nn.BCEWithLogitsLoss()

# Serialise the untrained network once into memory so that each experiment below can
# load its own copy with identical initial weights.
buffer = io.BytesIO()
torch.save(Network_residual_conv, buffer) 
# Size of the serialised model in bytes.
print(buffer.tell()) 

# Only the serialised template is needed from here on.
del Network_residual_conv

In [None]:
# Rewind and load a fresh, untrained copy of the residual network template.
buffer.seek(0)
# The buffer holds a whole pickled nn.Module written by this notebook (trusted), not
# a state_dict. torch.load defaults to weights_only=True since PyTorch 2.6, which
# refuses such payloads, so the full unpickler is requested explicitly.
model_conv_residual_basic = torch.load(buffer, weights_only=False)

# Original training set: learning rate 0.001, 20 epochs, decision threshold 0.5.
conv_res_residual_basic = train_model(model_conv_residual_basic, 0.001, Train_basic_conv, Test_conv, 20, 0.5, 'convolutional residual basic', 
                  metrics_arr, verbose=1)

In [None]:
# Rewind and load a fresh, untrained copy of the residual network template.
buffer.seek(0)
# The buffer holds a whole pickled nn.Module written by this notebook (trusted), not
# a state_dict. torch.load defaults to weights_only=True since PyTorch 2.6, which
# refuses such payloads, so the full unpickler is requested explicitly.
model_conv_residual_balanced = torch.load(buffer, weights_only=False)

# Training set balanced with the custom method: learning rate 0.001, 20 epochs.
conv_res_residual_balanced = train_model(model_conv_residual_balanced, 0.001, Train_balanced_conv, Test_conv, 20, 0.5, 'convolutional residual balanced', 
                  metrics_arr, verbose=1)

In [None]:
# Rewind and load a fresh, untrained copy of the residual network template.
buffer.seek(0)
# The buffer holds a whole pickled nn.Module written by this notebook (trusted), not
# a state_dict. torch.load defaults to weights_only=True since PyTorch 2.6, which
# refuses such payloads, so the full unpickler is requested explicitly.
model_conv_residual_balanced_MLSMOTE = torch.load(buffer, weights_only=False)

# MLSMOTE-balanced training set: learning rate 0.001, 20 epochs.
conv_res_residual_balanced_MLSMOTE = train_model(model_conv_residual_balanced_MLSMOTE, 0.001, Train_balanced_MLSMOTE_conv, Test_conv, 20, 0.5, 'convolutional residual balanced SMOTE', 
                  metrics_arr, verbose=1)

In [None]:
# Release the in-memory serialised network template.
del buffer

## Анализ результатов

In [None]:
# Row labels of the final table's transpose: model family x training-set variant.
col1 = ["GRU", "Свёрточная базовая", "Свёрточная продвинутая"]
# NOTE(review): the second label contains a Latin "n" inside a Cyrillic word; it is a
# runtime string and is left untouched here — fix the label if that is unintended.
col2 = ["Исходная выборка", "Авnорская балансировка", "MLSMOTE"]

# train_model outputs, ordered model-major / dataset-minor to match col1 x col2.
# Fix: the sixth entry used to be model_conv_balanced_MLSMOTE (the network itself)
# instead of its training result, which broke the res[3] lookup below.
results = [
    recurent_res_basic, recurent_res_balanced, recurent_res_balanced_MLSMOTE,
    conv_res_basic, conv_res_balanced, conv_res_balanced_MLSMOTE,
    conv_res_residual_basic, conv_res_residual_balanced, conv_res_residual_balanced_MLSMOTE
          ]

# First row: number of positive test examples per class. Then, for every metric and
# every experiment, the last-epoch test metric per class (res[3] is metrics_test).
# Rows are gathered in a list and stacked once instead of repeated concatenation.
rows = [Y_test1.sum(axis=0).int().numpy()]
rows += [
    res[3][-1, metric_num]
    for metric_num in range(len(metrics_arr))
    for res in results
]
df_res_data = np.vstack(rows)

# Column names in the same metric -> model -> dataset order as the rows above.
columns = ["Количество примеров"] + [
    c1 + ' ' + c2 + ' ' + metric.__name__
    for metric in metrics_arr
    for c1 in col1
    for c2 in col2
]


# One table row per diagnostic class.
res_df = pd.DataFrame(data=np.transpose(df_res_data), index=list(diagnose_set), columns=columns)

In [None]:
# Show floats with 3 decimal places. Both lines set the same pandas option
# (display.precision), so the second one is redundant.
pd.options.display.precision = 3
pd.set_option('display.precision', 3)

# Last expression of the cell: display the results table.
res_df