## Установка библиотек и распаковка данных

In [None]:
!pip install timm
!pip install wandb

Collecting timm
  Downloading timm-0.9.5-py3-none-any.whl (2.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.2/2.2 MB[0m [31m18.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub (from timm)
  Downloading huggingface_hub-0.16.4-py3-none-any.whl (268 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m268.8/268.8 kB[0m [31m30.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors (from timm)
  Downloading safetensors-0.3.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m60.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: safetensors, huggingface-hub, timm
Successfully installed huggingface-hub-0.16.4 safetensors-0.3.3 timm-0.9.5
Collecting wandb
  Downloading wandb-0.15.8-py3-none-any.whl (2.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.1/2.1 MB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
C

In [None]:
!unzip /content/drive/MyDrive/AIIJC/train.zip
!unzip /content/drive/MyDrive/AIIJC/test.zip

Archive:  /content/drive/MyDrive/AIIJC/train.zip
   creating: train/
 extracting: train/00009_hr.npy      
 extracting: train/00034_hr.npy      
 extracting: train/00043_hr.npy      
 extracting: train/00052_hr.npy      
 extracting: train/00057_hr.npy      
 extracting: train/00061_hr.npy      
 extracting: train/00081_hr.npy      
 extracting: train/00096_hr.npy      
 extracting: train/00108_hr.npy      
 extracting: train/00109_hr.npy      
 extracting: train/00115_hr.npy      
 extracting: train/00116_hr.npy      
 extracting: train/00131_hr.npy      
 extracting: train/00132_hr.npy      
 extracting: train/00144_hr.npy      
 extracting: train/00150_hr.npy      
 extracting: train/00157_hr.npy      
 extracting: train/00160_hr.npy      
 extracting: train/00170_hr.npy      
 extracting: train/00172_hr.npy      
 extracting: train/00173_hr.npy      
 extracting: train/00175_hr.npy      
 extracting: train/00192_hr.npy      
 extracting: train/00209_hr.npy      
 extracting: train/

In [None]:
# Authenticate this runtime with Weights & Biases through the wandb CLI.
!wandb login

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


## Загрузка данных

In [None]:
import torch
from torch import nn
import torchaudio
from torch.utils.data import Dataset
import torch.nn.functional as F

import numpy as np
import pandas as pd
import random
import os

from sklearn.model_selection import train_test_split
import wandb

In [None]:
# Start a W&B run with default settings (no project name or config is passed here).
wandb.init()

[34m[1mwandb[0m: Currently logged in as: [33mtsimbaliukk[0m ([33maiijc[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
# Central place for the paths and hyper-parameters used throughout the notebook.
config = dict(
    train_path="/content/train/",
    test_path="/content/test/",
    train_batch_size=64,
    val_batch_size=64,
    epochs=200,
    lr=1e-3,
    seed=42,
    experiment="crnn",
)

In [None]:
def seed_everything(TORCH_SEED):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random``, NumPy and PyTorch (CPU and all CUDA devices),
    exports ``PYTHONHASHSEED`` and switches cuDNN to deterministic mode with
    benchmark mode turned off.

    Args:
        TORCH_SEED: Integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(TORCH_SEED)

    # The same seed goes to each library-level generator.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed, torch.cuda.manual_seed_all):
        seed_fn(TORCH_SEED)

    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

In [None]:
# Fix all RNG seeds before the data split and model initialisation further down.
seed_everything(config['seed'])

In [None]:
# Load every training record and convert it to a spectrogram up front.
# Keys are the file names without their ".npy" extension.
ecg = {}

# One transform instance is enough; there is no need to rebuild it for every file.
spectrogram = torchaudio.transforms.Spectrogram()

for file_name in os.listdir(config['train_path']):
    record_name, extension = os.path.splitext(file_name)
    file_path = os.path.join(config['train_path'], file_name)
    # Match on the real extension: a substring test would also accept names such as
    # "x.npy.bak", and str.replace would strip ".npy" anywhere in the name.
    if extension != ".npy" or not os.path.isfile(file_path):
        continue
    # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects; keep it
    # only if the records really are object arrays from a trusted source.
    signal = np.load(file_path, allow_pickle=True)
    ecg[record_name] = spectrogram(torch.Tensor(signal))

In [None]:
# Labels and metadata sit in the training directory; build their paths from the
# shared config instead of repeating the directory as a literal.
labels = pd.read_csv(os.path.join(config['train_path'], "train_gts.csv"))
meta = pd.read_csv(os.path.join(config['train_path'], "train_meta.csv"))

## DataSet и DataLoader

In [None]:
class DataSet(Dataset):
    """Map-style dataset that looks records up by name in a pre-loaded dict.

    With ``labels`` it yields ``(ecg, label)`` pairs indexed by the rows of
    ``labels``; without it it yields ``(ecg, index)`` pairs indexed by the rows
    of ``meta``.

    Args:
        ecg: Mapping from record name to the pre-computed input for that record.
        meta: Table with a ``record_name`` column.
        labels: Optional table with ``record_name`` and ``myocard`` columns.
    """

    def __init__(self, ecg: dict, meta: pd.DataFrame, labels: pd.DataFrame = None):
        self.ecg, self.meta, self.labels = ecg, meta, labels

    @property
    def _labelled(self):
        # Labelled mode is selected purely by whether a labels table was supplied.
        return self.labels is not None

    def __len__(self):
        return len(self.labels if self._labelled else self.meta)

    def __getitem__(self, item):
        if not self._labelled:
            # Unlabelled mode: the positional index is returned in place of a label.
            record = self.meta.iloc[item]
            return self.ecg[record['record_name']], item

        record = self.labels.iloc[item]
        return self.ecg[record['record_name']], record['myocard']

In [None]:
# Hold out 20% of the labelled records for validation. The seed comes from the
# shared config instead of a second hard-coded 42, and stratifying on the target
# keeps the positive/negative ratio the same in both splits.
X_train, X_val, y_train, y_val = train_test_split(
    labels['record_name'],
    labels['myocard'],
    test_size=0.2,
    random_state=config['seed'],
    stratify=labels['myocard'],
)

In [None]:
# Re-assemble each split into a (record_name, myocard) frame for the dataset wrapper.
val_frame = pd.DataFrame({"record_name": X_val, "myocard": y_val})
train_frame = pd.DataFrame({"record_name": X_train, "myocard": y_train})

val_dataset = DataSet(ecg, meta, val_frame)
train_dataset = DataSet(ecg, meta, train_frame)

# Only the training batches are shuffled; validation keeps the split order.
train_loader = torch.utils.data.DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=config['train_batch_size'],
)
val_loader = torch.utils.data.DataLoader(
    val_dataset,
    batch_size=config['val_batch_size'],
)

In [None]:
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
class ConvNet2D(nn.Module):
    """Convolutional front-end followed by a bidirectional LSTM and an MLP head.

    Four stride-2 3x3 convolutions (each followed by ReLU and batch norm) shrink
    the input. The result is flattened to one feature vector per sample, passed
    through a 4-layer bidirectional LSTM and classified by a small MLP ending in
    a sigmoid, so the output is a probability in [0, 1].

    The LSTM input size is fixed at 832, so the flattened convolution output must
    have exactly 832 features per sample.
    """

    def __init__(self):
        super().__init__()

        self.conv1 = nn.Conv2d(in_channels=12, out_channels=32, kernel_size=3, stride=2, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1)
        self.conv3 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1)
        self.conv4 = nn.Conv2d(in_channels=32, out_channels=32, kernel_size=3, stride=2, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.bn2 = nn.BatchNorm2d(32)
        self.bn3 = nn.BatchNorm2d(32)
        self.bn4 = nn.BatchNorm2d(32)
        self.flatten = nn.Flatten()
        self.lstm = nn.LSTM(832, 256, 4, dropout=0.1, batch_first=True, bidirectional=True)
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(512, 1024),
            nn.ReLU(),
            nn.Linear(1024, 1),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Return one probability per sample.

        Args:
            x: Batch with 12 input channels, shape ``(batch, 12, H, W)``.

        Returns:
            Tensor of shape ``(batch, 1)`` holding sigmoid outputs.
        """
        x = self.bn1(F.relu(self.conv1(x)))
        x = self.bn2(F.relu(self.conv2(x)))
        x = self.bn3(F.relu(self.conv3(x)))
        x = self.bn4(F.relu(self.conv4(x)))
        x = self.flatten(x)

        # A batch_first LSTM expects (batch, seq, features). Given the 2-D
        # (batch, features) tensor directly, PyTorch treats it as ONE unbatched
        # sequence whose time steps are the samples of the batch, so every
        # prediction would depend on the other samples in the batch (and inference
        # with batch size 1 would not match training). The explicit length-1
        # sequence axis makes each sample independent and keeps all layer shapes.
        # NOTE(review): with a single time step the recurrence adds little; feeding
        # the spectrogram's time axis as the sequence would need a different LSTM
        # input size.
        x, _ = self.lstm(x.unsqueeze(1))
        x = x.squeeze(1)

        x = self.classifier(x)
        return x

In [None]:
# Build the network and move its parameters to the selected device. The bare
# name on the last line keeps the module summary as the cell output.
model = ConvNet2D().to(device)
model

ConvNet2D(
  (conv1): Conv2d(12, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv2): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv3): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (conv4): Conv2d(32, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
  (bn1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn4): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (lstm): LSTM(832, 256, num_layers=4, batch_first=True, dropout=0.1, bidirectional=True)
  (classifier): Sequential(
    (0): Dropout(p=0.5, inplace=False)
    (1): Linear(in_features=512, out_features=1024, bias=True)
    (2): ReLU()
    (3): Linear(in_features=1024, out_featu

In [None]:
# Binary cross-entropy on probabilities (the model already ends in a sigmoid).
loss_fn = nn.BCELoss()
optimizer = torch.optim.AdamW(params=model.parameters(), lr=config['lr'])
# mode='max' because the scheduler is stepped with the validation F1: the learning
# rate is halved once the score has stopped improving for longer than the patience.
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='max',
    factor=0.5,
    patience=12,
    verbose=True,
)

In [None]:
from sklearn.metrics import f1_score

def train_step(model, train_loader, loss_fn, optimizer, device, epoch=None):
    """Run one optimisation pass over ``train_loader``.

    Args:
        model: Network to train; it is switched to training mode.
        train_loader: Iterable of ``(ecg, label)`` batches.
        loss_fn: Criterion called as ``loss_fn(prediction, target)`` on float tensors.
        optimizer: Optimizer bound to the parameters of ``model``.
        device: Device the batches are moved to.
        epoch: Epoch number, used only in the progress log. When omitted, the
            module-level ``epoch`` loop variable is used if it exists.

    Returns:
        Mean training loss of the pass as a Python float (0.0 for an empty loader).
    """
    if epoch is None:
        # Backward compatible with callers that relied on the global loop variable,
        # without raising NameError when the function is used outside that loop.
        epoch = globals().get("epoch", "?")

    model.train()
    running_loss = 0.0
    num_batches = 0
    for i, (ecg, label) in enumerate(train_loader):
        ecg = ecg.to(device)
        label = label.float().to(device).reshape(-1)

        optimizer.zero_grad()
        # reshape(-1) rather than squeeze(): a final batch of size one must stay
        # 1-D so that prediction and target shapes still match.
        output = model(ecg).reshape(-1)

        loss = loss_fn(output, label)
        loss.backward()
        optimizer.step()

        # .item() stores a plain float; summing the loss tensors themselves would
        # keep every batch's autograd history referenced for the whole epoch.
        running_loss += loss.item()
        num_batches = i + 1

        if i % 10 == 0:
            print(f"Epochs: {epoch}, Iterations: {i}, MeanLoss: {running_loss / num_batches}")

    return running_loss / num_batches if num_batches else 0.0


def val_step(model, val_loader, device):
    """Evaluate ``model`` on ``val_loader`` and return the validation F1 score.

    Model outputs are rounded to hard 0/1 decisions and the F1 score is computed
    once over all collected predictions.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        val_loader: Iterable of ``(ecg, label)`` batches.
        device: Device the inputs are moved to.

    Returns:
        F1 score over the whole loader (0.0 when the loader yields no batches).
    """
    model.eval()
    predictions, targets = [], []
    with torch.no_grad():
        for ecg, label in val_loader:
            output = model(ecg.to(device))
            predictions.append(torch.round(output).reshape(-1).long().cpu())
            targets.append(label.reshape(-1).long().cpu())

    if not predictions:
        print("Validation F1: ", 0.0)
        return 0.0

    # F1 is not an average of per-batch F1 values: a batch that happens to contain
    # no positives scores 0 and drags the mean down. Score all predictions at once,
    # passing the arguments in the (y_true, y_pred) order the metric defines.
    f1 = f1_score(torch.cat(targets).numpy(), torch.cat(predictions).numpy())
    print("Validation F1: ", f1)
    return f1

In [None]:
# Directory for this experiment's checkpoints; exist_ok makes the cell safe to re-run
# without a separate existence check.
os.makedirs(config['experiment'], exist_ok=True)

In [None]:
# Start below any reachable F1 so the first epoch always writes best_model.pt.
# Starting at 0 with a strict `>` means a run whose F1 never rises above 0.0
# saves no best model, and loading best_model.pt afterwards fails.
best_f1 = float("-inf")

# Relative to the working directory, matching both the directory created for the
# experiment and the path best_model.pt is loaded from later.
checkpoint_dir = config['experiment']

for epoch in range(config['epochs']):
    train_step(model, train_loader, loss_fn, optimizer, device)
    f1 = val_step(model, val_loader, device)

    # The scheduler was created with mode='max', so it is driven by the validation F1.
    scheduler.step(f1)

    # Per-epoch snapshot, tagged with the epoch number and its score.
    torch.save(
        model.state_dict(),
        os.path.join(checkpoint_dir, f"{config['experiment']}_{epoch}_{f1}.pt"),
    )
    if f1 > best_f1:
        best_f1 = f1
        torch.save(model.state_dict(), os.path.join(checkpoint_dir, "best_model.pt"))

print("Best F1: ", best_f1)

Epochs: 0, Iterations: 0, MeanLoss: 0.6980451941490173
Epochs: 0, Iterations: 10, MeanLoss: 0.608225405216217
Epochs: 0, Iterations: 20, MeanLoss: 0.5431748032569885
Validation F1:  0.0
Epochs: 1, Iterations: 0, MeanLoss: 0.5778898596763611
Epochs: 1, Iterations: 10, MeanLoss: 0.5212451815605164
Epochs: 1, Iterations: 20, MeanLoss: 0.5168930292129517
Validation F1:  0.0
Epochs: 2, Iterations: 0, MeanLoss: 0.42048221826553345
Epochs: 2, Iterations: 10, MeanLoss: 0.47990289330482483
Epochs: 2, Iterations: 20, MeanLoss: 0.5083948969841003
Validation F1:  0.0
Epochs: 3, Iterations: 0, MeanLoss: 0.4215549826622009
Epochs: 3, Iterations: 10, MeanLoss: 0.49983784556388855
Epochs: 3, Iterations: 20, MeanLoss: 0.5037329792976379
Validation F1:  0.0
Epochs: 4, Iterations: 0, MeanLoss: 0.5023653507232666
Epochs: 4, Iterations: 10, MeanLoss: 0.5080703496932983
Epochs: 4, Iterations: 20, MeanLoss: 0.5124478340148926
Validation F1:  0.0
Epochs: 5, Iterations: 0, MeanLoss: 0.6081947088241577
Epochs: 

In [None]:
# Read the test metadata from the configured test directory rather than from a
# path relative to the current working directory.
test_meta = pd.read_csv(os.path.join(config['test_path'], "test_meta.csv"))

In [None]:
# Restore the best checkpoint of the configured experiment. map_location lets
# weights saved from a GPU run load on whatever device is in use now.
best_checkpoint = os.path.join(config['experiment'], "best_model.pt")
model.load_state_dict(torch.load(best_checkpoint, map_location=device))

# Evaluation mode for dropout and batch norm; as the last expression of the cell
# it also displays the module summary.
model.eval()

In [None]:
# Predict a hard 0/1 label for every record listed in the test metadata.
answer = {}

# Build the transform once instead of once per record.
spectrogram = torchaudio.transforms.Spectrogram()

with torch.no_grad():
    for record_name in test_meta['record_name']:
        record_path = os.path.join(config['test_path'], record_name + ".npy")
        # NOTE(review): allow_pickle=True lets np.load unpickle arbitrary objects; keep
        # it only if the records really are object arrays from a trusted source.
        signal = torch.Tensor(np.load(record_path, allow_pickle=True))
        # [None, ...] adds the leading batch axis before the model call.
        batch = spectrogram(signal)[None, ...].to(device)
        probability = model(batch).cpu().squeeze()
        answer[record_name] = torch.round(probability).item()

In [None]:
# One row per test record; the stored predictions are floats and are cast to int here.
submit = pd.DataFrame({
    "record_name": answer.keys(),
    "myocard": list(map(int, answer.values())),
})

In [None]:
# Write the submission file to the current working directory.
# NOTE(review): with pandas defaults the DataFrame index is written as an extra
# first column — confirm the expected submission format (index=False drops it).
submit.to_csv("crnn.csv")

In [None]:
# Colab-specific helper: hand the submission file to the browser for download.
from google.colab import files
files.download('crnn.csv')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>