# ISIBrnoAIMT Encoder with Attention Decoder

Encoder was taken from the winner of the [Will Two Do?](https://physionet.org/content/challenge-2021/1.0.3/sources/) challenge [ISIBrnoAIMT](https://www.cinc.org/archives/2021/pdf/CinC2021-014.pdf)
Decoder was taken from the [sequence to sequence tutorial](https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html) from Pytorch.

In [1]:
import pickle
import os
import sys
import torch
import pandas as pd
from sklearn.metrics import f1_score, jaccard_score, confusion_matrix, precision_score, recall_score, accuracy_score

from models.m04_EcgToText_ISIBrnoAIMT.dataset import *
from models.m04_EcgToText_ISIBrnoAIMT.model import *
from models.m04_EcgToText_ISIBrnoAIMT.train import *

In [2]:
os.chdir('..')

## Train

In [None]:
torch.manual_seed(42)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

n_epochs=50
hidden_size = 256

data_fractions = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 1.0]
for data_fraction in data_fractions:
    print(f"############################ dataset size: {int(data_fraction * 100)}% ############################")
    language, dataloader = get_dataloader(file_path='./data_ptb-xl', batch_size=64, mode='train', device=device, frac=data_fraction)
    _, val_dataloader = get_dataloader(file_path='./data_ptb-xl', batch_size=64, mode='val', device=device, _lang=language)
    
    criterion = nn.NLLLoss()
    
    encoder = NN(num_leads=12,
                 hidden_size=hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size=hidden_size,
                             encoder_hidden_size=hidden_size,
                             output_size=language.n_words,
                             max_len=language.max_len).to(device)
    
    train(dataloader, val_dataloader, encoder, decoder, criterion, language, n_epochs, size=int(data_fraction*100))

############################ dataset size: 10% ############################
Sampling 1742 (10.0%)
0m 8s (- 7m 9s) (1 2.0%) | Train Loss: 1.5908 | Val METEOR: 0.2964
0m 19s (- 7m 43s) (2 4.0%) | Train Loss: 0.733 | Val METEOR: 0.3185
0m 29s (- 7m 45s) (3 6.0%) | Train Loss: 0.5073 | Val METEOR: 0.3106
0m 40s (- 7m 42s) (4 8.0%) | Train Loss: 0.398 | Val METEOR: 0.3525
0m 50s (- 7m 34s) (5 10.0%) | Train Loss: 0.3382 | Val METEOR: 0.3463
1m 0s (- 7m 26s) (6 12.0%) | Train Loss: 0.3014 | Val METEOR: 0.3107
1m 11s (- 7m 17s) (7 14.0%) | Train Loss: 0.2808 | Val METEOR: 0.3572
1m 21s (- 7m 8s) (8 16.0%) | Train Loss: 0.2529 | Val METEOR: 0.3488
1m 31s (- 6m 58s) (9 18.0%) | Train Loss: 0.2345 | Val METEOR: 0.2951
1m 42s (- 6m 49s) (10 20.0%) | Train Loss: 0.2204 | Val METEOR: 0.3602
1m 52s (- 6m 39s) (11 22.0%) | Train Loss: 0.2076 | Val METEOR: 0.3643
2m 3s (- 6m 30s) (12 24.0%) | Train Loss: 0.2014 | Val METEOR: 0.3295
2m 13s (- 6m 19s) (13 26.0%) | Train Loss: 0.1887 | Val METEOR: 0.3751

## Test

In [6]:
results = []

hidden_size = 256
criterion = nn.NLLLoss()

data_fractions = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9]
for data_fraction in data_fractions:
    language, dataloader = get_dataloader(file_path='./data_ptb-xl', batch_size=64, mode='train', device=device, frac=data_fraction)
    _, test_dataloader = get_dataloader(file_path='./data_ptb-xl', batch_size=64, mode='test', device=device, _lang=language)
    
    encoder = NN(num_leads=12,
                 hidden_size=hidden_size).to(device)
    decoder = AttnDecoderRNN(hidden_size=hidden_size,
                             encoder_hidden_size=hidden_size,
                             output_size=language.n_words,
                             max_len=language.max_len).to(device)
    
    encoder.load_state_dict(torch.load(f'./models/m04_EcgToText_ISIBrnoAIMT/models_with_reduced_dataset/Encoder_{int(data_fraction*100)}.pth'))
    decoder.load_state_dict(torch.load(f'./models/m04_EcgToText_ISIBrnoAIMT/models_with_reduced_dataset/Decoder_{int(data_fraction*100)}.pth'))
    
    total_loss, f1, jaccard, rouge, meteor = validate_epoch(dataloader, encoder, decoder, criterion, language)

    results.append({
        "Dataset": f"{int(data_fraction*100)}%",
        "Test Loss": round(total_loss, 4),
        "F1": round(f1, 4),
        "Jaccard": round(jaccard, 4),
        "Rouge-1 (p)": round(rouge["rouge-1"]["p"], 3),
        "Rouge-1 (r)": round(rouge["rouge-1"]["r"], 3),
        "Rouge-1 (f1)": round(rouge["rouge-1"]["f"], 3),
        "Rouge-2 (p)": round(rouge["rouge-2"]["p"], 3),
        "Rouge-2 (r)": round(rouge["rouge-2"]["r"], 3),
        "Rouge-2 (f1)": round(rouge["rouge-2"]["f"], 3),
        "Rouge-L (p)": round(rouge["rouge-l"]["p"], 3),
        "Rouge-L (r)": round(rouge["rouge-l"]["r"], 3),
        "Rouge-L (f1)": round(rouge["rouge-l"]["f"], 3),
        "METEOR": round(meteor, 3)
    })

df_results = pd.DataFrame(results)
df_results

Sampling 1742 (10.0%)
Sampling 3483 (20.0%)
Sampling 5225 (30.0%)
Sampling 6967 (40.0%)
Sampling 8708 (50.0%)
Sampling 10450 (60.0%)
Sampling 12192 (70.0%)
Sampling 13934 (80.0%)
Sampling 15675 (90.0%)


Unnamed: 0,Dataset,Test Loss,F1,Jaccard,Rouge-1 (p),Rouge-1 (r),Rouge-1 (f1),Rouge-2 (p),Rouge-2 (r),Rouge-2 (f1),Rouge-L (p),Rouge-L (r),Rouge-L (f1),METEOR
0,10%,2.5214,0.0562,0.0363,0.635,0.622,0.607,0.5,0.49,0.477,0.632,0.619,0.604,0.558
1,20%,2.3084,0.0607,0.041,0.663,0.705,0.664,0.542,0.569,0.539,0.661,0.703,0.661,0.594
2,30%,2.4939,0.0624,0.0438,0.674,0.682,0.657,0.549,0.548,0.53,0.671,0.679,0.654,0.59
3,40%,2.4877,0.04,0.0267,0.659,0.684,0.65,0.529,0.54,0.517,0.655,0.68,0.647,0.578
4,50%,2.7262,0.083,0.0589,0.708,0.698,0.685,0.589,0.579,0.569,0.705,0.694,0.682,0.636
5,60%,2.6758,0.0462,0.0332,0.67,0.7,0.665,0.544,0.559,0.534,0.667,0.697,0.661,0.588
6,70%,2.762,0.0703,0.0512,0.694,0.702,0.68,0.571,0.573,0.556,0.691,0.699,0.677,0.617
7,80%,2.3599,0.0204,0.0134,0.624,0.674,0.626,0.491,0.52,0.486,0.622,0.671,0.623,0.538
8,90%,2.808,0.0764,0.0554,0.723,0.747,0.718,0.617,0.635,0.611,0.72,0.744,0.715,0.661
