In [1]:
import torch
import pandas as pd
import os
from torch.utils.data import DataLoader
import torch.optim as optim
from biobertology import get_tokenizer
import sys

sys.path.append('..')
from shared.models import *
from shared.datasets import *
from shared.metrics import *


def train(text_inputs, attention_inputs, labels, model, criterion, device, optimizer, freeze=False):
    """Run a single optimisation step on one full support set.

    Args:
        text_inputs: First positional argument passed to ``model``.
        attention_inputs: Second positional argument passed to ``model``.
        labels: Targets handed to ``criterion`` together with the model output.
        model: Module to optimise; it is switched to training mode.
        criterion: Loss callable invoked as ``criterion(pred, labels)``.
        device: Not used inside this function.
        optimizer: Optimiser whose gradients are cleared and which is stepped once.
        freeze: Falsy to leave every parameter as it is. Otherwise a container of
            parameter names to leave untouched; every parameter whose name is NOT
            in it gets ``requires_grad = False``.

    Returns:
        The loss of this step as a Python float.
    """
    model.train()

    # Despite its name, `freeze` lists the parameters that are kept as they are;
    # everything else is switched off for gradient computation.
    if freeze:
        to_disable = (weight for label, weight in model.named_parameters() if label not in freeze)
        for weight in to_disable:
            weight.requires_grad = False

    # The whole support set goes through the network as one batch
    optimizer.zero_grad()
    step_loss = criterion(model(text_inputs, attention_inputs), labels)
    step_loss.backward()
    optimizer.step()

    return step_loss.item()


def test(text_inputs, attention_inputs, labels, model, criterion, device, n_way):
    # An F1 Score of 0 indicates that it is invalid
    model.eval()
    true_positive = list(0. for i in range(n_way))  # Number of correctly predicted samples per class
    total_truth = list(0. for i in range(n_way))  # Number of ground truths per class
    predicted_positive = list(0. for i in range(n_way))  # Number of predicted samples per class
    correct_total = 0  # Total correctly predicted samples
    total = 0  # Total samples
    with torch.no_grad():
        # Test the entire query set in one batch
        pred = model(text_inputs, attention_inputs)
        loss = criterion(pred, labels)
        val_loss = loss.item()  # Running validation loss
        _, predicted = torch.max(pred, 1)
        correct = (predicted == labels).squeeze()  # Samples that are correctly predicted
        correct_total += (predicted == labels).sum().item()
        total += labels.size(0)

        for i in range(len(predicted)):
            label = labels[i]
            true_positive[label] += correct[i].item()
            total_truth[label] += 1
            predicted_positive[predicted[i].item()] += 1  # True Positive + False Positive

    accuracy, macro_accuracy, f1_score, class_f1 = metrics(true_positive, total_truth,
                                                           predicted_positive, correct_total, total)

    return val_loss, accuracy, macro_accuracy, f1_score, class_f1


def main(k_shot):
    """Run the few-shot semantic experiments and export the best epoch of each to CSV.

    Episodes are drawn from the 'novel' split through ``MimicCxrReportsEpisodes``.
    For every episode a fresh ``SemanticNet`` and Adam optimiser are created,
    the model is trained on the support set for ``num_epochs`` epochs and
    evaluated on the query set after each one. The epoch with the highest
    ``0.5 * accuracy + 0.5 * macro_f1`` is printed and recorded; all recorded
    rows are written to ``<path_results>/<k_shot>shot_semantic.csv``.

    Args:
        k_shot: Shot count forwarded to ``MimicCxrReportsEpisodes`` and used in
            the name of the output CSV.
    """
    # Set Training Parameters
    n_way = 3
    k_query = 16
    num_episodes = 200
    num_epochs = 100
    num_workers = 12
    bs = 4
    lr = 1e-3
    root_text = '../../../../scratch/rl80/mimic-cxr-2.0.0.physionet.org'
    path_biobert = '../results'
    path_splits = '../splits/splits.csv'  # Location of preprocessed splits
    path_results = '../../results'  # Folder to save the CSV results
    # Parameters that are left untouched by train(); every other parameter is frozen
    freeze = ['linear.weight', 'linear.bias']

    # Select GPU 0 only when CUDA exists; calling set_device unconditionally
    # fails on CPU-only machines and makes the CPU fallback unreachable.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(0)
    device = torch.device("cuda:0" if use_cuda else "cpu")

    # Training tools (torch.nn is referenced explicitly instead of relying on
    # `nn` leaking in through a star-import)
    criterion = torch.nn.CrossEntropyLoss()
    tokenizer = get_tokenizer()

    # Load in data
    dataset = MimicCxrReportsEpisodes(root_text, path_splits, tokenizer, n_way, k_shot, k_query, num_episodes, 'novel')
    loader = DataLoader(dataset, batch_size=bs, shuffle=True, num_workers=num_workers)

    # Column layout of the exported CSV; one row per episode is collected in
    # `best_rows` and turned into a DataFrame once at the end.
    columns = ['Epoch', 'Training Loss', 'Validation Loss', 'Accuracy', 'Macro Accuracy',
               'Macro-F1 Score'] + [str(x) + ' F1' for x in range(n_way)]
    best_rows = []

    # Iterate through batched episodes. One episode is one experiment
    for support_texts, support_masks, support_labels, query_texts, query_masks, query_labels in loader:
        # Convert Tensors to appropriate device
        batch_support_x, batch_support_masks, batch_support_y, batch_query_x, batch_query_masks, batch_query_y = \
            support_texts.to(device), support_masks.to(device), support_labels.to(device), \
            query_texts.to(device), query_masks.to(device), query_labels.to(device)

        # The leading dimension of every batched tensor indexes the episodes
        num_batch = batch_support_x.size(0)  # Number of episodes in the batch

        # Break down the batch of episodes into single episodes
        for i in range(num_batch):
            # Load in model and reset weights every episode/experiment
            model = SemanticNet(n_way, path_biobert).to(device)

            # Reset optimizer with model parameters
            optimizer = optim.Adam(model.parameters(), lr=lr)

            # Break down the sets into individual episodes
            support_x, support_m, support_y, query_x, query_m, query_y = \
                batch_support_x[i], batch_support_masks[i], batch_support_y[i], \
                batch_query_x[i], batch_query_masks[i], batch_query_y[i]

            # Best epoch of this experiment. Starting from "nothing recorded"
            # (instead of a best score of 0) guarantees a row exists even when
            # every epoch scores 0, which previously crashed the print below.
            best_score = None
            best_row = None

            # Training and testing for specified epochs
            for epoch in range(num_epochs):
                # Training
                train_loss = train(support_x, support_m, support_y, model, criterion, device, optimizer, freeze=freeze)

                # Testing
                val_loss, acc, m_acc, macro_f1, class_f1 = test(query_x, query_m, query_y,
                                                                model, criterion, device, n_way)

                # Find best epoch
                score = 0.5*acc + 0.5*macro_f1
                if best_row is None or score > best_score:
                    best_score = score
                    best_row = [epoch + 1, train_loss, val_loss, acc, m_acc, macro_f1] + list(class_f1)

            # Print the best results per experiment
            print(
                f'[{int(best_row[0])}] t_loss: {best_row[1]} v_loss: {best_row[2]} '
                f'val_acc: {best_row[3]} f1: {best_row[5]}')

            # Record the best epoch to be saved into a CSV
            best_rows.append(best_row)

    # Create results folder if it does not exist
    os.makedirs(path_results, exist_ok=True)

    # Export results to a CSV file. The frame is built in one go because
    # DataFrame.append no longer exists in pandas 2.x.
    df_results = pd.DataFrame(best_rows, columns=columns)
    df_results.to_csv(os.path.join(path_results, f'{k_shot}shot_semantic.csv'), index=False)


if __name__ == '__main__':
    # The shot count is the first command-line argument; it is echoed as typed
    # and converted to an integer only when handed to main().
    shot_arg = sys.argv[1]
    print(f'Semantic Training {shot_arg} shot')
    main(int(shot_arg))


[1] t_loss: 1.10848 v_loss: 1.09739 val_acc: 0.31250 val_m_acc: 0.31250 f1: 0.00000
[2] t_loss: 1.10093 v_loss: 1.09857 val_acc: 0.35417 val_m_acc: 0.35417 f1: 0.00000
[3] t_loss: 1.10490 v_loss: 1.09632 val_acc: 0.41667 val_m_acc: 0.41667 f1: 0.00000
[4] t_loss: 1.08619 v_loss: 1.09937 val_acc: 0.37500 val_m_acc: 0.37500 f1: 0.25428
[5] t_loss: 1.07951 v_loss: 1.10369 val_acc: 0.33333 val_m_acc: 0.33333 f1: 0.27403
[6] t_loss: 1.07948 v_loss: 1.10590 val_acc: 0.33333 val_m_acc: 0.33333 f1: 0.26903
[7] t_loss: 1.07405 v_loss: 1.10673 val_acc: 0.31250 val_m_acc: 0.31250 f1: 0.26413
[8] t_loss: 1.07881 v_loss: 1.10916 val_acc: 0.27083 val_m_acc: 0.27083 f1: 0.20168
[9] t_loss: 1.06343 v_loss: 1.11292 val_acc: 0.33333 val_m_acc: 0.33333 f1: 0.00000
[10] t_loss: 1.05573 v_loss: 1.11611 val_acc: 0.35417 val_m_acc: 0.35417 f1: 0.00000
[11] t_loss: 1.05835 v_loss: 1.11555 val_acc: 0.31250 val_m_acc: 0.31250 f1: 0.00000
[12] t_loss: 1.04984 v_loss: 1.11379 val_acc: 0.33333 val_m_acc: 0.33333 f

[98] t_loss: 0.76456 v_loss: 1.05394 val_acc: 0.56250 val_m_acc: 0.56250 f1: 0.54923
[99] t_loss: 0.79496 v_loss: 1.05225 val_acc: 0.56250 val_m_acc: 0.56250 f1: 0.54923
[100] t_loss: 0.75829 v_loss: 1.05100 val_acc: 0.54167 val_m_acc: 0.54167 f1: 0.53086
[88] t_loss: 0.8008180260658264 v_loss: 1.057766318321228 val_acc: 0.5625 f1: 0.549227098246706


In [18]:
# Display the `support_y` label tensor as this cell's output.
# NOTE(review): in the visible source `support_y` is only assigned inside main(),
# so this cell presumably relies on a variable left in the kernel by a cell that
# is not shown here -- confirm, or remove it, since it would likely raise
# NameError on a fresh kernel.
support_y

tensor([0, 2, 1, 2, 2, 2, 1, 2, 1, 2, 0, 0, 1, 2, 0, 1, 0, 1, 2, 1, 1, 1, 0, 1,
        2, 2, 0, 2, 1, 2, 1, 2, 0, 2, 0, 0, 1, 0, 0, 2, 2, 1, 0, 1, 1, 1, 0, 0,
        0, 1, 2, 2, 0, 0, 2, 2, 0, 0, 1, 1], device='cuda:0')

In [5]:
# Set Training Parameters (notebook-level copy used for interactive inspection)
n_way = 3
k_shot = 20
k_query = 16
num_episodes = 1
num_epochs = 100
num_workers = 12
bs = 4
lr = 1e-4
root_text = '../../../../scratch/rl80/mimic-cxr-2.0.0.physionet.org'
path_biobert = '../results'
path_splits = '../splits/splits.csv'  # Location of preprocessed splits
path_results = '../../results'  # Folder to save the CSV results
# Names of the parameters that are meant to stay trainable; all others get frozen
freeze = ['linear.weight', 'linear.bias']

# Select GPU 0 only when CUDA exists; an unconditional set_device call fails on
# CPU-only machines and makes the CPU fallback on the next line unreachable.
if torch.cuda.is_available():
    torch.cuda.set_device(0)
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = SemanticNet(n_way, path_biobert).to(device)

In [3]:
# Show which parameters are trainable. Each flag is printed next to its
# parameter name so the output can be matched to a layer; a bare column of
# booleans cannot be.
for name, param in model.named_parameters():
    print(f'{name}: {param.requires_grad}')

False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
False
Fals

In [10]:
# Turn off gradients for every parameter whose name is not listed in `freeze`;
# the listed parameters are skipped and keep their current requires_grad flag.
for name, param in model.named_parameters():
    if name in freeze:
        continue
    param.requires_grad = False

tensor([     0,      0,      0,      0,      0,      0,      0,      0,     -3,
         -3068,  -7118,   9538,  10158,  24674, -11989,   9606,   8162,   1825,
        -21697,   -952,   7262,   9040,  13203,  -6195,   7949, -27337,  11089,
         -7603,  11225,  17944,  -1187, -10866,    109, -10552,  -3106,     10,
           913,   1962,     71, -16649, -10305,  -8244,  -5153, -10187,  26469,
          4644,   9736, -20112,  15853,   4911, -16257,   1089,    971,  -1111,
          5880,  -7596,    632,   4732,  -4522,  -1940,   -990,  -2861,  23868,
         22643,    -85, -14060,   -967,   3143,  20785, -12290, -18698,  17616,
         11215,  -4258,   5144,    395,  -1102,  -6471,    816,  15837,  11377,
           367,  -6402,  10859,  24935,  12810,  -9243,  15523,     36,   -128,
          2870,  -5135,    995,   1032,  -3447,  -3513,   -500,    946,  -1018,
        -21423,  -3819,  11492,  -2825,   2982,   3353,   5874,  -2092, -23861,
         -6688,  -7049,  -8281,  11366, 

In [None]:
# Bare reference to the `test` function defined in the first cell; evaluating it
# only displays the function object.
# NOTE(review): this cell has never been executed and looks like leftover
# scratch -- consider deleting it.
test

In [None]:
# NOTE(review): `e` is not defined anywhere in the visible source; this
# never-executed cell looks like leftover scratch and would likely raise
# NameError on Run All -- consider deleting it.
e

In [None]:
# NOTE(review): `f` is not defined anywhere in the visible source; this
# never-executed cell looks like leftover scratch and would likely raise
# NameError on Run All -- consider deleting it.
f

In [None]:
# NOTE(review): `g` is not defined anywhere in the visible source; this
# never-executed cell looks like leftover scratch and would likely raise
# NameError on Run All -- consider deleting it.
g