In [1]:
import  torch, os
import  pandas as pd
import  numpy as np
import  scipy.stats
from    torch.utils.data import DataLoader
import  sys

sys.path.append('..')
from shared.datasets import * 
from shared.meta import *


def main():
    """Run MAML on MIMIC-CXR episodes and save the best evaluation as CSV.

    Builds a ``Meta`` model from the layer ``config`` below, feeds it every
    batch of "base" episodes, and on every fifth step fine-tunes on each
    "novel" episode via ``maml.finetunning``. The evaluation whose combined
    score (mean of average accuracy and average macro-F1 across episodes) is
    highest is written to ``<path_results>/<k_shot>shot_MAML_<best_step>.csv``.

    Raises:
        RuntimeError: If no evaluation produced a usable score (for example
            the training loader yielded no batches), so there is nothing to
            save.
    """
    n_way = 3
    k_shot = 20
    k_query = 16
    num_workers = 12
    train_num_episodes = 6
    test_num_episodes = 6
    bs = 1
    root = '../../../../scratch/rl80/mimic-cxr-jpg-2.0.0.physionet.org/files'
    path_splits = '../splits/splits.csv'  # Location of preprocessed splits
    path_results = '../../results'  # Folder to save the CSV results

    # NOTE(review): in the usual MAML formulation update_lr is the inner-loop
    # (task-level) rate and meta_lr the outer-loop rate -- confirm against
    # Meta before relying on the descriptions below.
    update_lr = 1e-2  # Learning rate for meta-training
    meta_lr = 1e-3  # Learning rate for meta-testing
    update_step = 5  # Number of meta-training update steps
    update_step_test = 10  # Number of meta-testing update steps
    imgsz = 224  # Size of images
    imgc = 1  # Initial image channels

    # Learner model configuration.
    # NOTE(review): the meaning of each parameter list is defined by the
    # project's Meta/learner implementation; the 64 * 14 * 14 input size of the
    # final linear layer presumably corresponds to a 224-pixel image halved by
    # the four pooling layers -- confirm if imgsz changes.
    config = [
        ('conv2d', [64, 1, 3, 3, 1, 1]),
        ('relu', [True]),
        ('bn', [64]),
        ('max_pool2d', [2, 2, 0]),
        ('conv2d', [64, 64, 3, 3, 1, 1]),
        ('relu', [True]),
        ('bn', [64]),
        ('max_pool2d', [2, 2, 0]),
        ('conv2d', [64, 64, 3, 3, 1, 1]),
        ('relu', [True]),
        ('bn', [64]),
        ('max_pool2d', [2, 2, 0]),
        ('conv2d', [64, 64, 3, 3, 1, 1]),
        ('relu', [True]),
        ('bn', [64]),
        ('max_pool2d', [2, 2, 0]),
        ('flatten', []),
        ('linear', [n_way, 64 * 14 * 14])
    ]

    # Only touch the CUDA runtime when a GPU exists; calling set_device on a
    # CPU-only machine raises before the CPU fallback can take effect.
    use_cuda = torch.cuda.is_available()
    if use_cuda:
        torch.cuda.set_device(0)
    device = torch.device("cuda:0" if use_cuda else "cpu")

    maml = Meta(update_lr, meta_lr, n_way, k_shot, k_query, bs,
                update_step, update_step_test, imgc, imgsz, config).to(device)

    # Create batched episode datasets
    mini = MimicCxrJpgEpisodes(root, path_splits, n_way, k_shot, k_query, train_num_episodes, mode="base")
    mini_test = MimicCxrJpgEpisodes(root, path_splits, n_way, k_shot, k_query, test_num_episodes, mode="novel")

    # Fetch `bs` episodes per training batch. Pinned memory only helps (and
    # only avoids a warning) when batches are copied to a GPU.
    db = DataLoader(mini, batch_size=bs, shuffle=True, num_workers=num_workers, pin_memory=use_cuda)
    # The test loader is loop-invariant: every `for` over it starts a fresh,
    # reshuffled pass, so it is built once instead of once per evaluation.
    db_test = DataLoader(mini_test, 1, shuffle=True, num_workers=1, pin_memory=use_cuda)

    result_columns = (['Step', 'Accuracy', 'Macro Accuracy', 'Macro-F1 Score']
                      + [str(x) + ' F1' for x in range(n_way)])

    # Keep track of best meta-testing results. Starting from -inf makes the
    # first valid evaluation count even if its score is exactly 0.
    best_score = float('-inf')
    best_step = 0
    df_best_test = None

    for step, (x_spt, y_spt, x_qry, y_qry) in enumerate(db):
        x_spt, y_spt = x_spt.to(device), y_spt.to(device)
        x_qry, y_qry = x_qry.to(device), y_qry.to(device)

        # The returned training accuracies are not used by this script.
        maml(x_spt, y_spt, x_qry, y_qry)

        if step % 5 != 0:
            continue

        # Evaluation: one result row per test episode.
        episode_rows = []
        for xs_test, ys_test, xq_test, yq_test in db_test:
            # The loader adds a leading batch dimension of size 1; drop it.
            xs_test, ys_test = xs_test.squeeze(0).to(device), ys_test.squeeze(0).to(device)
            xq_test, yq_test = xq_test.squeeze(0).to(device), yq_test.squeeze(0).to(device)

            # Record the best step per episode (row 0 of the returned frame).
            df_best = maml.finetunning(xs_test, ys_test, xq_test, yq_test)
            episode_rows.append(df_best.loc[0].to_dict())

        # DataFrame.append was removed in pandas 2.0, so build the frame from
        # the collected rows. Expected columns come first; any additional
        # columns returned by finetunning are kept after them.
        df_results = pd.DataFrame(episode_rows)
        extra_columns = [c for c in df_results.columns if c not in result_columns]
        df_results = df_results.reindex(columns=result_columns + extra_columns)

        print(f'Step: {step} Test Results')
        print(df_results[["Step", "Accuracy", "Macro-F1 Score"]])

        average_accuracy = df_results["Accuracy"].mean()
        average_f1 = df_results["Macro-F1 Score"].mean()

        # Equal-weight blend of accuracy and macro-F1 decides the best step.
        # A NaN score (no test episodes) never compares greater, so it is skipped.
        score = 0.5*average_accuracy + 0.5*average_f1
        if score > best_score:
            best_score = score
            best_step = step
            df_best_test = df_results

    if df_best_test is None:
        raise RuntimeError(
            "No evaluation results were produced; check that the training and "
            "test episode loaders are not empty."
        )

    print(f"Best Step: {best_step}")

    # Create results folder if it does not exist
    os.makedirs(path_results, exist_ok=True)

    df_best_test.to_csv(os.path.join(path_results, f'{k_shot}shot_MAML_{best_step}.csv'), index=False)


# Run the experiment only when this file is executed directly, not on import.
if __name__ == '__main__':
    main()


Step: 0 Test Results
   Step  Accuracy  Macro-F1 Score
0   9.0  0.395833        0.330247
1  10.0  0.458333        0.440637
2   9.0  0.354167        0.353802
3   8.0  0.312500        0.311994
4  10.0  0.395833        0.393407
5   8.0  0.395833        0.385699
Step: 5 Test Results
   Step  Accuracy  Macro-F1 Score
0   9.0  0.333333        0.333537
1   8.0  0.354167        0.334663
2   6.0  0.437500        0.362715
3   8.0  0.437500        0.426684
4  10.0  0.437500        0.427439
5  10.0  0.375000        0.365572
Best Step: 5
