# Bi-Directional Attention Flow Model

#### Dependency Installation (restart the kernel after installing!)

In [None]:
# Install (or upgrade) the third-party packages used by this notebook.
# NOTE(review): nltk and spacy are not imported by the notebook cells
# themselves; presumably the Baseline scripts need them - confirm.
# NOTE(review): pytorch-lightning is installed unpinned, but the cells below
# use the legacy dict-based API (`early_stop_callback`, `'log'` dicts);
# consider pinning the version the experiments were run with.
!pip install -U PyYAML
!pip install -U h5py
!pip install pytorch-lightning
!pip install matplotlib
!pip install nltk
!pip install spacy
!python -m spacy download en_core_web_lg

## Configure your Experiment below!

In [None]:
import sys, os
import torch
pwd = os.getcwd()

class Arguments():
    """Static settings of one experiment: file locations and runtime flags."""
    # Folder that receives the checkpoint, metrics.json and statistics.json.
    exp_folder = os.path.join(pwd, 'Experimente/newExperimentName')
    # Original training file (not referenced by the cells of this notebook).
    data = os.path.join(pwd, 'DATA', 'train_v2.1.json')
    # Pre-trained word vectors; a falsy value skips loading them in new_init.
    word_rep = os.path.join(pwd, 'DATA', 'glove.840B.300d.txt')
    # Source files that try_to_split_testset reads to build the splits.
    train_original_data = os.path.join(pwd, 'DATA', 'train_v2.1.json')
    val_data = os.path.join(pwd, 'DATA', 'dev_v2.1.json')
    # Use the GPU whenever one is available.
    cuda = torch.cuda.is_available()
    # Passed to the embedding statistics / OOV sampler in load_pretrained_embeddings.
    use_covariance = False
    # Handed to train_manager.try_to_resume; set to True whenever the data is re-split.
    force_restart = False
    # Split files: train/test parts are written by try_to_split_testset,
    # all three are read again when the loaders are built.
    train_splitted_data = os.path.join(pwd, 'DATA', 'train_part.json')
    test_splitted_data = os.path.join(pwd, 'DATA', 'eval_part.json')
    val_splitted_data = os.path.join(pwd, 'DATA', 'dev_part.json')
    # Line-delimited reference answers of the test part, used for the final metrics.
    test_reference_file = os.path.join(pwd, 'DATA', 'test_reference.json')

args = Arguments()

# Create the experiment folder up front. exist_ok=True makes re-running the
# cell harmless and avoids the check-then-create race of an explicit
# os.path.exists() test.
os.makedirs(args.exp_folder, exist_ok=True)


#### Global Configurations (instead of config.yaml)

In [None]:
import yaml

# Inline replacement for a config.yaml file: model hyper-parameters live under
# `bidaf`, optimizer and batching settings under `training`.
# The key must be spelled `weight_decay`: that is the name get_optimizer looks
# up, so a misspelled key would be ignored without any error.
config_yaml = """
    bidaf:
        dropout: 0.2
        num_highways: 2
        num_lstm: 2
        hidden_size: 100
        embedding_dim: 300
        embedding_reduce: 100
        characters:
            dim: 16
            num_filters: 100
            filter_sizes:
                - 5
    training:
        lr: 0.001
        betas:
            - 0.9
            - 0.999
        eps: 0.00000001
        weight_decay: 0
        epochs: 1
        batch_size: 60
        limit: 400
"""
# The document only contains plain scalars, lists and mappings, so the
# restricted safe loader is sufficient.
config = yaml.safe_load(config_yaml)


#### Importing the MSMARCO Bidaf Model

In [None]:
# Make the baseline folders importable as top-level modules.
# NOTE(review): the imports below already use fully qualified package paths,
# so these entries are presumably needed for the flat imports inside the
# Baseline/Evaluation scripts themselves - confirm.
sys.path.append(os.path.join(pwd,'MsmarcoQuestionAnswering','Baseline'))
sys.path.append(os.path.join(pwd,'MsmarcoQuestionAnswering','Baseline','scripts'))
sys.path.append(os.path.join(pwd,'MsmarcoQuestionAnswering','Evaluation'))

import MsmarcoQuestionAnswering.Baseline.scripts.checkpointing as checkpointing
import MsmarcoQuestionAnswering.Baseline.scripts.train as train_manager
import MsmarcoQuestionAnswering.Evaluation.ms_marco_eval as eval_manager
#import MsmarcoQuestionAnswering.Baseline.scripts.predict as predict_manager
from pytorch_lightning import LightningModule
from pytorch_lightning import Trainer
import h5py

In [None]:
import MsmarcoQuestionAnswering.Baseline.mrcqa as mrcqa
import MsmarcoQuestionAnswering.Baseline.scripts.dataset as dataset
import json as json
import numpy as np
from random import shuffle, randint

def try_to_split_testset(percentual_size_test, reduced_whole_size_train=1,reduced_whole_size_val=1, force_renew=False):
    """Split the original training file into a train part and a test part.

    The split files and the test reference file are written to the paths
    configured in ``args``. Nothing happens when all three split files already
    exist and ``force_renew`` is false.

    Args:
        percentual_size_test: Fraction of the used training queries that goes
            into the test part.
        reduced_whole_size_train: Fraction of the original training queries
            that is used at all (train part + test part).
        reduced_whole_size_val: Fraction of the validation queries to sample.
        force_renew: Re-create the split even if the files already exist.

    Side effects:
        Sets ``args.force_restart`` to True whenever a new split is created.
    """
    splits_exist = (os.path.isfile(args.train_splitted_data)
                    and os.path.isfile(args.test_splitted_data)
                    and os.path.isfile(args.val_splitted_data))
    if splits_exist and not force_renew:
        return

    # The flag is later handed to train_manager.try_to_resume.
    args.force_restart = True

    def copy_dict_part(old_dict, qids):
        """Return the column-wise sub-dict of ``old_dict`` restricted to ``qids``."""
        new_dict = dict()
        for count, qid in enumerate(qids, start=1):
            if count % 10000 == 0:
                print('Copy progress: {}'.format(count/len(qids)))
            for key in old_dict:
                # Read from the dict that was passed in. Reading from the
                # training data unconditionally would fill the validation
                # subset with training rows.
                new_dict.setdefault(key, dict())[qid] = old_dict[key][qid]
        return new_dict

    with open(args.train_original_data) as f_o:
        train_json = json.load(f_o)
    qids = list(train_json['query_id'].keys())
    shuffle(qids)
    train_size = int(reduced_whole_size_train * len(qids))
    new_train_size = int((1 - percentual_size_test) * train_size)
    new_test_size = train_size - new_train_size
    print("New Train Set has {} Datapoints".format(new_train_size))
    print("New Test Set has {} Datapoints".format(new_test_size))

    qids_train = qids[0:new_train_size]
    qids_test = qids[new_train_size:train_size]

    print('Start creating new train set:')
    new_train = copy_dict_part(train_json, qids_train)
    print('Start creating new test set:')
    new_test = copy_dict_part(train_json, qids_test)

    with open(args.train_splitted_data, 'w') as write_f:
        write_f.write(json.dumps(new_train))
    with open(args.test_splitted_data, 'w') as write_f:
        write_f.write(json.dumps(new_test))

    create_reference_file(new_test, args.test_reference_file)

    with open(args.val_data) as f_o:
        val_json = json.load(f_o)
    qids = list(val_json['query_id'].keys())
    shuffle(qids)
    new_val_size = int(reduced_whole_size_val * len(qids))
    print("New Validation Set has {} Datapoints".format(new_val_size))

    qids_val = qids[0:new_val_size]
    print('Start creating new val set:')
    new_val = copy_dict_part(val_json, qids_val)
    # NOTE(review): writing the validation subset is disabled, so
    # args.val_splitted_data has to exist already and reduced_whole_size_val
    # has no lasting effect. Re-enable once this is confirmed to be wanted:
    #with open(args.val_splitted_data, 'w') as write_f:
    #    write_f.write(json.dumps(new_val))
            
def load_data(path,limit):
    """Read the JSON file at ``path`` and return the examples selected by dataset.load_data.

    Only answered, span-only examples are requested; ``limit`` is forwarded as
    ``loading_limit``. The second value returned by ``dataset.load_data`` is
    dropped.
    """
    with open(path) as json_file:
        raw_json = json.load(json_file)
        examples, _unused = dataset.load_data(
            raw_json,
            span_only=True,
            answered_only=True,
            loading_limit=limit,
        )
    return examples

def create_reference_file(data_obj, reference_file_path):
    """Write the reference answers of ``data_obj`` as one JSON object per line.

    Args:
        data_obj: Column-wise data set; ``data_obj["answers"]`` maps query ids
            to their reference answers.
        reference_file_path: Destination file, overwritten if it exists.

    Each line has the form ``{"query_id": "<id>", "answers": [...]}``. An id
    whose string form is not a key of the answers mapping is reported and
    skipped.
    """
    print("Create test reference file")
    answers = data_obj["answers"]
    with open(reference_file_path, 'w') as write_f:
        for qid in answers:
            try:
                correct = {"query_id": str(qid), "answers": answers[str(qid)]}
            except KeyError:
                # Report the offending id itself. Referring to any other name
                # here would turn the skip into a NameError.
                print("Key Error: "+str(qid))
                continue
            write_f.write(json.dumps(correct))
            write_f.write("\n")
    print("Done creating reference file")

def init_model(id_to_token, id_to_char):
    """Build a fresh BiDAF model from the ``bidaf`` section of the global config."""
    bidaf_settings = config['bidaf']
    return mrcqa.BidafModel.from_config(bidaf_settings, id_to_token, id_to_char)

def reload_model(checkpoint):
    """Restore model and vocabularies from ``checkpoint`` and switch to training mode.

    Returns:
        Tuple ``(model, id_to_token, id_to_char)``; the model is moved to the
        GPU when CUDA is available and enabled in ``args``.
    """
    restored = mrcqa.BidafModel.from_checkpoint(config['bidaf'], checkpoint)
    model, id_to_token, id_to_char = restored

    use_gpu = torch.cuda.is_available() and args.cuda
    if use_gpu:
        model.cuda()

    model.train()
    return model, id_to_token, id_to_char

def inverse_dict(base_dict):
    """Return a new dict that maps every value of ``base_dict`` back to its key.

    If a value occurs more than once, the key seen last wins.
    """
    flipped = {}
    for original_key, original_value in base_dict.items():
        flipped[original_value] = original_key
    return flipped

def get_loader(data, config, used_data_per_batch=1.0):
    """Wrap ``data`` in a shuffling ``dataset.EpochGen`` batch generator.

    The batch size comes from ``config['training']['batch_size']`` and falls
    back to 32 when the section or the key is missing.
    """
    training_cfg = config.get('training', {})
    batch_size = training_cfg.get('batch_size', 32)
    return dataset.EpochGen(
        data,
        batch_size=batch_size,
        shuffle=True,
        used_data_per_batch=used_data_per_batch,
    )

def get_optimizer(model, config, state):
    """Create the Adam optimizer for the trainable parameters of ``model``.

    Args:
        model: Module whose parameters with ``requires_grad`` are optimized.
        config: Global configuration; ``config['training']`` may provide
            ``lr``, ``betas``, ``eps`` and ``weight_decay``.
        state: Optimizer state dict to restore, or None for a fresh optimizer.

    Returns:
        The configured ``torch.optim.Adam`` instance.
    """
    training_cfg = config['training']
    trainable = [param for param in model.parameters() if param.requires_grad]

    # Alternative optimizers kept for reference:
    #   torch.optim.Adagrad(parameters,
    #                       lr=config['training'].get('lr', 1),
    #                       weight_decay=config['training'].get('weight_decay', 0))
    #   torch.optim.Adadelta(parameters, lr=0.5)
    optimizer = torch.optim.Adam(
        trainable,
        lr=training_cfg.get('lr', 0.01),
        betas=training_cfg.get('betas', (0.9, 0.999)),
        eps=training_cfg.get('eps', 1e-8),
        weight_decay=training_cfg.get('weight_decay', 0),
    )

    if state is not None:
        optimizer.load_state_dict(state)
    return optimizer

def load_pretrained_embeddings(path, model, id_to_token):
    """Overwrite the model's word embedding matrix with pre-trained vectors.

    Args:
        path: Text file with the pre-trained vectors.
        model: Model whose first embedder (``model.embedder.embeddings[0]``)
            receives the new weights; it must still live on the CPU because
            the current weights are converted with ``.numpy()``.
        id_to_token: Vocabulary mapping id -> token; id 0 is excluded from the
            lookup.

    Tokens missing from the file are served by a ``SymbolEmbSourceNorm`` built
    from the statistics of the pre-trained vectors with a fixed seed.
    """
    vocabulary = set(tok for id_, tok in id_to_token.items() if id_ != 0)
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding; the vector files contain non-ASCII tokens.
    with open(path, encoding='utf-8') as f_o:
        pre_trained = dataset.SymbolEmbSourceText(f_o, vocabulary)
    mean, cov = pre_trained.get_norm_stats(args.use_covariance)
    rng = np.random.RandomState(2)
    oovs = dataset.SymbolEmbSourceNorm(mean, cov, rng, args.use_covariance)

    word_embeddings = model.embedder.embeddings[0].embeddings
    injected = dataset.symbol_injection(
        id_to_token, 0, word_embeddings.weight.data.numpy(), pre_trained, oovs)
    word_embeddings.weight.data = torch.from_numpy(injected)

In [None]:
def new_init(train_path, val_path, test_path, config, args, loading_limit=None, used_data_per_train_epoch=1.0):
    """Set up a brand-new training run from the three split files.

    Loads and tokenizes train/validation/test data with shared vocabularies,
    builds the loaders, creates the model (optionally with pre-trained word
    embeddings) and a fresh optimizer.

    Returns:
        Tuple ``(model, id_to_token, id_to_char, optimizer, train_loader,
        val_loader, test_loader)``.
    """
    # Both vocabularies start with the empty symbol at id 0 and are shared by
    # all three tokenize_data calls below.
    token_to_id = {'': 0}
    char_to_id = {'': 0}

    print('Load Train Data [1/6]')
    train_set = load_data(train_path, loading_limit)
    print('Load Validation Data [1/6]')
    val_set = load_data(val_path, loading_limit)
    print('Load Test Data [1/6]')
    test_set = load_data(test_path, loading_limit)

    print('Tokenize Train Data [2/6]')
    train_set = dataset.tokenize_data(train_set, token_to_id, char_to_id)
    print('Tokenize Validation Data [2/6]')
    val_set = dataset.tokenize_data(val_set, token_to_id, char_to_id)
    print('Tokenize Test Data [2/6]')
    test_set = dataset.tokenize_data(test_set, token_to_id, char_to_id)

    # Only the training loader is restricted to a fraction of its data.
    train_loader = get_loader(train_set, config, used_data_per_batch=used_data_per_train_epoch)
    val_loader = get_loader(val_set, config)
    test_loader = get_loader(test_set, config)

    print('Create Inverse Dictionaries [3/6]')
    id_to_token = inverse_dict(token_to_id)
    id_to_char = inverse_dict(char_to_id)

    print('Initiate Model [4/6]')
    model = init_model(id_to_token, id_to_char)

    if not args.word_rep:
        # Without pre-trained vectors the random initialisation is kept.
        print('No pre-trained embeddings given [5/6]')
    else:
        print('Load pre-trained embeddings [5/6]')
        load_pretrained_embeddings(args.word_rep, model, id_to_token)

    use_gpu = torch.cuda.is_available() and args.cuda
    if use_gpu:
        model.cuda()
    model.train()

    optimizer = get_optimizer(model, config, state=None)
    print('Done init_state [6/6]')
    return model, id_to_token, id_to_char, optimizer, train_loader, val_loader, test_loader


def new_reload(train_path, val_path, test_path, checkpoint, training_state, config, args,loading_limit=None, used_data_per_train_epoch=1.0):
    """Rebuild model, optimizer and loaders to continue a checkpointed run.

    Args:
        train_path, val_path, test_path: Split files to load.
        checkpoint: Checkpoint handle accepted by ``reload_model``.
        training_state: Optimizer state dict to restore.
        config: Global configuration dict.
        args: Experiment arguments (unused here, kept for a signature parallel
            to ``new_init``).
        loading_limit: Forwarded to ``load_data``.
        used_data_per_train_epoch: Fraction of the training data used per
            epoch by the training loader.

    Returns:
        Tuple ``(model, id_to_token, id_to_char, optimizer, train_loader,
        val_loader, test_loader)``.

    Raises:
        AssertionError: If tokenizing the data changed the size of a
            vocabulary restored from the checkpoint.
    """
    print('Load Model from Checkpoint [1/5]')
    model, id_to_token, id_to_char = reload_model(checkpoint)

    optimizer = get_optimizer(model, config, training_state)

    print('Create Inverse Dictionaries [2/5]')
    token_to_id = inverse_dict(id_to_token)
    char_to_id = inverse_dict(id_to_char)

    # Remember the vocabulary sizes so growth during tokenization is detected.
    len_tok_voc = len(token_to_id)
    len_char_voc = len(char_to_id)

    print('Load Train Data [3/5]')
    train_data = load_data(train_path,loading_limit)
    print('Load Validation Data [3/5]')
    val_data = load_data(val_path,loading_limit)
    print('Load Test Data [3/5]')
    test_data = load_data(test_path,loading_limit)

    print('Tokenize Train Data [4/5]')
    train_data = dataset.tokenize_data(train_data, token_to_id, char_to_id)
    print('Tokenize Validation Data [4/5]')
    val_data = dataset.tokenize_data(val_data, token_to_id, char_to_id)
    print('Tokenize Test Data [4/5]')
    test_data = dataset.tokenize_data(test_data, token_to_id, char_to_id)

    # Only the training loader is restricted to a fraction of its data.
    train_loader = get_loader(train_data, config, used_data_per_batch=used_data_per_train_epoch)
    val_loader = get_loader(val_data, config)
    test_loader = get_loader(test_data, config)

    # The restored model's embedding tables match the checkpoint vocabulary,
    # so the data must not have introduced new symbols.
    assert len(token_to_id) == len_tok_voc
    assert len(char_to_id) == len_char_voc

    print('Done reload_state [5/5]')
    return model, id_to_token, id_to_char, optimizer, train_loader, val_loader, test_loader


# Loading, Splitting, Organizing and Tokenizing the Data

The first lines of the next cell control how much of the dataset is used.

In [None]:
PERCENTAGE_OF_DATA_TO_USE = 1  # (alpha) fraction of the original training data used at all
PERCENTUAL_SIZE_OF_TEST_SET = 0.1  # (beta) fraction of the used data held out as test set; kept fixed
PERCENTAGE_OF_REDUCED_DATA_TO_USE_PER_EPOCH = 0.1  # (gamma) fraction of the train part used per epoch

# We use 100% of the given data, and 10% of the training data becomes test
# data. force_renew=True forces the split files to be rewritten.
# NOTE(review): a forced re-split also sets args.force_restart, so the
# "Resuming" branch below is presumably never taken while force_renew=True.
try_to_split_testset(
    PERCENTUAL_SIZE_OF_TEST_SET,
    reduced_whole_size_train=PERCENTAGE_OF_DATA_TO_USE,
    reduced_whole_size_val=0.1,
    force_renew=True,
)

checkpoint_w, training_state_w, epoch_w = train_manager.try_to_resume(
    args.force_restart, args.exp_folder)

if checkpoint_w:
    print('Resuming training...')
    model_w, id_to_token_w, id_to_char_w, optimizer_w, train_loader, val_loader, test_loader = new_reload(
        args.train_splitted_data, args.val_splitted_data, args.test_splitted_data,
        checkpoint_w, training_state_w, config, args,
        used_data_per_train_epoch=PERCENTAGE_OF_REDUCED_DATA_TO_USE_PER_EPOCH)
else:
    print('Preparing to train...')
    model_w, id_to_token_w, id_to_char_w, optimizer_w, train_loader, val_loader, test_loader = new_init(
        args.train_splitted_data, args.val_splitted_data, args.test_splitted_data,
        config, args,
        used_data_per_train_epoch=PERCENTAGE_OF_REDUCED_DATA_TO_USE_PER_EPOCH)
    # Open explicitly in append mode: current h5py releases default to
    # read-only, which fails for a checkpoint file that does not exist yet.
    checkpoint_w = h5py.File(os.path.join(args.exp_folder, 'checkpoint'), 'a')
    checkpointing.save_vocab(checkpoint_w, 'vocab', id_to_token_w)
    checkpointing.save_vocab(checkpoint_w, 'c_vocab', id_to_char_w)

# Let the loaders create their batches directly as CUDA tensors.
if torch.cuda.is_available() and args.cuda:
    train_loader.tensor_type = torch.cuda.LongTensor
    val_loader.tensor_type = torch.cuda.LongTensor
    test_loader.tensor_type = torch.cuda.LongTensor

### The Pytorch Lightning Model

In [None]:
# Per-phase history: one list of per-step losses is appended per finished epoch.
epoch_saves = dict()
epoch_saves['train_loss'] = []
epoch_saves['val_loss'] = []
epoch_saves['test_loss'] = []

# Used for test evaluation: query id -> list of predicted answer strings.
qid2candidate = {}

import re
# Raw strings keep `\s` a regex escape instead of an invalid string escape.
regex_drop_char = re.compile(r'[^a-z0-9\s]+')
regex_multi_space = re.compile(r'\s+')

class BidafLightningWrapper(LightningModule):
    """PyTorch Lightning adapter around the notebook-global BiDAF model.

    The wrapper owns no parameters itself. The model (``model_w``), optimizer
    (``optimizer_w``), data loaders and checkpoint handle are the module-level
    objects created by the data-preparation cell, which therefore has to run
    first. Per-step losses of every epoch are collected in ``epoch_saves`` and
    test predictions in ``qid2candidate``.
    """

    def __init__(self):
        super().__init__()

    def setup(self, stage):
        # Nothing to set up: all state is prepared at notebook level.
        pass

    def prepare_data(self):
        # Data is loaded and tokenized at notebook level.
        pass

    def configure_optimizers(self):
        return optimizer_w

    def forward(self, passage, p_lengths, question, q_lengths):
        return model_w(passage, p_lengths, question, q_lengths)

    def train_dataloader(self):
        return train_loader

    def val_dataloader(self):
        return val_loader

    def test_dataloader(self):
        return test_loader

    def _span_loss(self, batch):
        """Run the model on ``batch``; return (loss, start_log_probs, end_log_probs)."""
        _qids, passages, queries, answers, _mappings = batch
        start_log_probs, end_log_probs = self(passages[:2], passages[2], queries[:2], queries[2])
        loss = model_w.get_loss(start_log_probs, end_log_probs, answers[:, 0], answers[:, 1])
        return loss, start_log_probs, end_log_probs

    def training_step(self, batch, batch_idx):
        loss, _, _ = self._span_loss(batch)
        return {'loss': loss, 'train_loss': loss, 'log': {'train_loss': loss}}

    def training_epoch_end(self, results):
        # NOTE(review): epoch_w is the value returned by try_to_resume and is
        # never incremented in this notebook - confirm that the epoch number
        # stored in the checkpoint is meant to stay constant.
        checkpointing.checkpoint(model_w, epoch_w, optimizer_w, checkpoint_w, args.exp_folder)
        # Move the model back to the GPU after checkpointing, guarded like
        # everywhere else in the notebook so CPU-only runs do not crash.
        if torch.cuda.is_available() and args.cuda:
            model_w.cuda()
        mean_loss = self.save_statistics('train', results)
        return {'log': {'train_loss': mean_loss}}

    def validation_step(self, batch, batch_idx):
        loss, _, _ = self._span_loss(batch)
        return {'val_loss': loss, 'log': {'val_loss': loss}}

    def validation_epoch_end(self, results):
        # 'val_loss' is the key monitored by the EarlyStopping callback.
        val_loss_mean = self.save_statistics('val', results)
        return {'val_loss': val_loss_mean}

    def test_step(self, batch, batch_idx):
        qids, passages, _queries, _answers, mappings = batch
        loss, start_log_probs, end_log_probs = self._span_loss(batch)

        predictions = model_w.get_best_span(start_log_probs, end_log_probs).cpu()
        passage_ids = passages[0].cpu().data
        for qid, _mapping, tokens, pred in zip(qids, mappings, passage_ids, predictions):
            # Turn the predicted token span into a normalised answer string:
            # lower-case, only [a-z0-9] and single spaces.
            span_ids = tokens[pred[0]:pred[1]]
            text = ' '.join(id_to_token_w[int(tok)] for tok in span_ids).lower()
            text = regex_drop_char.sub(' ', text)
            text = regex_multi_space.sub(' ', text).strip()
            qid2candidate.setdefault(qid, []).append(str(text))
        return {'test_loss': loss}

    def test_epoch_end(self, results):
        # NOTE(review): qid2candidate is never cleared, so candidates from an
        # earlier trainer.test() call in the same kernel are still in the pool
        # the random pick below draws from.
        no_ans_set = set()
        out_dict = {}
        for qid, candidates in qid2candidate.items():
            if len(candidates) < 1 or 'No Answer Present.' in candidates:
                no_ans_set.add(qid)
            # Take a random answer from the possible ones.
            out_dict[qid] = [candidates[randint(0, len(candidates) - 1)]]

        mean_test_loss = self.save_statistics('test', results)
        test_metrics = eval_manager.compute_metrics_from_model(args.test_reference_file, out_dict, no_ans_set)
        outputfile = os.path.join(args.exp_folder, 'metrics.json')
        with open(outputfile, 'w+') as f_o:
            f_o.write(json.dumps(test_metrics))
        return {'log': {'test_loss': mean_test_loss}}

    def save_statistics(self, phase, results):
        """Record the per-step losses of one epoch and return their mean.

        Args:
            phase: 'train', 'val' or 'test'; selects the ``<phase>_loss`` key.
            results: Step outputs of the epoch, each containing that key.
        """
        key = phase + '_loss'
        # Detach before storing: keeping the raw loss tensors would keep every
        # step's autograd graph (and its GPU memory) alive for the whole run.
        step_losses = [step[key].detach() for step in results]
        mean_loss = torch.stack(step_losses).mean()
        print("Mean {} Loss: {}".format(phase, mean_loss))
        epoch_saves[key].append([step_loss.cpu() for step_loss in step_losses])
        return mean_loss



Start new Training

In [None]:
from pytorch_lightning.callbacks import EarlyStopping

modelLightning = BidafLightningWrapper()
#-------Early Stopping-------
# Monitor the validation loss with a patience of 3.
early_stopping = EarlyStopping('val_loss', patience=3)
# Only request a GPU when one is usable, consistent with the args.cuda guards
# used everywhere else in the notebook.
trainer = Trainer(gpus=1 if args.cuda else 0, early_stop_callback=early_stopping)
trainer.fit(modelLightning)
trainer.test()

Load Checkpoint and resume Training

In [None]:
from pytorch_lightning.callbacks import EarlyStopping

modelLightning = BidafLightningWrapper()
# NOTE(review): this callback is created but never passed to the Trainer, so
# resumed training runs without early stopping - confirm whether that is
# intended.
early_stopping = EarlyStopping('val_loss')
# Only request a GPU when one is usable, consistent with the args.cuda guards
# used everywhere else in the notebook.
trainer = Trainer(gpus=1 if args.cuda else 0, resume_from_checkpoint= os.path.join(args.exp_folder,"checkpoint.ckpt"))
trainer.fit(modelLightning)
trainer.test(modelLightning)

Save Pytorch Lightning Checkpoint

In [None]:
# Writes the Lightning checkpoint to the path that the "resume" cell reads back.
trainer.save_checkpoint(os.path.join(args.exp_folder,"checkpoint.ckpt"))

#### Save Loss to statistics.json (to be executed directly after training)

In [None]:
from pathlib import Path
# Target file for the per-step loss history; touch() creates it if it is missing.
outputfile = os.path.join(args.exp_folder,'statistics.json')
Path(outputfile).touch()

In [None]:
def writeStatisticToDict(key, output_dict, start_epoch = 0, saves = None):
    """Copy the recorded losses for ``key`` into ``output_dict`` as plain numbers.

    Args:
        key: Statistic name, e.g. 'train_loss'.
        output_dict: Dict that receives ``output_dict[key][epoch][step] = value``;
            an existing entry for ``key`` is replaced.
        start_epoch: Number given to the first recorded epoch; later epochs
            count up from it.
        saves: Mapping ``key -> list of epochs``, each epoch being a list of
            values with an ``item()`` method. Defaults to the notebook-global
            ``epoch_saves``.
    """
    output_dict[key] = dict()
    source = epoch_saves if saves is None else saves
    # enumerate() shifts only the epoch label; the stored list itself is
    # always read from its first element.
    for epoch_idx, epoch_losses in enumerate(source[key], start=start_epoch):
        output_dict[key][epoch_idx] = {
            step_idx: loss.item() for step_idx, loss in enumerate(epoch_losses)
        }

In [None]:
# Collect the loss history of all three phases and store it as JSON in
# `outputfile` (defined in the cell above).
output_dict = dict()

writeStatisticToDict('train_loss',output_dict)
writeStatisticToDict('val_loss',output_dict)
writeStatisticToDict('test_loss',output_dict)

with open(outputfile, 'w') as write_f:
    write_f.write(json.dumps(output_dict))

#### Load Loss from statistics.json

In [None]:
import json
import numpy as np
# Read the stored loss history back. After the JSON round trip all epoch and
# step keys are strings.
outputfile = os.path.join(args.exp_folder,'statistics.json')
with open(outputfile, 'r') as read_f:
    statistics = json.load(read_f)

Plot per Epoch Mean Loss

In [None]:
from matplotlib import pyplot as plt

def visualizeEpochMeanLoss(key, stats, skip_first=False, plot_variance=True, color = None, label = None):
    """Plot the mean loss of every epoch for one statistic.

    Args:
        key: Statistic to plot, e.g. 'val_loss'.
        stats: Loaded statistics, ``stats[key][epoch][step] -> loss``.
        skip_first: Leave out the first recorded epoch; the remaining epochs
            are plotted starting at x = 0.
        plot_variance: Draw error bars whose length is the variance (not the
            standard deviation) of the losses within the epoch.
        color: Optional matplotlib color, applied in both plot modes.
        label: Legend label; defaults to ``key``.
    """
    if not label:
        label = key

    epochs = list(stats[key].values())
    if skip_first:
        epochs = epochs[1:]

    per_epoch = [np.array(list(epoch.values())) for epoch in epochs]
    mean_loss = [np.average(losses) for losses in per_epoch]
    var_loss = [np.var(losses) for losses in per_epoch]
    x = np.arange(len(per_epoch))

    # Pass the color only when one was requested, so matplotlib's default
    # color cycle stays in effect otherwise.
    line_style = {'label': label}
    if color:
        line_style['color'] = color

    if plot_variance:
        plt.errorbar(x, mean_loss, yerr=var_loss, **line_style)
    else:
        plt.plot(x, mean_loss, **line_style)
    
def visualizeTestMeanLoss(train_key, test_key, stats):
    """Plot the mean test loss as a horizontal line over all training epochs.

    The mean is taken over the steps stored under epoch '0' of ``test_key``;
    the line gets one point per epoch recorded for ``train_key``.
    """
    test_losses = stats[test_key]['0'].values()
    test_mean = sum(test_losses) / len(test_losses)
    flat_line = [test_mean] * len(stats[train_key])
    plt.plot(flat_line, label = test_key)

In [None]:
# Train and validation curves plus the test loss as a reference line.
# The first recorded validation epoch is left out of the validation curve.
visualizeEpochMeanLoss('train_loss', statistics)
visualizeEpochMeanLoss('val_loss',statistics, skip_first=True)
visualizeTestMeanLoss('train_loss','test_loss',statistics)
plt.legend(loc='upper right', frameon=True)
plt.xlabel("Epoch")
plt.ylabel("Loss")
# Fixed y-range; losses outside 3..6 are clipped from view.
plt.ylim(3,6)
plt.savefig(os.path.join(args.exp_folder, 'loss.png'))
#plt.show()

Plot per Batch Loss

In [None]:
from matplotlib import pyplot as plt
def visualizeLoss(key, stats):
    """Plot every recorded step loss of ``key`` as one continuous curve across epochs."""
    per_step = [
        loss
        for epoch_losses in stats[key].values()
        for loss in epoch_losses.values()
    ]
    plt.plot(per_step)

In [None]:
# All three curves go into the same axes; the x-axis is the running step
# index within each statistic.
visualizeLoss('train_loss', statistics)
visualizeLoss('val_loss',statistics)
visualizeLoss('test_loss',statistics)

Plotting Multiple Validation Curves (requires the LearnCurve_01, LearnCurve_02, LearnCurve_03, LearnCurve_04, LearnCurve_05, LearnCurve_06 and EarlyStoppingAdam experiments)

In [None]:
from matplotlib import pyplot as plt
import os
pwd = os.getcwd()
import json
import numpy as np


# One validation curve per LearnCurve_0<i> experiment (i = 1..6), labelled
# '0.<i>'. The RGBA color gets more blue and more opaque as i grows.
for i in range(1,7,1):
    path= os.path.join(pwd, 'Experimente/LearnCurve_0' + str(i),'statistics.json')
    with open(path, 'r') as read_f:
        statistics = json.load(read_f)
    visualizeEpochMeanLoss('val_loss',statistics, skip_first=True,plot_variance=False, color = (0,0,(i)/8,(i)/8), label = '0.' + str(i))
    
# The EarlyStoppingAdam experiment is plotted last, labelled '1', in fully
# opaque blue.
path = os.path.join(pwd, 'Experimente/EarlyStoppingAdam', 'statistics.json')
with open(path, 'r') as read_f:
    statistics = json.load(read_f)
visualizeEpochMeanLoss('val_loss',statistics, skip_first=True,plot_variance=False, color = (0,0,8/8,(8)/8), label = '1' )

plt.legend(loc='upper right', frameon=True)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.ylim(2,3.5)
plt.savefig(os.path.join(pwd, 'multiple_vals_1.png'))

    
    

    

Plotting the Learning Curve

In [None]:
from matplotlib import pyplot as plt
import os
pwd = os.getcwd()
import json
import numpy as np

def getLastMeanVar(key, stats):
    """Return ``(mean, variance)`` of the step losses in the last recorded epoch of ``key``."""
    per_epoch = stats[key]
    last_epoch = list(per_epoch)[-1]
    losses = np.array(list(per_epoch[last_epoch].values()))
    return (np.average(losses), np.var(losses))

In [None]:
def plotLastMeanVar(key):
    """Plot the final-epoch mean loss of ``key`` against the data set size.

    Reads ``statistics.json`` of the experiments LearnCurve_01..LearnCurve_06
    (x labels '0.1'..'0.6') and EarlyStoppingAdam (x label '1') below
    ``pwd``/Experimente. The error bars show the variance within the last
    epoch.
    """
    # (x tick label, experiment folder) in plotting order.
    experiments = [('0.' + str(i), 'Experimente/LearnCurve_0' + str(i)) for i in range(1, 7)]
    experiments.append(('1', 'Experimente/EarlyStoppingAdam'))

    x = []
    mean_loss = []
    var_loss = []
    for tick, folder in experiments:
        path = os.path.join(pwd, folder, 'statistics.json')
        with open(path, 'r') as read_f:
            statistics = json.load(read_f)
        # Use the requested statistic for every experiment, including the
        # reference one at size '1'.
        mean, var = getLastMeanVar(key, statistics)
        x.append(tick)
        mean_loss.append(mean)
        var_loss.append(var)

    plt.errorbar(x, mean_loss, yerr=var_loss, label = key)

# Draw validation and training learning curves into the same figure.
plotLastMeanVar('val_loss')
plotLastMeanVar('train_loss')



plt.legend(loc='upper right', frameon=True)
plt.xlabel("Dataset Size")
plt.ylabel("Loss")
plt.ylim(0,3.5)
plt.savefig(os.path.join(pwd, 'learncurve.png'))


Get a Summary of the Model

In [None]:
# Requires the data-preparation cell to have created `model_w`.
print(model_w)

In [None]:
# Total number of trainable scalar parameters (only tensors with requires_grad).
sum(p.numel() for p in model_w.parameters() if p.requires_grad)