# 1. Setup

## 1.1 Imports

In [5]:
#imports
import torch
from torch.utils.data import Dataset,DataLoader,Subset
import pandas as pd
from collections import namedtuple
from transformers import BertTokenizer
import numpy as np
from transformers import BertForSequenceClassification,BertConfig
from torch.optim import Adam
from torch.nn import CrossEntropyLoss    
from sklearn.metrics import accuracy_score,precision_score,confusion_matrix
from pathlib import Path
import pathlib

import matplotlib.pyplot as plt
import os
import json
import simplejson
import warnings



## 1.2 Cuda

use cuda if available

In [6]:
#define device
device= torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

## 1.3 Parameters

This section holds all tuneable parameters of this notebook seperated into different categories according to target area

In [7]:
notebook_parameters={"model_parameters":None,
                     "training_parameters":None,
                     "data_parameters":None,
                     "storage_parameters":None}

### 1.3.1 Model Parameters

In [8]:
notebook_parameters["model_parameters"]={"num_hidden_layers":24,
                                         "hidden_dropout_prob":0.3
                                        }

### 1.3.2 Training Parameters

In [9]:
notebook_parameters["training_parameters"]={"learningrate": 2e-5,
                                            "number_of_epochs": 2,
                                            "loss_function": CrossEntropyLoss()
                                            }

### 1.3.3 Data parameters

In [10]:
notebook_parameters["data_parameters"]={"batchsize":6,
                                        "path_to_train_data":r"C:\Users\nick\Code\MachineLearning_Projects\Bewerbung_NLP\data\v1\v1\train.tsv",
                                        "train_limit": -1,
                                        "path_to_valid_data":r"C:\Users\nick\Code\MachineLearning_Projects\Bewerbung_NLP\data\v1\v1\dev.tsv",
                                        "valid_limit": -1,
                                        }

### 1.3.4 Storage parameters

In [11]:
notebook_parameters["storage_parameters"]= {"path_to_model_storage": Path(r"C:\Users\nick\Code\MachineLearning_Projects\Bewerbung_NLP\trainings\trained_models")
                                           }

# 2. Datasets and Transforms

## 2.1 Original Dataset

Create a class that will hold the initial dataset, directly derived from the translated original data

In [12]:
class Blogpost_dataset(Dataset):
    """
    this class serves as a custom dataset for the given blog posts
    """

    def __init__(self,src_path,transform=None):
        """
        task: inits the dataset and sets optional transforms \n
        parameters: src_path(str(path to the underlying source data)), transform(optional transformation that may be applied to each sample ) \n
        return value:
        """

        self.src_df = pd.read_csv(src_path,sep="\t")
        self.transform= transform
        self.Dataset_item= namedtuple("Dataset_item",["text","label"]) 

    def __len__(self):
        """
        task: return the length of the underlying source DataFrame \n
        parameters:\n
        return value:
        """

        return len(self.src_df)

    def __getitem__(self, index):
        """
        task: return the item at the given index \n
        parameters:\n
        return value:
        """

        #transform the index to a list in case it is a tensor
        if torch.is_tensor(index):
            index= index.tolist()

        #fetch item from source df
        item=self.Dataset_item(self.src_df.iloc[index]["text"],self.src_df.iloc[index]["label"])

        #apply transform if available
        if self.transform:
            item= self.transform(item)

        return item


## 2.2 BERT compatible Dataset

Create a class that will serve as a dataset for the data in BERT compatible, already tokenized form

In [13]:
class Bert_compatible_dataset(Dataset):
    """
    this class holds a dataset that was transformed using the BertTransform
    """

    def __init__(self,input_ids_list,attention_mask_list,token_type_ids_list,label_list,text_list=None,transform=None):
        """
        task: create a dataset from the given lists of tokens. if text_list is given that column will hold the original text \n
        parameters:input_ids_list(list(token id)), attention_mask_list(list(attention mask)),token_type_ids_list(list(token_type_id)) ,label_list(list(label)), text_list(list(optional: orignal text))\n
        return value:
        """

        #create a class that will hold one element of data
        self.Dataset_item= namedtuple("Bert_dataset_item",["input_ids","attention_mask","token_type_ids","label","text"])

        #this list will store all data 
        self.data= []
        
        if text_list:
            #assert that all of those lists are of same length
            assert len(input_ids_list)==len(attention_mask_list)==len(token_type_ids_list)==len(label_list)==len(text_list),"length of lists has to match"
        
            #zip the lists together
            for input_ids,attention_mask,token_type_ids,label,text in zip(input_ids_list,attention_mask_list,token_type_ids_list,label_list,text_list):

                #create a namedtuple storing that data and append it to self.data
                item= self.Dataset_item(input_ids,attention_mask,token_type_ids,label,text)
                self.data.append(item)

        else:
            assert len(input_ids_list)==len(attention_mask_list)==len(token_type_ids_list)==len(label_list),"length of lists has to match"

            #zip the lists together
            for input_ids,attention_mask,token_type_ids,label in zip(input_ids_list,attention_mask_list,token_type_ids_list,label_list):

                #create a namedtuple storing that data and append it to self.data
                item= self.Dataset_item(input_ids,attention_mask,token_type_ids,label,0)
                self.data.append(item)

        self.transform = transform


    def __len__(self):
        """
        task: return the length of self.data field \n
        parameters:\n
        return value:
        """

        return len(self.data)

    def __getitem__(self, index):
        """
        task: return the item at index \n
        parameters:\n
        return value:
        """

        #transform the index to a list in case it is a tensor
        if torch.is_tensor(index):
            index= index.tolist()

        data=self.data[index]

        if self.transform:
            data= self.transform(data)

        return data


## 2.3 Transforms

Create a Transform that will transform bert compatible data into tensors on the given device

In [14]:
class BertToTensor(object):
    """
    This class serves as a transform to transfer the elements of namedtuple into tensors on the given device
    """

    def __init__(self,device="cpu"):
        """
        task:  \n
        parameters:\n
        return value:
        """

        self.device=device

    def __call__(self,named_tuple):
        """
        task: transform the elements of the named tuple into tensors and ship them over to self.device \n
        parameters: named_tuple("Bert_dataset_item",["input_ids","attention_mask","token_type_ids","label","text"]) \n
        return value: transformed elements
        """

        #unpack the named tuple
        input_ids,attention_mask,token_type_ids,label,text = named_tuple

        #transform to tensor
        input_ids= torch.IntTensor(input_ids).to(device=self.device)
        attention_mask= torch.IntTensor(attention_mask).to(device=self.device)
        token_type_ids= torch.IntTensor(token_type_ids).to(device=self.device)
        label= torch.IntTensor(label).to(device=self.device)
        text= torch.IntTensor(text).to(device=self.device)

        return input_ids,attention_mask,token_type_ids,label,text

create a transform that takes the original data as imput and tokenizes it using the BERTTokenizer

In [15]:
class BertTransform(object):
    """
    this class will serve as a transform that tokenizes a given text using the bert tokenizer
    """

    def __init__(self,max_length):
        """
        task: init the transform and creates a bert tokenizer\n
        parameters:\n
        return value:
        """

        self.tokenizer= BertTokenizer.from_pretrained('bert-base-chinese',do_lower_case=True)
        self.max_length= max_length

    def __call__(self,item):
        """
        task: when called transform the given items' text field by applying bertTokenization  \n
        parameters:\n
        return value:
        """

        #transform the 
        transformed_text= self.tokenizer.encode_plus(
            item.text,
            add_special_tokens=True, #adds beginning(CLS)) and end(SEP) tokens of sequence)
            max_length= self.max_length,
            pad_to_max_length=True, # makes the tokenizer fill the token vectors with padding tokens if the sequence is smaller than max_length
            return_attention_mask = True
        )

        Dataset_item=namedtuple("Dataset_item",["text","label"]) 
        return Dataset_item(transformed_text,item.label)

a method that creates an original dataset and transforms it into a bert compatibel one

In [16]:
def transform_original_dataset_2_bert_compatible(src_path,max_length=512,limit=-1,device="cpu"):
    """
    task: use the BertTokenize transform to tokenize the given original Dataset and thus create a dataset that is bert compatible \n
    parameters: src_path(path to original data), max_length(int(max length allowed for transformer, 512 for bert)),limit(number of entries to use) \n
    return value: torch.utils.data.Dataset
    """

    #create BertTransform
    transform=BertTransform(max_length)

    #create the original Blogpost_dataset
    blogpost_ds=Blogpost_dataset(src_path,transform=transform)

    #pull a subset of the dataset if a limit was given
    if limit>0:
        indices= np.random.choice(range(len(blogpost_ds)),size=limit,replace=False) #choose random indices
        blogpost_ds= Subset(blogpost_ds,indices)

    #lists that will stored the transformed/tokenized text
    input_ids_list = []
    token_type_ids_list = []
    attention_mask_list = []
    label_list = []

    #iterate over the dataset with transform and catch the thus transformed texts by putting them into a new dataset
    for transformed_text,label in blogpost_ds:

        #apppend the contents to the corresponding list
        input_ids_list.append(transformed_text['input_ids'])
        token_type_ids_list.append(transformed_text['token_type_ids'])
        attention_mask_list.append(transformed_text['attention_mask'])
        label_list.append([label])

    #return the Bert_compatible_dataset derived from those lists
    return Bert_compatible_dataset(input_ids_list,attention_mask_list,token_type_ids_list,label_list,transform=BertToTensor(device))


## 2.4 Dataset and Dataloader Creation

train dataset/dataloader

In [17]:
#create a bert compatible dataset
bert_train_ds= transform_original_dataset_2_bert_compatible(notebook_parameters["data_parameters"]["path_to_train_data"],limit=notebook_parameters["data_parameters"]["train_limit"],device=device)

#create a corresponding dataloader
bert_train_dl= DataLoader(bert_train_ds,batch_size=notebook_parameters["data_parameters"]["batchsize"],shuffle=True)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


In [18]:
for sample in bert_train_ds:
    print(sample)
    break

(tensor([ 101, 2945, 6432, 1305, 1849, 2209, 6206, 6842, 1139, 4767, 3779, 6783,
        1139, 1744, 5299, 5302, 8024, 6821, 3221, 2335, 3249, 6842, 5408, 4060,
        4638, 6825, 7219, 1353, 2418, 8024,  738, 3221,  686, 4518, 4914, 2415,
        1469, 3419, 2229, 7028, 3354, 4638, 3322, 5357,  511, 2190, 4767, 3779,
        3867, 6589, 1744, 2418, 6421, 3221, 1962,  752,  102,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,    0,
           0,    0,    0,    0,    0,  

validation dataset/dataloader

In [19]:
#create a bert compatible dataset
bert_valid_ds= transform_original_dataset_2_bert_compatible(notebook_parameters["data_parameters"]["path_to_valid_data"],limit=notebook_parameters["data_parameters"]["valid_limit"],device=device)

#create a corresponding dataloader
bert_valid_dl= DataLoader(bert_valid_ds,batch_size=notebook_parameters["data_parameters"]["batchsize"],shuffle=True)

Truncation was not explicitly activated but `max_length` is provided a specific value, please use `truncation=True` to explicitly truncate examples to max length. Defaulting to 'longest_first' truncation strategy. If you encode pairs of sequences (GLUE-style) with the tokenizer you can select this strategy more precisely by providing a specific strategy to `truncation`.


# 3. The Model

The model used will be BERT of google

# 3.1 Training

### 3.1.1 Model and optimizer init

use the Bert Config class to configure the Bert model used

In [20]:
bert_config= BertConfig(num_hidden_layers=notebook_parameters["model_parameters"]["num_hidden_layers"],hidden_dropout_prob=notebook_parameters["model_parameters"]["hidden_dropout_prob"])

initialize optimizer and model

In [21]:
#init the model
model= BertForSequenceClassification(bert_config).from_pretrained('bert-base-chinese').to(device)

#use adam optimizer
optimizer= Adam(model.parameters(),lr=notebook_parameters["training_parameters"]["learningrate"])

Some weights of the model checkpoint at bert-base-chinese were not used when initializing BertForSequenceClassification: ['cls.predictions.bias', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight']
- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at

### 3.1.2 Loss Function

Given the binary classification problem, the cross entropy loss function comes in quite handy

In [22]:
#use Crossentropy loss
criterion = notebook_parameters["training_parameters"]["loss_function"]


### 3.1.3 Training loop

In [23]:
def train_model(model,dataloader,loss_func,optimizer,number_of_epochs,calc_ewma=False,**ewma_kwargs):
    """
    task: train the given model using the given dataloader for number_of_epochs \n
    parameters: model(torch.nn.Module subclass),loss_func(torch.nn loss function), optimizer(torch.optim optimizer), number_of_epochs(int(number of epochs to train)),calc_ewma(bool(true if ewma shall be calculated)),ewma_kwargs(kwargs that will be passed to ewma calculation) \n
    return value: None, but it is a generator yielding the loss of each batch iteration together with the number of epochs and batch id
    """

    if calc_ewma:
        loss_history=pd.DataFrame(columns=["batch_number","batch_loss"])
    current_ewma=None
    
    for epoch_id in range(number_of_epochs):

        for batch_id,batch in enumerate(dataloader,start=1):

            #unpack the batch
            input_ids_list,attention_mask_list,token_type_ids_list,label_list,*_=batch

            #zero gradients
            optimizer.zero_grad()

            #forward_pass
            output=model(input_ids_list,attention_mask_list,token_type_ids_list)
            #something is wrong with the model inputs here, prob cuz they are no tensors

            #compute loss
            batch_loss=criterion(output.logits,label_list.flatten().to(dtype=torch.long))
            batch_loss.backward()

            #optimize
            optimizer.step()
            
            #calc ewma if desired
            if calc_ewma:
                loss_history=loss_history.append({"batch_number": epoch_id*len(dataloader)+batch_id,
                                                  "batch_loss": batch_loss}
                                                  ,ignore_index=True)
                current_ewma=loss_history.ewm(**ewma_kwargs).mean().iloc[-1]["batch_loss"]

            #yield training info about current batch
            yield epoch_id,batch_id, batch_loss,current_ewma



In [24]:
%matplotlib notebook

#create a plot for training info
x=[0]
y=[0]
ewma_list=[None]
plt.ion()
info_fig,axes= plt.subplots(figsize=(10,8))
batch_plot,= axes.plot(x,y)
ewma_plot, =axes.plot(x,ewma_list, "red")
plt.title("training performance")
plt.xlabel("batch iteration")
plt.ylabel("cross entropy batch loss")
plt.legend(["batch_loss","EWMA of batch_loss"])
plt.gcf().text(0.5,0,"0.00% of training done",fontsize=14)


#execute the training loop and print the training info
for epoch_id,batch_id,batch_loss,ewma in train_model(model,bert_train_dl,criterion,optimizer,notebook_parameters["training_parameters"]["number_of_epochs"],calc_ewma=True,halflife=10):
    
    #print training info
    print(f"[epoch {epoch_id} _ batch {batch_id}]: loss= {batch_loss}")
    
    #calculate the percentage of training done and add it as text
    batch_number=epoch_id*len(bert_train_dl)+batch_id
    percentage_done= round(batch_number/(notebook_parameters["training_parameters"]["number_of_epochs"]*len(bert_train_dl))*100,2)
    info_fig.texts[0].set_text(f"{percentage_done:.2f}% of training done")
    
    #update plot with current training info
    x.append(batch_number)
    y.append(batch_loss)
    ewma_list.append(ewma)
    
    batch_plot.set_data(x,y)
    ewma_plot.set_data(x,ewma_list)
    axes.relim()
    axes.autoscale_view(True,True,True)
    
    info_fig.canvas.draw()
    info_fig.canvas.flush_events()
    

<IPython.core.display.Javascript object>

[epoch 0 _ batch 1]: loss= 0.6223054528236389
[epoch 0 _ batch 2]: loss= 0.786313533782959
[epoch 0 _ batch 3]: loss= 0.4928818643093109
[epoch 0 _ batch 4]: loss= 0.6485455632209778
[epoch 0 _ batch 5]: loss= 0.5643076300621033
[epoch 0 _ batch 6]: loss= 0.5214678645133972
[epoch 0 _ batch 7]: loss= 0.5779505372047424
[epoch 0 _ batch 8]: loss= 0.5096936821937561
[epoch 0 _ batch 9]: loss= 0.5329797863960266
[epoch 0 _ batch 10]: loss= 1.0573731660842896
[epoch 0 _ batch 11]: loss= 0.495487779378891
[epoch 0 _ batch 12]: loss= 0.49760326743125916
[epoch 0 _ batch 13]: loss= 0.7478143572807312
[epoch 0 _ batch 14]: loss= 0.5689457058906555
[epoch 0 _ batch 15]: loss= 0.6313595771789551
[epoch 0 _ batch 16]: loss= 0.5527124404907227
[epoch 0 _ batch 17]: loss= 0.645791232585907
[epoch 0 _ batch 18]: loss= 0.6129927635192871
[epoch 0 _ batch 19]: loss= 0.8046684861183167
[epoch 0 _ batch 20]: loss= 0.7673191428184509
[epoch 0 _ batch 21]: loss= 0.697790801525116
[epoch 0 _ batch 22]: los

[epoch 0 _ batch 175]: loss= 0.7043684124946594
[epoch 0 _ batch 176]: loss= 0.5534761548042297
[epoch 0 _ batch 177]: loss= 0.5199676156044006
[epoch 0 _ batch 178]: loss= 0.6533022522926331
[epoch 0 _ batch 179]: loss= 0.6703901886940002
[epoch 0 _ batch 180]: loss= 0.3780886232852936
[epoch 0 _ batch 181]: loss= 0.5870404243469238
[epoch 0 _ batch 182]: loss= 0.8133097290992737
[epoch 0 _ batch 183]: loss= 0.5772420763969421
[epoch 0 _ batch 184]: loss= 0.8462125658988953
[epoch 0 _ batch 185]: loss= 0.5383930802345276
[epoch 0 _ batch 186]: loss= 0.5302768349647522
[epoch 0 _ batch 187]: loss= 0.3911506235599518
[epoch 0 _ batch 188]: loss= 0.670694887638092
[epoch 0 _ batch 189]: loss= 0.419242262840271
[epoch 0 _ batch 190]: loss= 0.5833125710487366
[epoch 0 _ batch 191]: loss= 0.8526368141174316
[epoch 0 _ batch 192]: loss= 0.48931726813316345
[epoch 0 _ batch 193]: loss= 0.8845892548561096
[epoch 0 _ batch 194]: loss= 0.587894856929779
[epoch 0 _ batch 195]: loss= 0.75006276369

[epoch 1 _ batch 96]: loss= 1.0397533178329468
[epoch 1 _ batch 97]: loss= 0.3004377782344818
[epoch 1 _ batch 98]: loss= 0.7051282525062561
[epoch 1 _ batch 99]: loss= 0.6532800793647766
[epoch 1 _ batch 100]: loss= 0.31356281042099
[epoch 1 _ batch 101]: loss= 0.4030980169773102
[epoch 1 _ batch 102]: loss= 0.4749681055545807
[epoch 1 _ batch 103]: loss= 0.434798926115036
[epoch 1 _ batch 104]: loss= 1.2443411350250244
[epoch 1 _ batch 105]: loss= 0.3010285198688507
[epoch 1 _ batch 106]: loss= 0.6679202914237976
[epoch 1 _ batch 107]: loss= 0.49746620655059814
[epoch 1 _ batch 108]: loss= 0.7528106570243835
[epoch 1 _ batch 109]: loss= 0.5444551110267639
[epoch 1 _ batch 110]: loss= 0.7416856288909912
[epoch 1 _ batch 111]: loss= 0.2679084837436676
[epoch 1 _ batch 112]: loss= 0.39168453216552734
[epoch 1 _ batch 113]: loss= 0.7008721232414246
[epoch 1 _ batch 114]: loss= 0.3685493469238281
[epoch 1 _ batch 115]: loss= 0.573824942111969
[epoch 1 _ batch 116]: loss= 0.532822072505950

## 3.2 Model evaluation

In [25]:
class Metric_composer():
    """
    this class is used to evaluate model predictions on multiple metrics simultaneously
    """
    
    def __init__(self,metrics_dict):
        """
        task: init composer and set its metrics field \n
        parameters:metrics_dict(sting(metric name): callable(metric func))\n
        return value:
        """
        
        self.metrics= metrics_dict
        
    def evaluate(self,y_truth,y_pred):
        """
        task: use the metrics in the self.metrics field to evaluate the given y values \n
        parameters: y_truth(ground truth labels), y_pred(predicted labels) \n
        return value: dict(string(name):evaluation of corresponding function)
        """
    
        evaluation={}
        for name,metric_func in self.metrics.items():
            
            evaluation[name]= metric_func(y_truth,y_pred)
            
        return evaluation
    

In [26]:
def eval_model(model,dataloader,metric_composer_obj):
    """
    task: evaluate the model on the given dataloader using the loss_func \n
    parameters: model(torch.nn.Module subclass), dataloader(torch.utils.data.DataLoader instance), metric_composer_obj(instance of Metric_composer) \n
    return value:
    """
    
    #put model into evaluation mode
    model.eval()
    
    y_pred=[]
    y_truth=[]
    with torch.no_grad():
        
        for batch in dataloader:
            
            #unpack the batch
            input_ids_list,attention_mask_list,token_type_ids_list,label_list,*_=batch
            
            #evalaute batch with model
            output=model(input_ids_list,attention_mask_list,token_type_ids_list)
            
            #model outputs a prob distribution over the 2 classes, so get argmax to retreive predicted label
            predicted_labels=torch.argmax(output.logits, axis=1).flatten()
    
            #append both the predicted labels and the true labels 
            y_pred+=predicted_labels.tolist()
            y_truth+=label_list.flatten().tolist()
            
            
    return metric_composer_obj.evaluate(y_truth,y_pred)

compose metrics

In [27]:
#compose different performance metrics
metric_composer_obj= Metric_composer({
    "accuracy": accuracy_score,
    "precision": precision_score,
    "confusion_matrix": confusion_matrix
})

evaluate performance on train data

In [28]:
#iterate over all evaluations and print them
train_evaluation=eval_model(model,bert_train_dl,metric_composer_obj)
for metric_name, metric_evaluation in train_evaluation.items():
    print(f"----{metric_name}----")
    print(metric_evaluation)
    print("---------------------")

----accuracy----
0.9371693121693122
---------------------
----precision----
0.9393939393939394
---------------------
----confusion_matrix----
[[704  46]
 [ 49 713]]
---------------------


evaluate performance on validation data

In [29]:
#iterate over all evaluations and print them
valid_evaluation=eval_model(model,bert_valid_dl,metric_composer_obj)
for metric_name, metric_evaluation in valid_evaluation.items():
    print(f"----{metric_name}----")
    print(metric_evaluation)
    print("---------------------")

----accuracy----
0.6507936507936508
---------------------
----precision----
0.651685393258427
---------------------
----confusion_matrix----
[[65 31]
 [35 58]]
---------------------


## 3.3 Store/load the model 

### 3.3.1 store model

In [30]:
def store_model(model,description,dest_path):
    """
    task: store the given model together with a txt file that stores a short description \n
    parameters:model(torch.nn.Module), description(dict), dest_path(each stored model will be stored in a folder containing state_dict and description.txt, this defines the name of the folder)\n
    return value:
    """
    
    #create the folder at dest_path if not available
    os.mkdir(str(dest_path))
    
    #store the description in a description.json file
    with open(dest_path.joinpath("description.json"),"w") as f:
        f.write(simplejson.dumps(description, indent=4, sort_keys=True))
    
    #store the models state dict
    torch.save(model.state_dict(), str(dest_path.joinpath("state_dict.pth")))

In [31]:
def recursive_force_serializability(dict_to_transform,max_depth=100,caller_depth=0,replace_unstringable=False):
    """
    task: convert any non-serializable object within that dict to string and thus force serialiazability  \n
    parameters:dict_to_transform(dict), max_depth(int(max depth for recursive calls)),caller_depth(int(depth of the caller)), replace_unstringable=\n
    return value:
    """
    
    #create a copy of the original dict in case the caller_depth==0
    if caller_depth==0:
        dict_to_transform= dict_to_transform.copy()
    
    current_depth=caller_depth+1
    
    #recursion anker
    if current_depth==max_depth:
        warnings.warn("exceeded given recursion depth. Unable to fully transform given dictionary")
        return dict_to_transform
    
    #call ,method recursively on the items of the dict if they are dicts as well, in case of unserializable items convert them to strings if possible
    for key,item in dict_to_transform.items():
        if type(item)==dict:
            dict_to_transform[key]= recursive_force_serializability(item,max_depth=max_depth,caller_depth=current_depth,replace_unstringable=replace_unstringable)
        else:
            #try to dump the item to check ifg its serializable
            try:
                json.dumps(item)
            except TypeError:
                #if not serializable try to transform it to string
                try:
                    dict_to_transform[key]=str(item)
                except TypeError:
                    #if not stringable replace by "unserializable and unstringable object" string if replace_unstringable==True, else raise error
                    if replace_unstringable:
                        dict_to_transform[key]="unserializable and unstringable object"
                        warnings.warn(f"replaced object at key {key} with replacement string")
                    else:
                        raise AssertionError("unserializable and unstringable object was found and replace_unstringable is set to False, set to True if replacement is desired")
                    
    return dict_to_transform

In [32]:
def create_model_description(notebook_parameters=None,train_evaluation=None,valid_evaluation=None):
    """
    task: create a description of the model by storing the notebook_parameters used and evaluation scores   \n
    parameters: notebook_parameters(dict at beginning of notebook), train_evaluation(output of Model_composer.evaluate on train data),valid_evaluation(output of Model_composer.evaluate on validation data) \n
    return value: dict
    """
    
    description={
        "notebook_parameters": notebook_parameters,
        "train_evaluation": train_evaluation if train_evaluation else "no train evaluation available",
        "valid_evaluation": valid_evaluation if valid_evaluation else "no validation evaluation available"
    }

    return description
    

In [33]:
#store the current run
folder_name="chinese_v1"
if False: #change to true if desired
    desc= create_model_description(notebook_parameters,train_evaluation,valid_evaluation)
    dest_folder=notebook_parameters["storage_parameters"]["path_to_model_storage"].joinpath(folder_name)
    store_model(model,recursive_force_serializability(desc,max_depth=100),dest_folder)

### 3.3.2 load model

In [34]:
#load the model in given folder name
folder_name="chinese_v1"
if False:
    model.load_state_dict(torch.load(notebook_parameters["storage_parameters"]["path_to_model_storage"].joinpath(folder_name,"state_dict.pth")))