# Push final models to Huggingface

**Purpose:** This script is used to push final models to huggingface model-hub. This can also be done manually. 

**Dependency:** Prior to running this script, final models need to be trained (`Train_final_models.ipynb`) and downloaded from weights and biases and placed in the `TRIDENT`-folder.

**Consecutive scripts:** After running this script the following scripts may be executed. `download_wandb_artifacts.ipynb`

In [1]:
import transformers
from transformers import AutoModel, AutoTokenizer

import torch

from development_utils.training.Build_Pytorch_model import TRIDENT, DNN_module

In [2]:
model_path = '../TRIDENT/'
version = 'EC50_fish'
name = f'final_model_{version}'

In [3]:
onehotencodinglengths = {
    'EC50_algae': 1,
    'EC10_algae': 1,
    'EC50EC10_algae': 2, 
    'EC50_invertebrates': 2,
    'EC10_invertebrates': 6,
    'EC50EC10_invertebrates': 8,
    'EC50_fish': 1,
    'EC10_fish': 7,
    'EC50EC10_fish': 9
}

In [4]:
chemberta = AutoModel.from_pretrained('seyonec/PubChem10M_SMILES_BPE_450k')
tokenizer = AutoTokenizer.from_pretrained('seyonec/PubChem10M_SMILES_BPE_450k')

Some weights of the model checkpoint at seyonec/PubChem10M_SMILES_BPE_450k were not used when initializing RobertaModel: ['lm_head.layer_norm.weight', 'lm_head.decoder.bias', 'lm_head.decoder.weight', 'lm_head.layer_norm.bias', 'lm_head.dense.weight', 'lm_head.dense.bias', 'lm_head.bias']
- This IS expected if you are initializing RobertaModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing RobertaModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
dnn = DNN_module(one_hot_enc_len=onehotencodinglengths[version],
                n_hidden_layers=3,
                layer_sizes=[700,500,300],
                dropout=0.2)

model = TRIDENT(chemberta, dnn)

In [6]:
def load_ckp(checkpoint_dir, model):
    checkpoint_dnn = torch.load(checkpoint_dir+'_dnn_saved_weights.pt', map_location='cpu')
    checkpoint_roberta = torch.load(checkpoint_dir+'_roberta_saved_weights.pt', map_location='cpu')
    model.dnn.load_state_dict(checkpoint_dnn)
    model.roberta.load_state_dict(checkpoint_roberta)
    return model

In [7]:
model = load_ckp(model_path+name, model)
model.eval()

TRIDENT(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(52000, 768, padding_idx=1)
      (position_embeddings): Embedding(512, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0): RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): LayerNorm((768,), e

## Save to hub

In [8]:
from huggingface_hub import notebook_login

notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [9]:
model.roberta.push_to_hub(version, use_auth_token=)

ValueError: You must login to the Hugging Face hub on this computer by typing `transformers-cli login` and entering your credentials to use `use_auth_token=True`. Alternatively, you can pass your own token as the `use_auth_token` argument.

In [112]:
tokenizer.push_to_hub(version)

CommitInfo(commit_url='https://huggingface.co/StyrbjornKall/EC10_fish/commit/fc411db6a4aaa87d194d30829d0c8fe6a605f6f1', commit_message='Upload tokenizer', commit_description='', oid='fc411db6a4aaa87d194d30829d0c8fe6a605f6f1', pr_url=None, pr_revision=None, pr_num=None)