In [1]:
%load_ext autoreload
%autoreload 2

import os, math, numpy as np, pickle, torch 
cur_dir = os.getcwd()
while not os.getcwd().endswith('-analysis'): os.chdir('..')

from pprint import pprint
from safetensors import safe_open
from modeling_jonberta import JonbertaForSequenceClassification, add_features_to_model
from transformers import AutoConfig

results_dir = os.path.abspath('notebooks/paper/results/')

## Uploading Models to HuggingFace
Uploading 320/5 models and setting model cards. 

- I have 320 models, but 5x less if we only consider the median one per training split. 
- Default model cards were added to shame you for not filling them out, but there are no APIs for filling them out programmatically. There's no way I'm doing 320 cards by hand though. 
- I'm not registering the `JonBERTa` architecture on the hub, as there is much room for improvement. To load the model, see `modeling_jonberta.py`, and refer to the `get_model` function in this notebook to do it properly. 

In [2]:
def get_model(model_path):
    ''' Load in a JonBERTa-head/attn model '''

    config = AutoConfig.from_pretrained(model_path)
    model = JonbertaForSequenceClassification(config)
    if hasattr(config, 'add_head') and config.add_head: 
        add_features_to_model(model, config)

    state_dict = {} 
    with safe_open(os.path.join(model_path, 'model.safetensors'), framework='pt') as f: 
        for key in f.keys():
            state_dict[key] = f.get_tensor(key)
    new_layers = model.load_state_dict(state_dict, strict=False)

    pprint(new_layers)
    return model  

#### Retrieving paths to models in my `results` dir
And, filtering to keep only the median-performing model per group (hyperparam combination)

In [3]:
import pandas as pd

# get the median scoring model per group to maintain some ordinality between them
model_scores = pd.read_csv(os.path.join(results_dir, 'model_scores.csv'))

# per Group, sort on test macro avg, and remove all but the median model 
model_scores = model_scores.sort_values('test macro avg', ascending=False)
model_scores = model_scores.groupby('Group').apply(lambda x: x.iloc[len(x) // 2]).reset_index(drop=True)

# do the same for the 3-layer attention scores which for some reason I stored elsewhere 
layer_attn_scores = pd.read_csv(os.path.join(results_dir, 'model_scores_layers.csv')).sort_values('test macro avg', ascending=False)
layer_attn_scores = layer_attn_scores.groupby('Group').apply(lambda x: x.iloc[len(x) // 2]).reset_index(drop=True)

median_models = set(model_scores['Name']).union(set(layer_attn_scores['Name']))
# median_models

  model_scores = model_scores.groupby('Group').apply(lambda x: x.iloc[len(x) // 2]).reset_index(drop=True)
  layer_attn_scores = layer_attn_scores.groupby('Group').apply(lambda x: x.iloc[len(x) // 2]).reset_index(drop=True)


In [4]:
# loading in a dictionary of the model paths from my messy results directory 
model_dirs = {
    'CodeBERTa': sorted([p.path for p in os.scandir(
                        os.path.join(results_dir, '12_codeberta/huggingface/CodeBERTa-small-v1/model'))]),
    'JonBERTa-head': sorted([p.path for p in os.scandir(
                        os.path.join(results_dir, '13_jonberta/12_codeberta-biased-2e-05lr--0/model'))
                        if 'HEAD' in p.path and os.path.basename(p.path).startswith('-')]),
    'JonBERTa-attn': sorted([p.path for p in os.scandir(
                        os.path.join(results_dir, '13_jonberta/12_codeberta-biased-2e-05lr--0/model'))
                        if 'ATTN' in p.path]),
}

# remove the test models 
model_dirs = {model_type: [model for model in models if 'test' not in model and 'TEST' not in model] 
              for model_type, models in model_dirs.items()} 

# keep only the median models 
model_dirs = {model_type: [model for model in models if os.path.basename(model) in median_models] 
              for model_type, models in model_dirs.items()}

#### Renaming models 
Remapping the extremely verbose names as follows: 

- `CodeBERTa` &rarr; `CodeBERTa-ft-coco-[1,2,5]e-05lr--[0-4]`
    - e.g. `CodeBERTa-ft-coco-1e-05lr--0`
- `JonBERTa-head` &rarr; `JonBERTa-head-ft-(dense-proj-reinit)--[0-4]` (all have `2e-05` learning rate)
    - e.g. `JonBERTa-head-ft-(dense-proj-)--1`
- `JonBERTa-attn` &rarr; `JonBERTa-attn-ft-(0,1,2,3,4,5L)--[0-4]`
    - e.g. `JonBERTa-attn-ft-(0,1,2L)--0` 

In [5]:
def get_model_name(model_path): 
    model_name = os.path.basename(model_path)

    if model_name.startswith('12_codeberta'):               # CodeBERTa
        model_name = f'CodeBERTa-ft-coco-{model_name[-10:]}'

    elif 'HEAD' in model_name:                              # JonBERTa with HEAD
        # remove everything until HEAD in model_name 
        model_name = f'JonBERTa-head-ft-coco-({model_name[model_name.index("HEAD")+4:-1]}' + '-' + model_name[-1]
        # remove 24th character 
        model_name = model_name[:23] + model_name[24:]
        # remove lr
        model_name = model_name[:-11] + model_name[-3:]

        # huggingface doens't want -- in the model name 
        # get everything between () in the model_name
        opts = model_name[model_name.index('(')+1:model_name.index(')')].split('-')
        opts = '-'.join([opt for opt in opts if opt != ''])

        return f'JonBERTa-head-ft-coco-{opts}' if len(opts) > 0 else 'JonBERTa-head-ft-coco'

    elif 'ATTN' in model_name:                              # JonBERTa with ATTN
        # get the numbers between [] in the model_name 
        layers = model_name[model_name.index('[')+1:model_name.index(']')]
        # remove whitespace between layers 
        layers = f'{layers.replace(" ", "").replace(",","")}L'

        # remove everything until and including ) in model_name 
        model_name = model_name[model_name.index(')')+1:]

        # if the third-last character is not a -, insert a - 
        if model_name[-3] != '-': 
            model_name = model_name[:-1] + '-' + model_name[-1]
        model_name = model_name[-3:]
        model_name = f'JonBERTa-attn-ft-coco-{layers}{model_name}'

    return model_name[:-3]

for model_type, model_paths in model_dirs.items(): 
    print(f'\033[1m{model_type}\033[0m')
    for model_path in model_paths: 
        model_name = get_model_name(model_path)
        print(f'{len(model_name) + len("AISE-TUDelft/")} \t{model_name} \t{model_path}')

[1mCodeBERTa[0m
38 	CodeBERTa-ft-coco-1e-05lr 	/home/jovyan/work/code4me-analysis/notebooks/paper/results/12_codeberta/huggingface/CodeBERTa-small-v1/model/12_codeberta-biased-1e-05lr--1
38 	CodeBERTa-ft-coco-2e-05lr 	/home/jovyan/work/code4me-analysis/notebooks/paper/results/12_codeberta/huggingface/CodeBERTa-small-v1/model/12_codeberta-biased-2e-05lr--4
38 	CodeBERTa-ft-coco-5e-05lr 	/home/jovyan/work/code4me-analysis/notebooks/paper/results/12_codeberta/huggingface/CodeBERTa-small-v1/model/12_codeberta-biased-5e-05lr--3
[1mJonBERTa-head[0m
34 	JonBERTa-head-ft-coco 	/home/jovyan/work/code4me-analysis/notebooks/paper/results/13_jonberta/12_codeberta-biased-2e-05lr--0/model/-13_jonberta-biased-12_codeberta-biased-2e-05lr--0-(HEAD---)-2e-05lr-4
41 	JonBERTa-head-ft-coco-reinit 	/home/jovyan/work/code4me-analysis/notebooks/paper/results/13_jonberta/12_codeberta-biased-2e-05lr--0/model/-13_jonberta-biased-12_codeberta-biased-2e-05lr--0-(HEAD---reinit)-2e-05lr-3
39 	JonBERTa-head-ft-c

In [6]:
model_type_roberta  = '''[RoBERTa](https://huggingface.co/FacebookAI/roberta-base)'''
model_type_jonberta = '''[JonBERTa](https://github.com/Ar4l/curating-code-completions/blob/main/modeling_jonberta.py)'''

codeberta_hyp = '''
num_train_epochs : int = 6
learning_rate    : float = search([2e-5, 1e-5, 5e-5])
batch_size       : int = 16
'''.strip()

jonberta_hyp = '''
num_train_epochs : int      = 3 
learning_rate    : float    = 2e-5
batch_size       : int      = 16
'''.strip()

jonberta_head_conf = '''
num_telemetry_features :int = 26
add_head              :bool = True
add_dense             :bool = search([True, False])
add_proj              :bool = search([True, False])
reinit_head           :bool = search([True, False])
'''.strip()

jonberta_attn_conf = '''
num_telemetry_features  :int = 26

add_feature_embeddings :bool = True 
feature_hidden_size     :int = num_telemetry_features * 4
feature_dropout_prob  :float = 0.1
add_feature_bias       :bool = True

add_self_attn          :bool = True
self_attn_layers  :list[int] = search(sum(
    [[i,j,k] for i in range(6) for j in range(6) for k in range(6) if i < j < k], 
    [[i,j] for j in range(6) for i in range(6) if i < j],
    [[i] for i in range(6)],
    []
))
'''.strip()

model_card = '''
---
library_name: transformers
tags:
- code
license: mit
---

## {}

Model for the paper [**"A Transformer-Based Approach for Smart Invocation of Automatic Code Completion"**](https://arxiv.org/abs/2405.14753). 

#### Description
This model is fine-tuned on a code-completion dataset collected from the open-source [Code4Me](https://github.com/code4me-me/code4me) plugin. The training objective is to have a small, lightweight transformer model to filter out unnecessary and unhelpful code completions. To this end, we leverage the in-IDE telemetry data, and integrate it with the textual code data in the transformer's attention module. 

- **Developed by:** [AISE Lab](https://www.linkedin.com/company/aise-tudelft/) @ [SERG](https://se.ewi.tudelft.nl/), Delft University of Technology 
- **Model type:** {}
- **Language:** Code 
- **Finetuned from model:** [`CodeBERTa-small-v1`](https://huggingface.co/huggingface/CodeBERTa-small-v1). 

Models are named as follows: 

- `CodeBERTa` &rarr; `CodeBERTa-ft-coco-[1,2,5]e-05lr`
    - e.g. `CodeBERTa-ft-coco-2e-05lr`, which was trained with learning rate of `2e-05`.
- `JonBERTa-head` &rarr; `JonBERTa-head-ft-[dense,proj,reinit]` 
    - e.g. `JonBERTa-head-ft-dense-proj`, where all have `2e-05` learning rate, but may differ in the head layer in which the telemetry features are introduced (either `head` or `proj`, with optional `reinit`ialisation of all its weights).
- `JonBERTa-attn` &rarr; `JonBERTa-attn-ft-[0,1,2,3,4,5]L`
    - e.g. `JonBERTa-attn-ft-012L` , where all have `2e-05` learning rate, but may differ in the attention layer(s) in which the telemetry features are introduced (either `0`, `1`, `2`, `3`, `4`, or `5L`).

Other hyperparameters may be found in the paper or the replication package (see below).

#### Sources 

- **Replication Repository:** [`Ar4l/curating-code-completions`](https://github.com/Ar4l/curating-code-completions/tree/main)
- **Paper:** [**"A Transformer-Based Approach for Smart Invocation of Automatic Code Completion"**](https://arxiv.org/abs/2405.14753) 
- **Contact:** https://huggingface.co/Ar4l

To cite, please use 

```bibtex
@misc{{de_moor_smart_invocation_2024,
	title = {{A {{Transformer}}-{{Based}} {{Approach}} for {{Smart}} {{Invocation}} of {{Automatic}} {{Code}} {{Completion}}}},
	url = {{http://arxiv.org/abs/2405.14753}},
	doi = {{10.1145/3664646.3664760}},
	author = {{de Moor, Aral and van Deursen, Arie and Izadi, Maliheh}},
	month = may,
	year = {{2024}},
}}
```

#### Training Details 
This model was trained with the following hyperparameters, everything else being `TrainingArguments`' default. The dataset was prepared identically across all models as detailed in the paper. 

```python
{}
```

{}
'''.strip()

headconfig = '''
#### Model Configuration

```python
{}
```
'''.strip()

def get_model_card(model_type, model_name): 
    ''' 1. Model type (JonBERTa-head/attn/CodeBERTa)
        2. Model type repo [RoBERTa](https://huggingface.co/FacebookAI/roberta-base) or [JonBERTa](https://github.com/Ar4l/curating-code-completions/blob/main/modeling_jonberta.py)
        3. Hyperparameters 
        4. IF JonBERTa: add JonBERTaConfig head/attn
    '''

    return model_card.format(

        model_name, 

        model_type_roberta if model_type == 'CodeBERTa' else model_type_jonberta,

        codeberta_hyp if model_type == 'CodeBERTa' else jonberta_hyp,

        headconfig.format(jonberta_head_conf) if 'head' in model_type else \
        headconfig.format(jonberta_attn_conf) if 'attn' in model_type else \
        ''
    )

for model_type, model_paths in model_dirs.items():
    for model_path in model_paths: 

        card_file = os.path.join(model_path, 'readme.md')
        with open(card_file, 'w') as f:
            f.write(get_model_card(model_type, get_model_name(model_path)))
            print(get_model_name(model_path), 'written to', card_file)

CodeBERTa-ft-coco-1e-05lr written to /home/jovyan/work/code4me-analysis/notebooks/paper/results/12_codeberta/huggingface/CodeBERTa-small-v1/model/12_codeberta-biased-1e-05lr--1/readme.md


CodeBERTa-ft-coco-2e-05lr written to /home/jovyan/work/code4me-analysis/notebooks/paper/results/12_codeberta/huggingface/CodeBERTa-small-v1/model/12_codeberta-biased-2e-05lr--4/readme.md
CodeBERTa-ft-coco-5e-05lr written to /home/jovyan/work/code4me-analysis/notebooks/paper/results/12_codeberta/huggingface/CodeBERTa-small-v1/model/12_codeberta-biased-5e-05lr--3/readme.md
JonBERTa-head-ft-coco written to /home/jovyan/work/code4me-analysis/notebooks/paper/results/13_jonberta/12_codeberta-biased-2e-05lr--0/model/-13_jonberta-biased-12_codeberta-biased-2e-05lr--0-(HEAD---)-2e-05lr-4/readme.md
JonBERTa-head-ft-coco-reinit written to /home/jovyan/work/code4me-analysis/notebooks/paper/results/13_jonberta/12_codeberta-biased-2e-05lr--0/model/-13_jonberta-biased-12_codeberta-biased-2e-05lr--0-(HEAD---reinit)-2e-05lr-3/readme.md
JonBERTa-head-ft-coco-proj written to /home/jovyan/work/code4me-analysis/notebooks/paper/results/13_jonberta/12_codeberta-biased-2e-05lr--0/model/-13_jonberta-biased-12_

#### My greatest enemy: HF APIs
We just need to (1) load in models, (2) push to hub under `AISE-TUDelft`, (3) add a model card. How difficult can it be? 

The fact that there is a `ModelCard` library makes a trivial task of uploading a `readme.md` nontrivial. 

In [7]:
from huggingface_hub import notebook_login, Repository, ModelCard, get_collection, add_collection_item, login
import tqdm

login('token', write_permission=True)

org_name = 'AISE-TUDelft' 
collection_name = 'smart-invocation-of-code-completion-66473ddf6fa6cf6e541f750c'

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /home/jovyan/.cache/huggingface/token
Login successful


In [8]:
for model_type, model_paths in tqdm.tqdm(model_dirs.items()):

    # already done but my kernel keeps crashing
    if model_type == 'CodeBERTa': continue 

    for model_path in model_paths:

        model_name = get_model_name(model_path)
        hf_model = get_model(model_path)
        hf_model_card = ModelCard(get_model_card(model_type, model_name))

        hf_path = os.path.join(org_name, model_name)
        hf_model.push_to_hub(repo_id=hf_path)
        hf_model_card.push_to_hub(repo_id=hf_path)

        hf_collection = os.path.join(org_name, collection_name)
        add_collection_item(hf_collection, hf_path, 'model', exists_ok=True)

        print(f'uploaded {model_name}')

  0%|          | 0/3 [00:00<?, ?it/s]

<All keys matched successfully>


README.md:   0%|          | 0.00/3.16k [00:00<?, ?B/s]

uploaded JonBERTa-head-ft-coco
<All keys matched successfully>


README.md:   0%|          | 0.00/3.17k [00:00<?, ?B/s]

uploaded JonBERTa-head-ft-coco-reinit
expanding classifier.out_proj
<All keys matched successfully>


README.md:   0%|          | 0.00/3.16k [00:00<?, ?B/s]

uploaded JonBERTa-head-ft-coco-proj
expanding classifier.out_proj
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/334M [00:00<?, ?B/s]

uploaded JonBERTa-head-ft-coco-proj-reinit
expanding classifier.dense
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/334M [00:00<?, ?B/s]

uploaded JonBERTa-head-ft-coco-dense
expanding classifier.dense
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/334M [00:00<?, ?B/s]

uploaded JonBERTa-head-ft-coco-dense-reinit
expanding both dense and proj
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/334M [00:00<?, ?B/s]

uploaded JonBERTa-head-ft-coco-dense-proj
expanding both dense and proj
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/334M [00:00<?, ?B/s]

 67%|██████▋   | 2/3 [02:27<01:13, 73.69s/it]

uploaded JonBERTa-head-ft-coco-dense-proj-reinit
Adding custom self-attention to layer 0
Adding custom self-attention to layer 1
Adding custom self-attention to layer 2
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-012L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 1
Adding custom self-attention to layer 3
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-013L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 1
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-014L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 1
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-015L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 1
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-01L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 2
Adding custom self-attention to layer 3
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-023L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 2
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-024L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 2
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-025L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 2
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-02L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 3
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-034L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 3
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-035L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 3
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-03L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 4
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-045L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-04L
Adding custom self-attention to layer 0
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-05L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 2
Adding custom self-attention to layer 3
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-123L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 2
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-124L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 2
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-125L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 2
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-12L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 3
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-134L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 3
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-135L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 3
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-13L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 4
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-145L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-14L
Adding custom self-attention to layer 1
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-15L
Adding custom self-attention to layer 2
Adding custom self-attention to layer 3
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-234L
Adding custom self-attention to layer 2
Adding custom self-attention to layer 3
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-235L
Adding custom self-attention to layer 2
Adding custom self-attention to layer 3
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-23L
Adding custom self-attention to layer 2
Adding custom self-attention to layer 4
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-245L
Adding custom self-attention to layer 2
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-24L
Adding custom self-attention to layer 2
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-25L
Adding custom self-attention to layer 3
Adding custom self-attention to layer 4
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/338M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-345L
Adding custom self-attention to layer 3
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-34L
Adding custom self-attention to layer 3
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-35L
Adding custom self-attention to layer 4
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-45L
Adding custom self-attention to layer 0
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-0L
Adding custom self-attention to layer 1
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-1L
Adding custom self-attention to layer 2
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-2L
Adding custom self-attention to layer 3
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-3L
Adding custom self-attention to layer 4
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

uploaded JonBERTa-attn-ft-coco-4L
Adding custom self-attention to layer 5
<All keys matched successfully>


model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

100%|██████████| 3/3 [17:54<00:00, 358.26s/it]

uploaded JonBERTa-attn-ft-coco-5L



