In [None]:
from google.colab import userdata
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
import torch
import wandb
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer, SentenceTransformerTrainingArguments, SentenceTransformerTrainer, losses
from sentence_transformers.evaluation import SentenceEvaluator
from datasets import load_dataset

In [None]:
# Root folder on Google Drive: the datasets are read from here and the fine-tuned
# models and the results CSV are written below it.
# NOTE(review): assumes Drive is already mounted at /content/drive; no mount call is
# visible in this notebook, so confirm before a fresh run.
folder = '/content/drive/MyDrive/conserta-avioes'

In [None]:
# Copy the Weights & Biases API key from the Colab secret store into the environment
# (presumably so that wandb.init() can authenticate without an interactive prompt).
os.environ['WANDB_API_KEY'] = userdata.get('WANDB_API_KEY')

# Datasets e Funções

## Dataset de Treino

In [None]:
# Load the labelled training pairs from CSV; the rows end up under the 'train' split,
# which the next cell extracts.
dataset_treino = load_dataset(folder,data_files="dataset_treino_pronto.csv")

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Keep only the 'train' split and shuffle it with a fixed seed, so every fine-tuning
# run sees the rows in the same order and runs stay comparable and reproducible.
# NOTE: this rebinds `dataset_treino` from the object returned by load_dataset to its
# shuffled 'train' split, so re-running this cell requires re-running the loading cell.
dataset_treino = dataset_treino['train'].shuffle(seed=42)
# Show the column schema as a quick sanity check.
dataset_treino.features

{'sentence1': Value('string'),
 'sentence2': Value('string'),
 'label': Value('int64')}

## Dataset de Teste

In [None]:
# Raw test set; the following cells read its 'Tipo' and 'Frase' columns.
dataset_teste = pd.read_csv(folder+"/dataset_teste.csv")

In [None]:
# Retrieval corpus for the test set: the 'Frase' text of every row whose 'Tipo' is
# 'orig', taken as-is (no case or whitespace normalisation is applied here).
corpus_teste = dataset_teste[dataset_teste['Tipo'] == 'orig']['Frase'].tolist()
# Preview the first ten entries.
corpus_teste[:10]

['RIGHT ENGINE #4 AIR BAFFLE IS CRACKED',
 'RIGHT ENGINE FORWARD ALTERNATOR ATTACH BOLT LOOSE.',
 'AFTER LANDING, A/C IDLE @ 970 RPM.',
 'TOP FRONT RIGHT BAFFLE, BAFFLE SEAL RIVET PULLED THROUGH.',
 'ROUGH RUNNING ENGINE ON START. ENGINE RAN SMOOTHER AS IT WAR',
 'CYLINDER HEAD TEMPERATURE NEEDLE BOUNCES & HAD ENGINE RUN ROUGH MOMENTARILY.',
 'LACING CORD LOOSE ON SCAT TUBING + IGNITION LEAD TO FRAME, RIGHT SI',
 'RIGHT SIDE BACK BAFFLE IS CRACKED & BRACKET RIVETS BROKEN.',
 'SPARK PLUG BAFFLE PLUG IS WORN.',
 '4TH STAGE NOZZLE HAS SEVERAL CRACKS IN IT.']

In [None]:
# Test queries: the 'Frase' text of every row whose 'Tipo' is 'var', upper-cased and
# stripped of surrounding whitespace.
queries_teste = [
    frase.upper().strip()
    for frase in dataset_teste.loc[dataset_teste['Tipo'] == 'var', 'Frase']
]
# Preview the first ten entries.
queries_teste[:10]

['CRACKED AIR BAFFLE DETECTED ON THE RIGHT ENGINE, #4.',
 'FOUND A CRACK IN THE LEFT ENGINE’S #3 AIR BAFFLE DURING INSPECTION.',
 'THE AIR BAFFLE ON ENGINE #2 (RIGHT SIDE) SHOWS A CRACK.',
 'LEFT ENGINE #4 AIR BAFFLE IS CRACKED.',
 'THE FORWARD ALTERNATOR ATTACH BOLT ON THE RIGHT ENGINE WAS FOUND LOOSE.',
 'FOUND THE ALTERNATOR FORWARD MOUNTING BOLT LOOSE ON THE LEFT ENGINE.',
 'ON ENGINE #2 THE FORWARD BOLT THAT SECURES THE ALTERNATOR IS LOOSE.',
 'NO. 1 ENGINE FORWARD ALTERNATOR ATTACH BOLT IS LOOSE.',
 'AFTER LANDING, A/C IDLE AT 970 RPM.',
 'UPON TOUCHDOWN THE AIRPLANE REMAINED IDLING AROUND 960 RPM.']

## Dataset de Validação

### Treino

In [None]:
# Load the labelled validation pairs from CSV; the rows end up under the 'train' split,
# which the next cell extracts.
dataset_val = load_dataset(folder,data_files="dataset_validacao_pronto.csv")

Generating train split: 0 examples [00:00, ? examples/s]

In [None]:
# Keep only the 'train' split (the single split produced when loading the CSV) and
# shuffle it with a fixed seed so the row order is reproducible between sessions.
# NOTE: this rebinds `dataset_val` from the object returned by load_dataset to its
# shuffled 'train' split, so re-running this cell requires re-running the loading cell.
dataset_val = dataset_val['train'].shuffle(seed=42)
# Show the column schema as a quick sanity check.
dataset_val.features

{'sentence1': Value('string'),
 'sentence2': Value('string'),
 'label': Value('int64')}

### Recall @ K

In [None]:
# Raw validation set; the following cells read its 'Tipo' and 'Frase' columns to build
# the corpus and queries used for recall@k.
dataset_val_bruto = pd.read_csv(folder+"/dataset_validacao.csv")

In [None]:
# Retrieval corpus for validation: the 'Frase' text of every row whose 'Tipo' is
# 'orig', taken as-is (no case or whitespace normalisation is applied here).
corpus_val = dataset_val_bruto[dataset_val_bruto['Tipo'] == 'orig']['Frase'].tolist()
# Preview the first ten entries.
corpus_val[:10]

['HOLDING SHORT OF RUNWAY, AFTER COMPLETION OF RUN UP, ENGINE',
 'IN FLIGHT, ENGINE BECAME ROUGH. DISCOVERED RIGHT MAGNETOS INOPERABLE. E',
 'LAST DAILY SHUT DOWN IDLE OVER RIDE SPEED CHECK - ENGINE RPM',
 'OIL FOUND ON ENTIRE RIGHT SIDE OF FUSELAGE.',
 'BAFFLE SEALS UNDER BOTH PROP GOVERNORS ARE LOOSE.',
 'RIGHT ENGINE, FORWARD I/B BAFFLE HAS SMALL CRACK.',
 'FELT ON #1 & 3 BAFFLE COMING LOOSE.',
 'FORWARD PUSH ROD SEAL LEAKING @ ENGINE ON #3 CYLINDER.',
 'BAFFLE SEAL HAS HOLE IN IT.',
 'CYLINDER #3 AFT BAFFLE MOUNT LOOSE.']

In [None]:
# Validation queries: the 'Frase' text of every row whose 'Tipo' is 'var', upper-cased
# and stripped of surrounding whitespace.
queries_val = [
    frase.upper().strip()
    for frase in dataset_val_bruto.loc[dataset_val_bruto['Tipo'] == 'var', 'Frase']
]
# Preview the first ten entries.
queries_val[:10]

['HOLDING SHORT OF THE RUNWAY AFTER COMPLETION OF THE RUN-UP, THE ENGINE GAUGES WERE MONITORED.',
 'STOPPED SHORT OF THE RUNWAY ONCE THE RUN-UP WAS DONE AND WE KEPT AN EYE ON THE ENGINE.',
 'AFTER FINISHING THE RUN-UP AND HOLDING SHORT OF THE RUNWAY, THE ENGINE REMAINED UNDER OBSERVATION.',
 'RUN-UP COMPLETE, HOLDING SHORT OF RUNWAY — ENGINE BEING CHECKED.',
 'DURING FLIGHT THE ENGINE BEGAN RUNNING ROUGH; INSPECTION REVEALED THE RIGHT MAGNETOS WERE INOPERATIVE.',
 'THE ENGINE GOT ROUGH IN THE AIR AND WE FOUND THE LEFT MAGNETO HAD FAILED.',
 'IN-FLIGHT ENGINE ROUGHNESS NOTED — DISCOVERED THE #2 MAGNETO NOT WORKING.',
 'ENGINE STARTED ROUGHING OUT WHILE AIRBORNE; THE RIGHT MAG WAS FOUND INOPERATIVE.',
 'DURING THE LAST DAILY SHUTDOWN THE IDLE-OVERRIDE SPEED WAS CHECKED AND ENGINE RPM RECORDED.',
 'AT THE LAST DAILY SHUT-DOWN WE RAN AN IDLE OVERRIDE SPEED CHECK AND NOTED THE ENGINE RPM.']

## Recall @ K

In [None]:
def recall_at_k(model,k,corpus,queries,dist='cos',variacoes_por_frase=4):
    """Compute recall@k for retrieving each query's source sentence from `corpus`.

    The queries must be grouped by source sentence, in corpus order:
    ``queries[i*n:(i+1)*n]`` are the ``n = variacoes_por_frase`` variations of
    ``corpus[i]``. A query is a hit when its source sentence is among the `k`
    corpus entries with the highest ``model.similarity`` score.

    Args:
        model: object exposing ``encode(sentences, convert_to_tensor=...,
            show_progress_bar=...)`` and ``similarity(query_emb, corpus_emb)``
            (e.g. a SentenceTransformer).
        k: number of top-ranked corpus entries inspected per query.
        corpus: list of reference sentences.
        queries: list of query sentences, `variacoes_por_frase` per corpus entry.
        dist: unused; the score is whatever ``model.similarity`` implements.
            Kept only so existing callers keep working.
        variacoes_por_frase: number of queries per corpus sentence. Defaults to 4,
            the value this function previously hard-coded.

    Returns:
        The fraction of queries whose source sentence is ranked in the top `k`,
        as a float. Returns 0 (after printing a message) when the list sizes are
        inconsistent, and 0.0 when there are no queries at all.
    """
    if len(queries) != variacoes_por_frase*len(corpus):
        print("Tamanho errado para as listas de corpus e queries.")
        return 0
    if len(queries) == 0:
        # Nothing to rank; also avoids dividing by zero below.
        return 0.0

    with torch.no_grad():
        corpus_emb = model.encode(corpus, convert_to_tensor=True, show_progress_bar=False)
        query_emb = model.encode(queries, convert_to_tensor=True, show_progress_bar=False)
        sims = model.similarity(query_emb,corpus_emb)
    sims_np = sims.cpu().numpy()

    # Double argsort turns each row of scores into ranks (0 = most similar entry).
    ranks = np.argsort(np.argsort(-sims_np, axis=1),axis=1)
    # Index of the correct corpus entry for every query: 0,0,..,1,1,..,2,2,..
    alvo = np.repeat(np.arange(len(corpus)),variacoes_por_frase)
    # Read only the rank of the correct entry instead of materialising a dense
    # queries x corpus answer-key matrix.
    rank_correto = ranks[np.arange(len(queries)),alvo]
    return float((rank_correto < k).mean())

In [None]:
class RecallAtKEvaluator(SentenceEvaluator):
    """Evaluator that reports recall@1, recall@3 and recall@7 on the validation data.

    It always scores the module-level `corpus_val` / `queries_val` lists by calling
    `recall_at_k` once per cut-off.
    """

    def __call__(
        self, model: SentenceTransformer, output_path: str | None = None, epoch: int = -1, steps: int = -1
    ) -> float | dict[str, float]:
        """Return the three recall values keyed by metric name.

        `output_path`, `epoch` and `steps` are accepted but not used.
        """
        def recall_validacao(k):
            return recall_at_k(model, k, corpus_val, queries_val)

        metricas = {}
        metricas["recall@1"] = recall_validacao(1)
        metricas["recall@3"] = recall_validacao(3)
        metricas["recall@7"] = recall_validacao(7)
        return metricas

# Treinamento

In [None]:
# Names of the base models to fine-tune; each one is passed to SentenceTransformer(...)
# in the training loop.
modelos = [
    "all-MiniLM-L6-v2",
    "all-MiniLM-L12-v2",
    "multi-qa-mpnet-base-dot-v1",
    "multi-qa-mpnet-base-cos-v1",
    "multi-qa-distilbert-cos-v1",
    "multi-qa-MiniLM-L6-cos-v1",
]

# Learning rates tried for every base model.
learning_rates = [5e-06,5e-05]

In [None]:
# Results table: one row per (base model, learning rate) combination, with the test-set
# recall columns left empty (NaN) until the training loop fills them in.
# The grid is derived from `modelos` and `learning_rates` rather than hard-coded counts,
# so it stays in sync when either list changes. Rows are ordered learning-rate-major:
# every model at the first rate, then every model at the next one.
resultados = pd.DataFrame(
    [(modelo_base, lr) for lr in learning_rates for modelo_base in modelos],
    columns=['Modelo Base','Learning Rate'],
).reindex(columns=['Modelo Base','Learning Rate','Recall@1','Recall@3','Recall@7'])
resultados

Unnamed: 0,Modelo Base,Learning Rate,Recall@1,Recall@3,Recall@7
0,all-MiniLM-L6-v2,5e-06,,,
1,all-MiniLM-L12-v2,5e-06,,,
2,multi-qa-mpnet-base-dot-v1,5e-06,,,
3,multi-qa-mpnet-base-cos-v1,5e-06,,,
4,multi-qa-distilbert-cos-v1,5e-06,,,
5,multi-qa-MiniLM-L6-cos-v1,5e-06,,,
6,all-MiniLM-L6-v2,5e-05,,,
7,all-MiniLM-L12-v2,5e-05,,,
8,multi-qa-mpnet-base-dot-v1,5e-05,,,
9,multi-qa-mpnet-base-cos-v1,5e-05,,,


In [None]:
# Sanity check of the row lookup used by the training loop: pick one model and one
# learning rate and display the single matching row of `resultados`.
modelo, learning_rate = modelos[2], learning_rates[0]
resultados[(resultados['Modelo Base'] == modelo) & (resultados['Learning Rate'] == learning_rate)]

Unnamed: 0,Modelo Base,Learning Rate,Recall@1,Recall@3,Recall@7
2,multi-qa-mpnet-base-dot-v1,5e-06,,,


In [None]:
# Fine-tune every (base model, learning rate) combination with CosineSimilarityLoss,
# log each run to Weights & Biases, save the resulting model to Drive, measure
# recall@1/3/7 on the test set and persist the results table after every run.
for i,modelo in enumerate(modelos):
  for learning_rate in learning_rates:
    print(f"Treinando modelo '{modelo}' (modelo {i+1} de {len(modelos)}) com learning rate de {learning_rate}:")
    nome = f"{modelo}_{learning_rate}"
    epocas = 4

    run = wandb.init(project="conserta-avioes_fine-tuning_01",name=nome)
    try:
      model = SentenceTransformer(modelo)
      train_loss = losses.CosineSimilarityLoss(model=model)

      # Evaluate and checkpoint every 100 steps; at the end, reload the checkpoint
      # with the highest validation recall@1.
      args = SentenceTransformerTrainingArguments(
        output_dir=nome,
        num_train_epochs=epocas,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        eval_strategy="steps",
        eval_steps=100,
        save_strategy="steps",
        save_steps=100,
        save_total_limit=2,
        logging_steps=100,
        load_best_model_at_end=True,
        metric_for_best_model="recall@1",
        greater_is_better=True,
        learning_rate=learning_rate,
        #max_grad_norm = 1.0,
        run_name=nome
      )

      trainer = SentenceTransformerTrainer(
        model=model,
        args=args,
        train_dataset=dataset_treino,
        eval_dataset=dataset_val,
        loss=train_loss,
        evaluator=RecallAtKEvaluator()
      )

      trainer.train()
    finally:
      # Always close the W&B run, even if loading or training fails, so a crashed
      # run is not left open and the next wandb.init() starts clean.
      run.finish()

    model.save(folder+"/modelos/"+nome)

    model.eval()
    recall_1_teste = recall_at_k(model,1,corpus_teste,queries_teste)
    recall_3_teste = recall_at_k(model,3,corpus_teste,queries_teste)
    recall_7_teste = recall_at_k(model,7,corpus_teste,queries_teste)
    print(f"Recall@1: {recall_1_teste}")
    print(f"Recall@3: {recall_3_teste}")
    print(f"Recall@7: {recall_7_teste}")

    # Row of the results table that belongs to this (model, learning rate) pair.
    linha = (resultados['Modelo Base']==modelo)&(resultados['Learning Rate']==learning_rate)
    resultados.loc[linha,'Recall@1'] = recall_1_teste
    resultados.loc[linha,'Recall@3'] = recall_3_teste
    resultados.loc[linha,'Recall@7'] = recall_7_teste

    # Save after every run so an interrupted session keeps the finished results.
    # index=False keeps the row index out of the file; otherwise every
    # read_csv/to_csv round trip adds another "Unnamed: 0" column.
    resultados.to_csv(folder+"/resultados_finetuning.csv",index=False)

Treinando modelo 'all-MiniLM-L6-v2' (modelo 1 de 6) com learning rate de 5e-06:


[34m[1mwandb[0m: Currently logged in as: [33mana-sovat[0m ([33mana-sovat-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]



Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0657,0.041374,0.944099,0.995342,1.0
200,0.0498,0.034785,0.936335,0.995342,1.0
300,0.0458,0.032366,0.936335,0.992236,1.0
400,0.0405,0.03095,0.930124,0.986025,1.0
500,0.0402,0.02995,0.930124,0.987578,1.0
600,0.0408,0.029266,0.928571,0.982919,0.998447
700,0.0373,0.028257,0.934783,0.984472,0.998447
800,0.0349,0.027569,0.930124,0.982919,1.0
900,0.0374,0.026585,0.928571,0.982919,1.0
1000,0.0366,0.026146,0.934783,0.98913,0.998447


0,1
eval/loss,█▆▅▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,██▃▇▄▆▄▇█▇▃▃▁▅▃▄▅▄▇▅█▄▄▂▂▃▄▆▄▆▅▆▆▇▇▆▆▆▆▆
eval/recall@3,██▇▅▅▅▄▄▆▅▅▅▅▄▁▃▃▃▃▅▃▃▃▃▄▃▃▂▃▃▃▃▄▄▄▄▄▄▄▄
eval/recall@7,████▆▆▆▆▆▆▆▆▆▆█▆▆▆▆▆▆▆▆▆▆▆▆▆▆▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,▂██▁▁▆▁▂▂▁▂▇▂▁▁▁▁▁▂▁▂▄▂▃▂▂▁▁▂▁▁▁▁▁▂▁▃▁▂▂
eval/samples_per_second,▆▁▅▂████▂▅██▆█▇█▅█▅▇▅▆▆▅█▆████▇▅▆▆▇▇▅██▅
eval/steps_per_second,▆▁█▂▆▇▆███▂▅▇██▇██▇▆▅█▄▅▅▅█▆▆█████▅▇█▇██
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇█████
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇█████
train/grad_norm,▃▅▂▃▄▃▆▇▃▅▄▄▆▄▂▂▄▆▂▃▂▅▇▂▄█▄▃▅▁▄▁▃▂▃▄▃▄▃▅

0,1
eval/loss,0.02025
eval/recall@1,0.93323
eval/recall@3,0.98137
eval/recall@7,0.99534
eval/runtime,4.676
eval/samples_per_second,688.619
eval/steps_per_second,43.199
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9243827160493827
Recall@3: 0.9768518518518519
Recall@7: 0.9969135802469136
Treinando modelo 'all-MiniLM-L6-v2' (modelo 1 de 6) com learning rate de 5e-05:


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0462,0.030197,0.942547,0.990683,0.998447
200,0.0335,0.026899,0.931677,0.987578,0.998447
300,0.0316,0.022994,0.930124,0.984472,1.0
400,0.0268,0.02152,0.936335,0.982919,0.996894
500,0.0276,0.022038,0.92236,0.979814,0.992236
600,0.0254,0.019941,0.931677,0.98913,0.998447
700,0.0224,0.020292,0.930124,0.990683,1.0
800,0.0212,0.019496,0.942547,0.992236,0.998447
900,0.0242,0.019273,0.940994,0.992236,0.996894
1000,0.0245,0.019057,0.947205,0.990683,0.996894


0,1
eval/loss,█▇▅▄▃▃▃▃▃▂▂▂▂▂▁▁▁▁▁▂▁▁▁▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▆▃▅▁▃▆▆██▇▆▅▆▆▃█▅▇██▇▇▇▆▆▇██▇▆▅▅▅▆▅▅▅▆▆▅
eval/recall@3,▇▆▄▃▂▇█▇▁▃▁▅▅▃▄▆▅▇▆▇▄▆▃▄▆▇▅▆▆▇▇▇▇▇▇▆▆▆▆▆
eval/recall@7,▇▇█▆█▆▆▁▃▁▄▅▅▅▅▅▅▅██▅▅▅▅▅▅▅▅▅▅▆▅▅▅▅▅▄▅▅▅
eval/runtime,▁▃▁█▄▂▁▁▅▄▅▂▄▁▂▁▃█▃▄▁▁▃▂▁▅▂▁▁▁▁▂▄▃▁▃▁▁▄▂
eval/samples_per_second,███▆▄▃▄▅▄▇▇█▆▁▅▇▇▇▇▇▆▇▇█▆▆▇█▄▅▇▇▇▇▄▇▇▅▅▇
eval/steps_per_second,███▁▄▆▄█▇▄▄▇▇▇█▁▅▄▇█▇▇▇▃▃▇█▇▆▄▄▅▇▇▇▆▅▅▅▆
train/epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇█████
train/global_step,▁▁▁▁▁▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
train/grad_norm,▅▂▂▃▂▆▃▆▃▅▂█▁▂▄▃▃▃▃▂▂▅▆▂▄▁▂▂▂▁▁▂▁▁▁▁▁▃▁▁

0,1
eval/loss,0.01515
eval/recall@1,0.93789
eval/recall@3,0.98913
eval/recall@7,0.99379
eval/runtime,3.8361
eval/samples_per_second,839.388
eval/steps_per_second,52.657
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9506172839506173
Recall@3: 0.9830246913580247
Recall@7: 0.9984567901234568
Treinando modelo 'all-MiniLM-L12-v2' (modelo 2 de 6) com learning rate de 5e-06:


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0587,0.037283,0.956522,0.992236,1.0
200,0.0432,0.03196,0.948758,0.992236,1.0
300,0.0395,0.029672,0.945652,0.990683,1.0
400,0.0358,0.028059,0.940994,0.990683,1.0
500,0.0348,0.027065,0.936335,0.990683,1.0
600,0.0355,0.02617,0.936335,0.993789,1.0
700,0.0328,0.025243,0.937888,0.992236,1.0
800,0.0298,0.024791,0.934783,0.98913,1.0
900,0.0331,0.023977,0.937888,0.993789,1.0
1000,0.0312,0.023538,0.947205,0.995342,1.0


0,1
eval/loss,█▅▅▄▄▃▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▅▃▁▁▂▅▄▃▅▄▆▄▂▃▃▅▅▅▅▅▆▆▅▆▇▇▆▆▆▇█▆▇▇▇█▇▇▇▇
eval/recall@3,▇▆▆▇▇▇█▅▃▂▂▂▂▁▂▄▄▄▄▅▆▆▆▅▅▆▅▆▆▄▄▃▄▃▂▄▄▄▄▄
eval/recall@7,██████████▆▆▆█▃▃▃▃▃▁▆▆▃▆▆▆▆▃▃▃▃▃▃▃▃▃▃▃▃▃
eval/runtime,▂▅▁▅▂▃▅▅▅▅▂▂▁▁▁▂▄▂▆▅▅▂▆▃▆▂▄▁▁▅█▁▁▅▂▅▂▂▄▅
eval/samples_per_second,▇▄█▄▇▃▆▃▄▂█▃▇▅▄▄▃▃▆▃▁▃▇▄▆▄▃▁▇▃█▇▄▇▇▇▅▆▅▃
eval/steps_per_second,▇█▄█▇▆▃▃▁▇█▂▇▅▇▃▃▃▄▇▅▂▃▁▃▄▇▆▇▄▇▂█▇▃▇▇▆▅▃
train/epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇██
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇▇██
train/grad_norm,▃▇▃▅▄█▂▄▃█▄█▆▄▂▄▄▇▅▇▂▅▅▄▂▂▂▁▁▅▂▄▂▅▂▅▂▅▃▆

0,1
eval/loss,0.01684
eval/recall@1,0.95497
eval/recall@3,0.98602
eval/recall@7,0.99689
eval/runtime,6.8335
eval/samples_per_second,471.206
eval/steps_per_second,29.56
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9552469135802469
Recall@3: 0.9938271604938271
Recall@7: 1.0
Treinando modelo 'all-MiniLM-L12-v2' (modelo 2 de 6) com learning rate de 5e-05:


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0429,0.029752,0.953416,0.993789,0.998447
200,0.0301,0.026766,0.939441,0.98913,0.998447
300,0.0277,0.022635,0.940994,0.982919,1.0
400,0.0246,0.020573,0.947205,0.984472,1.0
500,0.0246,0.020504,0.944099,0.993789,1.0
600,0.0229,0.018637,0.947205,0.990683,1.0
700,0.0205,0.018382,0.950311,0.979814,0.996894
800,0.0186,0.019294,0.947205,0.979814,0.990683
900,0.0223,0.019308,0.937888,0.990683,0.998447
1000,0.022,0.018112,0.959627,0.987578,1.0


0,1
eval/loss,█▄▄▃▃▃▂▃▂▂▂▂▂▂▂▁▁▁▁▁▂▁▁▁▁▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁
eval/recall@1,▆▃▃▅▄▅█▄▁▂▆▃▂▃▃▅▄▅▆▅▆▆▂▃▂▃▄▃▃▁▂▅▅▅▃▄▃▃▄▅
eval/recall@3,▆▅█▇▃▆▆▅▂▅▁▁▄▄▅▅▅▆▅▅▇▃▅▅▃▄▂▂▃▂▅▅▆▄▃▃▃▃▃▃
eval/recall@7,▇▇███▇█▆▁▃▂▂▅▅▅▆▅▅▅▅▇▇████▅▅▅▅▃▅▅▅▅▅▅▅▅▅
eval/runtime,▁▂▂▃▃▁▂▂▃▃▁▁▂▂▂▁▃▂▄▄▃▂▃▄▂▅▃▅▂▂▁▂█▂▂▅▁▂▃▃
eval/samples_per_second,▇▇▇▇▆▆▆▇▇▇▃▇▅▇▇▆▃▇▆▇▄▃▇▆▅▆▆▃▇▅█▇▁▇▇▄▇▅▇▆
eval/steps_per_second,████▆▆▆▇██▃▆▃█▄▇▇█▅▇▆▄▇▆▅▆▄▄▆▆▇▁█▄▇▄█▅▆▆
train/epoch,▁▁▁▁▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▆▇▇█████
train/global_step,▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▇▂▃▅▅▅▆▇▃▇▂▆▂▁▃▆▃▃▃▂▃█▇▂▆▂▂▂▁▂▄▁▂▁▂▂▁▂▂▁

0,1
eval/loss,0.01536
eval/recall@1,0.9472
eval/recall@3,0.97981
eval/recall@7,0.99379
eval/runtime,6.1086
eval/samples_per_second,527.129
eval/steps_per_second,33.068
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9675925925925926
Recall@3: 0.9953703703703703
Recall@7: 1.0
Treinando modelo 'multi-qa-mpnet-base-dot-v1' (modelo 3 de 6) com learning rate de 5e-06:


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/212 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0768,0.037556,0.951863,0.992236,1.0
200,0.0544,0.031974,0.945652,0.990683,1.0
300,0.0507,0.029882,0.944099,0.992236,1.0
400,0.0459,0.028135,0.937888,0.986025,1.0
500,0.0436,0.027442,0.940994,0.98913,1.0
600,0.0429,0.026722,0.944099,0.992236,1.0
700,0.0409,0.025111,0.940994,0.993789,1.0
800,0.0374,0.024253,0.942547,0.992236,1.0
900,0.0398,0.023016,0.939441,0.992236,1.0
1000,0.038,0.02194,0.950311,0.995342,1.0


0,1
eval/loss,█▆▅▅▅▄▃▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▃▃▁▂▃▁▄▄▅▆▆▆▆▆▆▅▆▆▆█▇█▇▆▆▆▇▇▇▆▆▆▆▆▆▆▆▆▆▆
eval/recall@3,▆▂▆▇▆█▂▃▂▂▃▃▃▃▅▃▇▅▄▁▂▂▃▃▄▃▃▂▂▂▁▂▃▃▄▃▃▃▃▃
eval/recall@7,██████████████▄▄▄▄▄▄▄▁▁▄▄▄▄▄▁▄▄▄▄▄▄▄▄▄▄▄
eval/runtime,▃▃▂▃▃▃▃▃▃▄▃▃▃▂▁▄▄▃▃▃▃▄▃▄▄▄▃▂▃█▄▄▄▄▄▂▂▃▅▃
eval/samples_per_second,▄▅▃▄▄▃▃▃▃▄▃▄▅▅▂▃▄▄▄▃▂▄▄▃▄▃▄▄▃▃▃█▂▃▃▄▁▃▂▃
eval/steps_per_second,▆▇▆█▇▆▅▆▅▅▆▇█▇▅▅▆▆▆▆▆▆▅▆▅▇▅▅▁▆▅▅▅▅▆▇▃▆▆▇
train/epoch,▁▁▁▁▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇█████
train/global_step,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▇▇▇████
train/grad_norm,▆▄▅▄▇▂▅▇▄▃▆▃█▄▃▅▆▃▄▆▃▄▄▄▄▃▃▂▄▁▆▁▂▁▂▂▅▂▄▃

0,1
eval/loss,0.01716
eval/recall@1,0.95652
eval/recall@3,0.98758
eval/recall@7,0.99845
eval/runtime,12.9273
eval/samples_per_second,249.085
eval/steps_per_second,15.626
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9583333333333334
Recall@3: 0.9938271604938271
Recall@7: 1.0
Treinando modelo 'multi-qa-mpnet-base-dot-v1' (modelo 3 de 6) com learning rate de 5e-05:


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0557,0.032877,0.968944,0.995342,0.998447
200,0.0386,0.025063,0.936335,0.98913,0.998447
300,0.038,0.024229,0.936335,0.990683,0.998447
400,0.0344,0.023139,0.940994,0.990683,0.998447
500,0.0307,0.021634,0.939441,0.982919,0.995342
600,0.0306,0.020759,0.948758,0.986025,0.998447
700,0.0272,0.019885,0.947205,0.979814,1.0
800,0.0225,0.020368,0.940994,0.987578,0.995342
900,0.0263,0.02245,0.940994,0.973602,0.986025
1000,0.0297,0.019022,0.93323,0.986025,0.998447


0,1
eval/loss,█▅▅▄▃▃▃▂▃▃▂▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▂▂▁▁▁▁▁▁▁▁▁▁
eval/recall@1,█▄▅▅▆▅▄▃▅▄▆▃▄▂▃▃▄▄▃▃▂▁▂▂▃▂▁▁▁▂▂▃▂▂▂▃▃▂▂▂
eval/recall@3,█▆▇▇▅▄▆▄▄▃▇▄▄▃▅▇▅▄▅▅▃▂▁▂▂▃▃▄▅▆▄▄▄▄▄▃▄▄▄▄
eval/recall@7,█████▆▁█▅▅██▅██▇███▆▆█▇▆▆██▆█▇▆▅▅▇███▇██
eval/runtime,▃▁▂▁▁▃▂▂▂▂▁▂▂▂▃▂▁▂▃▃▃▂▃▂▁▄▃▂▂▂▅▂▃▁▆▂▃█▄▁
eval/samples_per_second,▇▇▆█▇▇▆▇█▇▆▆▆▆▆▆▇▅▆▅▆▆▆▇▆▆▇▇▆▆▇▆▅▇▆▂▇▅▁█
eval/steps_per_second,▆█▇█▆▆▇▇▇▇▇▆▆▆▆▇█▇▇▇▆▇▆▇▆█▆▅▆▇▄▇▅▇▇█▆▆▁█
train/epoch,▁▁▁▁▁▂▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇████
train/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇█████
train/grad_norm,▆▆▄▇▃▅▃▄▃█▄█▁▂▂▂▂▅▃▂▂▂▂▇▂▂▃▂▃▁▁▁▁▃▁▁▁▂▁▂

0,1
eval/loss,0.01411
eval/recall@1,0.9146
eval/recall@3,0.98137
eval/recall@7,0.99845
eval/runtime,12.7915
eval/samples_per_second,251.729
eval/steps_per_second,15.792
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9614197530864198
Recall@3: 0.9891975308641975
Recall@7: 0.9984567901234568
Treinando modelo 'multi-qa-mpnet-base-cos-v1' (modelo 4 de 6) com learning rate de 5e-06:


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0533,0.031985,0.954969,0.993789,0.995342
200,0.0387,0.028229,0.96118,0.990683,1.0
300,0.0358,0.026076,0.951863,0.995342,1.0
400,0.0327,0.024273,0.948758,0.993789,1.0
500,0.0315,0.023268,0.951863,0.995342,1.0
600,0.031,0.022562,0.956522,0.993789,1.0
700,0.0292,0.021482,0.953416,0.992236,0.996894
800,0.026,0.020857,0.958075,0.993789,1.0
900,0.029,0.019917,0.954969,0.993789,0.996894
1000,0.0269,0.019204,0.959627,0.993789,1.0


0,1
eval/loss,█▆▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▁▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▃▆▂▁▂▃▃▃▅▄▄▆▄▆▄▇▇█▇▇▇▆▆▇▇▇▆▅▅▃▄▄▅▄▄▄▄▄▄▄
eval/recall@3,▇▆▇█▇▇▇▆▂▂▂▂▂▄▂▄▄▄▄▅▄▁▄▅▅▄▄▄▄▃▁▁▂▁▁▄▄▄▂▁
eval/recall@7,▃████▅█▅██▅█▅▃▅▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▃▁▁▁▁▁▁▁▁
eval/runtime,▇▅▁▁▄▆▅▃▃▅▄▃▆▅▂▁▄▂▃▄▇▃▆▄▄▃▅██▁▃▅▅▆▅█▄▅▆▂
eval/samples_per_second,▂▆▃▅▇▅▆▄▃▂▅▆▃▃▆▂▇▅▆▅▅▂▅▃▄▆█▅▄▁▇▃▄▁▄▂▄▃▄▃
eval/steps_per_second,▂▆▄▅▇▃▆▆▄▄▆▃▃▅▇▆▅▂▅▃▆█▅▅▄▆▁▇▅▁▄▃▃▁▄▄▃▄▃▇
train/epoch,▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇███
train/global_step,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇▇██
train/grad_norm,▆▃█▂▃▄▇▇▃▃█▃▃▅▄▃▆▄▇▄▇▂▆▆▅▁▄▅▂▆▁▅▁▁▂▂▂▃▂▅

0,1
eval/loss,0.01533
eval/recall@1,0.95807
eval/recall@3,0.97826
eval/recall@7,0.99379
eval/runtime,12.8438
eval/samples_per_second,250.704
eval/steps_per_second,15.727
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9691358024691358
Recall@3: 0.9969135802469136
Recall@7: 1.0
Treinando modelo 'multi-qa-mpnet-base-cos-v1' (modelo 4 de 6) com learning rate de 5e-05:


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0416,0.027516,0.96118,0.998447,1.0
200,0.0288,0.02213,0.953416,0.990683,0.995342
300,0.0267,0.019757,0.968944,0.996894,1.0
400,0.0236,0.019554,0.964286,0.986025,0.998447
500,0.0238,0.019192,0.956522,0.996894,1.0
600,0.0216,0.017928,0.959627,0.990683,0.998447
700,0.0198,0.01954,0.951863,0.98913,0.998447
800,0.0184,0.017379,0.97205,0.998447,1.0
900,0.0206,0.018165,0.951863,0.984472,0.993789
1000,0.0216,0.017073,0.970497,0.992236,0.995342


0,1
eval/loss,█▆▅▅▄▅▄▄▄▃▃▂▂▃▃▃▂▂▂▂▂▂▂▂▂▂▁▁▂▂▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▃▁▄▂▃▁▆█▂▆▄▃▆▃▄▆▆▇▄▆▆▃▅▄▃▆▆▆▂▃▅▄▄▃▃▅▅▄▅▄
eval/recall@3,▇▂▇▄▃▁▅▆▄▄▆▆▄▆▆▁▇▇▆▄▇███▇█▇▂▅▇▇██▅▇▇▇▇▇▇
eval/recall@7,▆█▆▆█▁▆██▆█▃█▆█▆▃▆███████████▁▆█████████
eval/runtime,█▇▇█▇▇█████▇█████▇█▇▃▂▂▂▂▂▃▄▇▇▇▇▂▁▁▁▁▁▄▇
eval/samples_per_second,▁▂▁▂▁▁▁▁▁▁▁▂▁▁▂▁▁▁▁▁▁▂▅▆▇▇▇▆▅▂▂▂▅▇███▅▂▂
eval/steps_per_second,▁▂▂▁▂▂▁▁▁▁▁▁▂▁▁▁▁▁▁▁▆▇▇▇▇▇▆▅▂▂▂▂██████▂▁
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▆▆▆▆▆▆▆▆▆▆▇▇▇▇▇▇██
train/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇█████
train/grad_norm,█▄▄▃▄▄▅▅▄▇▃▅▆▂▁▂▇▂▄▅▂▃▂▅▁▃▁▁▂▁▁▁▁▁▁▁▁▂▁▁

0,1
eval/loss,0.01107
eval/recall@1,0.96429
eval/recall@3,0.99845
eval/recall@7,1
eval/runtime,12.8794
eval/samples_per_second,250.011
eval/steps_per_second,15.684
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9583333333333334
Recall@3: 0.9953703703703703
Recall@7: 0.9984567901234568
Treinando modelo 'multi-qa-distilbert-cos-v1' (modelo 5 de 6) com learning rate de 5e-06:


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/523 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0555,0.036003,0.944099,0.990683,0.998447
200,0.041,0.031436,0.93323,0.986025,0.996894
300,0.037,0.029425,0.930124,0.986025,0.996894
400,0.0336,0.027935,0.923913,0.984472,0.998447
500,0.0343,0.026734,0.923913,0.984472,0.998447
600,0.0325,0.02633,0.923913,0.990683,0.998447
700,0.0306,0.025163,0.920807,0.98913,0.995342
800,0.0279,0.024539,0.927019,0.990683,0.996894
900,0.0306,0.024075,0.925466,0.98913,0.998447
1000,0.0296,0.023448,0.928571,0.990683,0.998447


## Continuação depois de interrompido

In [None]:
# Resume after the interrupted session: rebind `modelos` to only the base models that
# still have to be trained.
modelos = [
    "multi-qa-distilbert-cos-v1",
    "multi-qa-MiniLM-L6-cos-v1",
]

# Learning rates tried for each remaining model.
learning_rates = [5e-06,5e-05]

In [None]:
# Reload the results saved so far, so the resumed loop fills in the remaining rows.
# Skip stray index columns ("Unnamed: 0", "Unnamed: 0.1", ...) left in the file by
# earlier saves that wrote the row index; they carry no information.
resultados = pd.read_csv(
    folder+"/resultados_finetuning.csv",
    usecols=lambda coluna: not str(coluna).startswith('Unnamed'),
)
resultados

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Modelo Base,Learning Rate,Recall@1,Recall@3,Recall@7
0,0,0,0,all-MiniLM-L6-v2,5e-06,0.924383,0.976852,0.996914
1,1,1,1,all-MiniLM-L12-v2,5e-06,0.955247,0.993827,1.0
2,2,2,2,multi-qa-mpnet-base-dot-v1,5e-06,0.958333,0.993827,1.0
3,3,3,3,multi-qa-mpnet-base-cos-v1,5e-06,0.969136,0.996914,1.0
4,4,4,4,multi-qa-distilbert-cos-v1,5e-06,0.942901,0.989198,0.996914
5,5,5,5,multi-qa-MiniLM-L6-cos-v1,5e-06,0.929012,0.983025,0.993827
6,6,6,6,all-MiniLM-L6-v2,5e-05,0.950617,0.983025,0.998457
7,7,7,7,all-MiniLM-L12-v2,5e-05,0.967593,0.99537,1.0
8,8,8,8,multi-qa-mpnet-base-dot-v1,5e-05,0.96142,0.989198,0.998457
9,9,9,9,multi-qa-mpnet-base-cos-v1,5e-05,0.958333,0.99537,0.998457


In [None]:
# Resumed run: fine-tune every remaining (base model, learning rate) combination with
# CosineSimilarityLoss, log each run to Weights & Biases, save the resulting model to
# Drive, measure recall@1/3/7 on the test set and persist the results table after
# every run.
for i,modelo in enumerate(modelos):
  for learning_rate in learning_rates:
    print(f"Treinando modelo '{modelo}' (modelo {i+1} de {len(modelos)}) com learning rate de {learning_rate}:")
    nome = f"{modelo}_{learning_rate}"
    epocas = 4

    run = wandb.init(project="conserta-avioes_fine-tuning_01",name=nome)
    try:
      model = SentenceTransformer(modelo)
      train_loss = losses.CosineSimilarityLoss(model=model)

      # Evaluate and checkpoint every 100 steps; at the end, reload the checkpoint
      # with the highest validation recall@1.
      args = SentenceTransformerTrainingArguments(
        output_dir=nome,
        num_train_epochs=epocas,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        eval_strategy="steps",
        eval_steps=100,
        save_strategy="steps",
        save_steps=100,
        save_total_limit=2,
        logging_steps=100,
        load_best_model_at_end=True,
        metric_for_best_model="recall@1",
        greater_is_better=True,
        learning_rate=learning_rate,
        #max_grad_norm = 1.0,
        run_name=nome
      )

      trainer = SentenceTransformerTrainer(
        model=model,
        args=args,
        train_dataset=dataset_treino,
        eval_dataset=dataset_val,
        loss=train_loss,
        evaluator=RecallAtKEvaluator()
      )

      trainer.train()
    finally:
      # Always close the W&B run, even if loading or training fails, so a crashed
      # run is not left open and the next wandb.init() starts clean.
      run.finish()

    model.save(folder+"/modelos/"+nome)

    model.eval()
    recall_1_teste = recall_at_k(model,1,corpus_teste,queries_teste)
    recall_3_teste = recall_at_k(model,3,corpus_teste,queries_teste)
    recall_7_teste = recall_at_k(model,7,corpus_teste,queries_teste)
    print(f"Recall@1: {recall_1_teste}")
    print(f"Recall@3: {recall_3_teste}")
    print(f"Recall@7: {recall_7_teste}")

    # Row of the results table that belongs to this (model, learning rate) pair.
    linha = (resultados['Modelo Base']==modelo)&(resultados['Learning Rate']==learning_rate)
    resultados.loc[linha,'Recall@1'] = recall_1_teste
    resultados.loc[linha,'Recall@3'] = recall_3_teste
    resultados.loc[linha,'Recall@7'] = recall_7_teste

    # Save after every run so an interrupted session keeps the finished results.
    # index=False keeps the row index out of the file; otherwise every
    # read_csv/to_csv round trip adds another "Unnamed: 0" column.
    resultados.to_csv(folder+"/resultados_finetuning.csv",index=False)

Treinando modelo 'multi-qa-distilbert-cos-v1' (modelo 1 de 2) com learning rate de 5e-06:


  | |_| | '_ \/ _` / _` |  _/ -_)
[34m[1mwandb[0m: Currently logged in as: [33mana-sovat[0m ([33mana-sovat-[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/523 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]



Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0581,0.035727,0.947205,0.990683,0.998447
200,0.0405,0.031637,0.936335,0.990683,0.996894
300,0.0379,0.029806,0.923913,0.986025,0.996894
400,0.039,0.027678,0.925466,0.982919,0.996894
500,0.0321,0.026929,0.92236,0.987578,0.996894
600,0.0329,0.026717,0.92236,0.990683,0.998447
700,0.028,0.02589,0.92236,0.992236,0.996894
800,0.0265,0.025291,0.919255,0.990683,0.996894
900,0.0271,0.024567,0.925466,0.990683,0.998447
1000,0.0269,0.023872,0.931677,0.990683,0.996894


0,1
eval/loss,█▆▆▅▅▄▄▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▇▅▂▂▂▁▂▄▄▄▅▅▆▇▆▇▇▆▇█████▇▇▇▇▇▇▇█▇▆▇▇▇▇▇▇
eval/recall@3,▇▇▃▁▄█▇▇▇▇▇▇▆▄▄▄▇▄▃▄▄▃▆▃▄▇▄▂▂▂▃▃▃▃▃▃▂▄▂▂
eval/recall@7,▄▁▁▁▁▁▁▁▁▁▁▄▄▄▄▄█▁██▄██████████▄▄▄▄▄▄▄▄▄
eval/runtime,▃▁▁▃▅▄▆▅▆▇▆▄▅▆▅▆▆▅▅▇▅▆▄▇█▇█▆▄▅▄▆▆█▅█▅▄▅▄
eval/samples_per_second,▅██▅▄▃▄▃▃▄▃▃▄▂▄▃▄▂▄▂▂▄▄▂▁▃▄▄▄▃▃▄▁▄▄▅▄▃▅▄
eval/steps_per_second,▅██▅▆▁▅▃▄▃▂▂▅▄▄▃▄▁▃▂▄▄▃▄▂▅▂▁▃▄▄▃▁▄▃▄▃▄▃▄
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▁▂▂▂▂▃▃▃▃▄▄▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
train/grad_norm,▅█▂▅▆▃▅▂▇▃▂▄▄▄▂▃▂▄▂▂▃▂▂▇▂▂▂▂▆▁▁▂▁▃▂▃▄▄▇▅

0,1
eval/loss,0.01785
eval/recall@1,0.94565
eval/recall@3,0.98447
eval/recall@7,0.99845
eval/runtime,6.67
eval/samples_per_second,482.756
eval/steps_per_second,30.285
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9429012345679012
Recall@3: 0.9891975308641975
Recall@7: 0.9969135802469136
Treinando modelo 'multi-qa-distilbert-cos-v1' (modelo 1 de 2) com learning rate de 5e-05:


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.042,0.030141,0.93323,0.98913,0.996894
200,0.029,0.024383,0.919255,0.987578,0.996894
300,0.0275,0.024197,0.916149,0.97205,0.998447
400,0.0283,0.02253,0.934783,0.978261,0.993789
500,0.0213,0.021381,0.934783,0.979814,0.990683
600,0.0216,0.020302,0.942547,0.992236,0.992236
700,0.0178,0.019218,0.942547,0.990683,0.996894
800,0.0166,0.018381,0.942547,0.98913,1.0
900,0.0134,0.018245,0.934783,0.986025,0.998447
1000,0.0139,0.018767,0.930124,0.975155,0.998447


0,1
eval/loss,█▆▅▄▃▂▂▂▁▁▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▃▁▁▄▅▅▇▅█▆█▇▆▇█▅▆▇▆▇▇▇▇▇▆▆▇▆▇▆▇▇▇▆▇▆▆▇▇▇
eval/recall@3,▆▁▃▃▇▅▂▆▆█▇▇▇▆▄▆▄▅▅█▅▅▅▇▇▄▄▅█▅▅▆▆▅▅▆▆▆▆▅
eval/recall@7,▆▇▄▃▆▇▇▇███▇██▇▄▆▁▅▇█▇▅▆█▆█▇█▆▅▆▆▇▆▆▆▆▆▇
eval/runtime,▆▄▁▄▄▂▂▁▃▃▂█▂▄▃▃▅▄▂█▃▃▂▃▆▄▂▅█▂▂▄▃█▂▇▆▅▇▁
eval/samples_per_second,▃▅▆▇▅▄█▆▇▂▆▆▁▅▇▃▅▄▄▇▆▆▇▆▆▂▄▅▇▄▇▇▅▅█▂▃▄▁█
eval/steps_per_second,▃██▆▆▅▄▇▆▇█▆▆▆▇▆▄▇▅▃▇▁▆▅▆▄▆▄▄▁▅▆▅▅▁█▄▃▄▇
train/epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇████
train/global_step,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇▇▇▇██
train/grad_norm,▆▄█▄▂▂▂▅▂▅▆▁▁▂▂▃▆▃▁▆▁▁▁▁▂▂▁▂▁▁▅▁▁▁▂▂▁▁▁▁

0,1
eval/loss,0.01388
eval/recall@1,0.95963
eval/recall@3,0.98602
eval/recall@7,0.99845
eval/runtime,6.4842
eval/samples_per_second,496.588
eval/steps_per_second,31.152
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9614197530864198
Recall@3: 0.9891975308641975
Recall@7: 0.9984567901234568
Treinando modelo 'multi-qa-MiniLM-L6-cos-v1' (modelo 2 de 2) com learning rate de 5e-06:


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/383 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0635,0.040844,0.930124,0.993789,0.998447
200,0.0493,0.036535,0.928571,0.986025,0.998447
300,0.0461,0.033719,0.925466,0.978261,0.998447
400,0.0479,0.03165,0.92236,0.978261,0.996894
500,0.0421,0.030789,0.919255,0.976708,0.996894
600,0.0404,0.030307,0.916149,0.979814,0.995342
700,0.0367,0.029585,0.916149,0.976708,0.993789
800,0.0346,0.028923,0.917702,0.975155,0.996894
900,0.0351,0.028481,0.917702,0.970497,0.995342
1000,0.035,0.027725,0.920807,0.975155,0.996894


0,1
eval/loss,█▅▄▄▄▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▇▆▅▄▂▁▂▂▃▄▂▄▄▃▄▄▃▅▅▇▅▅▅▅▅▆▇▆▆▆▇▇▇█▇▇▇▇▇▆
eval/recall@3,█▆▃▃▄▂▁▂▄▄▂▂▂▂▂▂▂▂▄▄▂▁▂▂▂▂▂▃▂▂▂▃▂▂▂▂▂▂▂▂
eval/recall@7,██▇▇▅▇▅▇██▇▅▅▂▅▂▄▅▄▁▂▁▁▁▁▁▁▁▁▂▁▁▁▁▁▁▁▁▁▁
eval/runtime,▁▄▃▁▅▁▆▁▆▁▇▁▇▄▁█▁▇▁▇▂▁█▁▁▁▂▁▁▇▁█▂█▂█▂▂▂▂
eval/samples_per_second,▇▄▆█▃▃▇▇▂█▂██▁█▁█▂█▂▇▇▇▁▇▁█▇▁▇▇▁▇▁▇▁▇▆▂▇
eval/steps_per_second,▇▄▆█▄██▇▇▁▇▁▇▄▂█▁██▁█▁▇▁▇▁▁█▇▁▁▇█▁▇▇▇▇▆▆
train/epoch,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇▇███
train/grad_norm,▅▆▂▄▆▂▃▆▂█▃▆▂▂▂▄▅▄▃▅▂▂▆▄▂▄▂▃▂▂▃▁▂▁▂▃▃▄▂▁

0,1
eval/loss,0.02142
eval/recall@1,0.92857
eval/recall@3,0.9736
eval/recall@7,0.99068
eval/runtime,3.609
eval/samples_per_second,892.226
eval/steps_per_second,55.972
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9290123456790124
Recall@3: 0.9830246913580247
Recall@7: 0.9938271604938271
Treinando modelo 'multi-qa-MiniLM-L6-cos-v1' (modelo 2 de 2) com learning rate de 5e-05:


Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0475,0.030605,0.940994,0.981366,0.995342
200,0.0333,0.026926,0.925466,0.975155,0.98913
300,0.0335,0.027619,0.92236,0.973602,0.982919
400,0.0328,0.025698,0.920807,0.964286,0.979814
500,0.0274,0.025383,0.936335,0.968944,0.987578
600,0.0276,0.023587,0.93323,0.973602,0.98913
700,0.0223,0.021665,0.939441,0.976708,0.984472
800,0.0214,0.021765,0.920807,0.968944,0.98913
900,0.019,0.020669,0.93323,0.975155,0.986025
1000,0.0207,0.020093,0.934783,0.979814,0.98913


0,1
eval/loss,█▆▆▆▅▃▃▃▂▂▂▂▂▂▂▂▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▂▁▁▄▁▅▄▆▅▆▆▆▆▃▇█▆▇▄▃▅▅▆▆▅▅▅▄▄▅▄▅▄▄▄▅▄▅▅▅
eval/recall@3,▇▄▁▃▅▆▂▅▇▅▃█▅▄▅▅█▅▃▃▅▅▅█▅▅▅▅▆▅▅▅▅▅▆▅▅▄▅▅
eval/recall@7,▅▁▃▅▃▆█▅▃▆▄▄▅▄▆▄▆▆▆▅▅▇▅▅▅▆▆▅▆▄▄▅▆▆▇▆▆▆▆▅
eval/runtime,▄▄▁▅▃▆▅▂▅▂▅▁▅▁▄▆▁▃▁▆▁▄▄▁█▂█▂▁▂▇▄▇▄▁▁▆▃▆▁
eval/samples_per_second,▄▄█▃▅▂▄▇▄▇▇▄█▃█▄▇▃▅█▂▆█▅▅▁█▇▁▇▇▂▆▇▂██▃██
eval/steps_per_second,▄▄█▃█▄▃▇▃▃▄█▄▃▄▅█▃▅██▃▅▅▇█▇▁▇▇▇▆▇▂██▃▅▂█
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▄▄▄▄▄▄▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇▇▇▇█
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇▇███
train/grad_norm,▆▃▆▇▃▂▄▃█▃▂▃▃▅▂▂▃▅▁▂▁▃▁▃▃▃▄▂▄▂▂▂▃▂▂▃▄▂▂▂

0,1
eval/loss,0.01596
eval/recall@1,0.93478
eval/recall@3,0.97671
eval/recall@7,0.99068
eval/runtime,3.5343
eval/samples_per_second,911.078
eval/steps_per_second,57.155
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.9429012345679012
Recall@3: 0.9861111111111112
Recall@7: 0.9969135802469136


In [None]:
# Base Sentence-Transformers checkpoints fine-tuned in this round; each name is
# passed to SentenceTransformer(...) by the training loop.
modelos = [
    "all-MiniLM-L6-v2", "all-MiniLM-L12-v2",
    "multi-qa-mpnet-base-cos-v1", "multi-qa-distilbert-cos-v1",
]

# Learning rates to sweep for every model above (a single value in this round).
learning_rates = [5e-4]

In [None]:
# Fine-tune every (base model, learning rate) pair, tracking each run on
# Weights & Biases, then score the resulting model on the held-out test set and
# persist the metrics.
epocas = 4  # identical for every run, so it is set once outside the loops
for i,modelo in enumerate(modelos):
  for learning_rate in learning_rates:
    # Report progress against the real list size instead of a hard-coded total.
    print(f"Treinando modelo '{modelo}' (modelo {i+1} de {len(modelos)}) com learning rate de {learning_rate}:")
    nome = f"{modelo}_{learning_rate}"

    # Using the run as a context manager closes it even when training raises,
    # so a failed run is not left open when the next one starts.
    with wandb.init(project="conserta-avioes_fine-tuning_01",name=nome):
      model = SentenceTransformer(modelo)
      train_loss = losses.CosineSimilarityLoss(model=model)

      # Evaluation and checkpointing share the same 100-step cadence so that
      # load_best_model_at_end can restore the checkpoint with the highest
      # validation recall@1 once training is over.
      args = SentenceTransformerTrainingArguments(
        output_dir=nome,
        num_train_epochs=epocas,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=16,
        eval_strategy="steps",
        eval_steps=100,
        save_strategy="steps",
        save_steps=100,
        save_total_limit=2,
        logging_steps=100,
        load_best_model_at_end=True,
        metric_for_best_model="recall@1",
        greater_is_better=True,
        learning_rate=learning_rate,
        #max_grad_norm = 1.0,
        run_name=nome
      )

      trainer = SentenceTransformerTrainer(
        model=model,
        args=args,
        train_dataset=dataset_treino,
        eval_dataset=dataset_val,
        loss=train_loss,
        evaluator=RecallAtKEvaluator()
      )

      trainer.train()

    model.save(folder+"/modelos/"+nome)

    # Score the trained model on the test corpus/queries at k = 1, 3 and 7.
    model.eval()
    recall_1_teste = recall_at_k(model,1,corpus_teste,queries_teste)
    recall_3_teste = recall_at_k(model,3,corpus_teste,queries_teste)
    recall_7_teste = recall_at_k(model,7,corpus_teste,queries_teste)
    print(f"Recall@1: {recall_1_teste}")
    print(f"Recall@3: {recall_3_teste}")
    print(f"Recall@7: {recall_7_teste}")

    # Row(s) of `resultados` that belong to this (model, learning rate) pair.
    linha_alvo = (resultados['Modelo Base']==modelo)&(resultados['Learning Rate']==learning_rate)
    if not linha_alvo.any():
      # With no matching row the assignments below change nothing; say so
      # explicitly instead of silently dropping the metrics.
      print(f"Aviso: 'resultados' não possui linha para '{modelo}' com learning rate {learning_rate}; as métricas de teste não foram registradas.")
    resultados.loc[linha_alvo,'Recall@1'] = recall_1_teste
    resultados.loc[linha_alvo,'Recall@3'] = recall_3_teste
    resultados.loc[linha_alvo,'Recall@7'] = recall_7_teste

    # Saved after every run so finished results survive an interrupted session.
    # index=False keeps the row index out of the file; writing it would add one
    # more "Unnamed: 0" column each time the CSV is read back and saved again.
    resultados.to_csv(folder+"/resultados_finetuning.csv",index=False)

Treinando modelo 'all-MiniLM-L6-v2' (modelo 1 de 4) com learning rate de 0.0005:


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]



Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0608,0.045014,0.843168,0.944099,0.970497
200,0.0588,0.052453,0.807453,0.913043,0.958075
300,0.0605,0.053969,0.795031,0.913043,0.981366
400,0.0515,0.065571,0.725155,0.863354,0.93323
500,0.0546,0.045483,0.796584,0.897516,0.948758
600,0.0438,0.045496,0.790373,0.886646,0.942547
700,0.0423,0.039627,0.829193,0.920807,0.973602
800,0.0469,0.040447,0.805901,0.923913,0.967391
900,0.049,0.038188,0.801242,0.927019,0.965839
1000,0.0512,0.042805,0.796584,0.906832,0.942547


0,1
eval/loss,▆█▅▅▄▃▅▄▄▂▃▃▂▂▂▃▂▂▁▁▁▁▁▁▂▂▂▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,█▂▂▁▃▃▁▃▃▅▅▅▆▅▅▇▇█▆▇▆▃▃▅▆▃▅▆▅▆▅▅▅▆▅▅▆▆▆▆
eval/recall@3,▇▄▄▂▁▅▃▅▅▅▄▆▅▅█▇▆▇▆▇▇▇▅▆▆▆▇▇▆▆█▇▆▇▇▇▇▇██
eval/recall@7,▆▇▁▃▂▅▂▄▆▄▄▄▅▄▅▅▆▅▆▅▆▇▆█▆▆▆▅▇▇▇██▇▇▇▇▇▇▇
eval/runtime,▁▁▂▁▁▂█▁▁▅▁▂▆▁▂▃▅▄▁▄▂▇▂▂█▁█▆█▁▇▂▂▂▁▂▆▅▁█
eval/samples_per_second,██▁█▁█▁█▇▇▅▄▅▃█▃▃▆█▃▄▄▇▇▇█▁▃▁█▇▇▃▄▇▁▇▂▇▁
eval/steps_per_second,██▁▇██▇▇▇▁▄▇▃█▆▃▆█▃▅▄▄▇▂▁▁▃▁█▇▇▃▄▇▇▁▇▂▇▁
train/epoch,▁▁▁▁▁▂▂▂▃▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▅▆▆▇▇▇▇▇▇███
train/grad_norm,▃▄▅█▃▁▃▃▁▂▃▂▃▂▂▂▂▂▂▁▁▁▃▁▂▁▂▂▂▁▁▂▁▂▂▁▂▁▁▁

0,1
eval/loss,0.02339
eval/recall@1,0.82919
eval/recall@3,0.95652
eval/recall@7,0.98137
eval/runtime,4.8099
eval/samples_per_second,669.45
eval/steps_per_second,41.997
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.8425925925925926
Recall@3: 0.9367283950617284
Recall@7: 0.9861111111111112
Treinando modelo 'all-MiniLM-L12-v2' (modelo 2 de 4) com learning rate de 0.0005:


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/615 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/352 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.0975,0.11555,0.630435,0.798137,0.849379
200,0.1349,0.274951,0.093168,0.180124,0.285714
300,0.378,0.5,0.004658,0.015528,0.026398
400,0.3549,0.5,0.006211,0.018634,0.045031
500,0.3391,0.5,0.007764,0.015528,0.043478
600,0.3184,0.5,0.007764,0.017081,0.048137
700,0.3083,0.5,0.001553,0.007764,0.034161
800,0.3016,0.5,0.004658,0.017081,0.034161
900,0.314,0.5,0.006211,0.018634,0.040373
1000,0.2978,0.5,0.018634,0.035714,0.065217


0,1
eval/loss,▁▄██████████████████████████████████████
eval/recall@1,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@3,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@7,█▁▂▁▂▁▂▁▁▁▂▁▁▁▁▂▁▁▁▂▂▂▁▁▁▂▂▁▁▂▁▂▂▁▂▁▁▁▁▂
eval/runtime,▁▅▇█▆▅▁▁▅▅▄▇▃▇█▁▂▅▄▁▁▁▁▇█▂▆▃▆▁▅▁▁▄▇▁▆▁▆▁
eval/samples_per_second,█▃▃▁▂▃▇▇▄█▆▁▆▂▁█▁▇▄▅███▁▇▆▃▄█▄█▄▂███▃▇▄█
eval/steps_per_second,█▃▃▂▂▄▇▇▄█▂▂██▁▅█▂███▂▁▇▆▅▃▆▂██▄▁██▂▇▃▄█
train/epoch,▁▁▁▁▁▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇▇▇▇▇█
train/global_step,▁▁▁▁▂▂▂▂▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇▇▇▇▇████
train/grad_norm,▇█▁▄▄▃▄▄▃▃▃▃▃▂▃▂▃▃▃▄▂▃▂▂▃▂▃▄▃▂▃▃▃▃▂▃▂▂▂▄

0,1
eval/loss,0.5
eval/recall@1,0.00776
eval/recall@3,0.01708
eval/recall@7,0.04503
eval/runtime,5.6055
eval/samples_per_second,574.439
eval/steps_per_second,36.036
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.6635802469135802
Recall@3: 0.7978395061728395
Recall@7: 0.8734567901234568
Treinando modelo 'multi-qa-mpnet-base-cos-v1' (modelo 3 de 4) com learning rate de 0.0005:


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/438M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.3585,0.5,0.007764,0.020186,0.037267
200,0.3606,0.5,0.001553,0.015528,0.041925
300,0.3372,0.5,0.007764,0.021739,0.062112
400,0.4218,0.5,0.007764,0.013975,0.037267
500,0.3733,0.5,0.007764,0.021739,0.043478
600,0.3611,0.5,0.009317,0.021739,0.046584
700,0.3338,0.5,0.004658,0.018634,0.029503
800,0.3206,0.5,0.006211,0.015528,0.054348
900,0.321,0.5,0.009317,0.017081,0.032609
1000,0.3244,0.5,0.004658,0.023292,0.045031


0,1
eval/loss,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@1,▅▂▅▅▆▅▆▆▄▅▁▃▄▃█▃▅▇▂▆▂▃▃▅▄▂▂▅▄▇▅▅▄▂▂▄▅▆▅▅
eval/recall@3,▅▃▆▅▃▆▄▅▃▇▆▅▃▄█▅▅▇▇▃▃▅▁▅▃▇▂▄▄▅▆▃▄▅▄▁▅▅▆▅
eval/recall@7,▂█▂▄▆▄▂▃▇▅▂▅▅▂▄▄▃▄▃▄▆▄▁▅▁▁▄▄▄▅▅▂▁▁▅▃▅▄▃▃
eval/runtime,▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▅▁▅▄▄██▁▁
eval/samples_per_second,███████████████████████████▄█▃▆███▄▃▁███
eval/steps_per_second,██████▇████████████████████▄█▄▆██▄▁▅▂███
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▄▄▄▅▅▆▆▆▆▆▇▇▇▇▇█████
train/global_step,▁▁▂▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▆▆▆▆▆▆▆▆▆▇▇████
train/grad_norm,▂▁▂▁▁▂▃▂▃▃█▁▁▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▂

0,1
eval/loss,0.5
eval/recall@1,0.00621
eval/recall@3,0.01863
eval/recall@7,0.04037
eval/runtime,12.3764
eval/samples_per_second,260.173
eval/steps_per_second,16.321
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.006172839506172839
Recall@3: 0.007716049382716049
Recall@7: 0.029320987654320986
Treinando modelo 'multi-qa-distilbert-cos-v1' (modelo 4 de 4) com learning rate de 0.0005:


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/523 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/333 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Computing widget examples:   0%|          | 0/1 [00:00<?, ?example/s]

Step,Training Loss,Validation Loss,Recall@1,Recall@3,Recall@7
100,0.1196,0.129504,0.397516,0.551242,0.667702
200,0.34,0.337831,0.062112,0.136646,0.25
300,0.4257,0.5,0.009317,0.029503,0.057453
400,0.4833,0.5,0.006211,0.017081,0.040373
500,0.3377,0.5,0.001553,0.013975,0.035714
600,0.2737,0.5,0.006211,0.017081,0.060559
700,0.2616,0.5,0.003106,0.012422,0.035714
800,0.2583,0.5,0.0,0.009317,0.03882
900,0.2629,0.5,0.01087,0.02795,0.052795
1000,0.2644,0.5,0.004658,0.017081,0.040373


0,1
eval/loss,▁███████████████████████████████████████
eval/recall@1,█▂▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/recall@3,█▂▂▁▂▁▂▁▁▂▁▂▂▂▂▁▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂▁▂▂▂▂▂▂▂▂
eval/recall@7,█▃▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁▁
eval/runtime,▅▃▁▅▁▆▅▃▄▃▅▃▂▂▂▇▂▃█▂▄▃▂▅▃▄▆▄▂█▂▇▆▇▅▅▃▄█▅
eval/samples_per_second,█▆▇█▄▃▃▄▆▆▆▆▆▇▇▂▄▆▆▃▂▅▄▆▇▁▆▅▃▃▄▁▇▃▃▆▅▁▃▇
eval/steps_per_second,▄█▆▇█▄▂▃▂▄▆▅▆▇▇▂▄▆▅▇▅▃▆▇▄▆▇▃▅▃▆▆▂▃▂▄▆▅▁▇
train/epoch,▁▁▁▂▂▂▂▃▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
train/global_step,▁▁▁▁▁▂▂▂▂▂▂▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▅▆▆▆▆▆▆▆▇▇███
train/grad_norm,█▆▁▃▁▂▂▂▂▁▂▂▁▂▂▂▂▂▂▂▁▂▂▂▁▂▁▁▂▂▂▁▂▁▁▂▁▁▁▃

0,1
eval/loss,0.5
eval/recall@1,0.00311
eval/recall@3,0.01863
eval/recall@7,0.04503
eval/runtime,6.2692
eval/samples_per_second,513.62
eval/steps_per_second,32.221
total_flos,0
train/epoch,4
train/global_step,6456


Recall@1: 0.42746913580246915
Recall@3: 0.5864197530864198
Recall@7: 0.7037037037037037


In [None]:
# Empty results table with one row per (base model, learning rate) pair, in the
# same order the evaluation loop visits them. Sizes are derived from `modelos`
# and `learning_rates` so the table stays valid if either list changes.
combinacoes = [(modelo, lr) for modelo in modelos for lr in learning_rates]
novos = pd.DataFrame({
    "Modelo Base": [modelo for modelo, _ in combinacoes],
    "Learning Rate": [lr for _, lr in combinacoes],
    # Placeholders, filled in once each model has been evaluated.
    "Recall@1": [None]*len(combinacoes),
    "Recall@3": [None]*len(combinacoes),
    "Recall@7": [None]*len(combinacoes),
})
novos

Unnamed: 0,Modelo Base,Learning Rate,Recall@1,Recall@3,Recall@7
0,all-MiniLM-L6-v2,0.0005,,,
1,all-MiniLM-L12-v2,0.0005,,,
2,multi-qa-mpnet-base-cos-v1,0.0005,,,
3,multi-qa-distilbert-cos-v1,0.0005,,,


In [None]:
# Reload each fine-tuned model saved under <folder>/modelos and recompute its
# test-set recall, writing the values into the matching row of `novos`.
for modelo in modelos:
  for learning_rate in learning_rates:
    nome = f"{modelo}_{learning_rate}"
    model = SentenceTransformer(folder+"/modelos/"+nome)

    model.eval()
    recall_1_teste = recall_at_k(model,1,corpus_teste,queries_teste)
    recall_3_teste = recall_at_k(model,3,corpus_teste,queries_teste)
    recall_7_teste = recall_at_k(model,7,corpus_teste,queries_teste)
    print(f"Recall@1: {recall_1_teste}")
    print(f"Recall@3: {recall_3_teste}")
    print(f"Recall@7: {recall_7_teste}")

    # Build the row selector once and reuse it for the three metric columns.
    linha_alvo = (novos['Modelo Base']==modelo)&(novos['Learning Rate']==learning_rate)
    for coluna, valor in (
      ('Recall@1', recall_1_teste),
      ('Recall@3', recall_3_teste),
      ('Recall@7', recall_7_teste),
    ):
      novos.loc[linha_alvo,coluna] = valor

Recall@1: 0.8425925925925926
Recall@3: 0.9367283950617284
Recall@7: 0.9861111111111112
Recall@1: 0.6635802469135802
Recall@3: 0.7978395061728395
Recall@7: 0.8734567901234568
Recall@1: 0.004629629629629629
Recall@3: 0.006172839506172839
Recall@7: 0.026234567901234566
Recall@1: 0.42746913580246915
Recall@3: 0.5864197530864198
Recall@7: 0.7037037037037037


In [None]:
    # Append the newly evaluated rows to the earlier results. ignore_index
    # renumbers the rows so the labels of `novos` (0..n-1) do not duplicate the
    # labels already used by `resultados`.
    resultados2 = pd.concat([resultados,novos],ignore_index=True)
    resultados2

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Modelo Base,Learning Rate,Recall@1,Recall@3,Recall@7
0,0.0,0.0,0.0,all-MiniLM-L6-v2,5e-06,0.924383,0.976852,0.996914
1,1.0,1.0,1.0,all-MiniLM-L12-v2,5e-06,0.955247,0.993827,1.0
2,2.0,2.0,2.0,multi-qa-mpnet-base-dot-v1,5e-06,0.958333,0.993827,1.0
3,3.0,3.0,3.0,multi-qa-mpnet-base-cos-v1,5e-06,0.969136,0.996914,1.0
4,4.0,4.0,4.0,multi-qa-distilbert-cos-v1,5e-06,0.942901,0.989198,0.996914
5,5.0,5.0,5.0,multi-qa-MiniLM-L6-cos-v1,5e-06,0.929012,0.983025,0.993827
6,6.0,6.0,6.0,all-MiniLM-L6-v2,5e-05,0.950617,0.983025,0.998457
7,7.0,7.0,7.0,all-MiniLM-L12-v2,5e-05,0.967593,0.99537,1.0
8,8.0,8.0,8.0,multi-qa-mpnet-base-dot-v1,5e-05,0.96142,0.989198,0.998457
9,9.0,9.0,9.0,multi-qa-mpnet-base-cos-v1,5e-05,0.958333,0.99537,0.998457


In [None]:
# Drop the leftover index columns ("Unnamed: ...") picked up from earlier CSV
# round-trips. Selecting by name rather than by position keeps this cell safe
# to re-run and correct however many of those columns are present.
colunas_indice = resultados2.columns.str.startswith("Unnamed")
resultados2 = resultados2.loc[:, ~colunas_indice]
resultados2

Unnamed: 0,Modelo Base,Learning Rate,Recall@1,Recall@3,Recall@7
0,all-MiniLM-L6-v2,5e-06,0.924383,0.976852,0.996914
1,all-MiniLM-L12-v2,5e-06,0.955247,0.993827,1.0
2,multi-qa-mpnet-base-dot-v1,5e-06,0.958333,0.993827,1.0
3,multi-qa-mpnet-base-cos-v1,5e-06,0.969136,0.996914,1.0
4,multi-qa-distilbert-cos-v1,5e-06,0.942901,0.989198,0.996914
5,multi-qa-MiniLM-L6-cos-v1,5e-06,0.929012,0.983025,0.993827
6,all-MiniLM-L6-v2,5e-05,0.950617,0.983025,0.998457
7,all-MiniLM-L12-v2,5e-05,0.967593,0.99537,1.0
8,multi-qa-mpnet-base-dot-v1,5e-05,0.96142,0.989198,0.998457
9,multi-qa-mpnet-base-cos-v1,5e-05,0.958333,0.99537,0.998457


In [None]:
# Overwrite the results file with the merged table, leaving the row index out.
resultados2.to_csv(
    path_or_buf=folder+"/resultados_finetuning.csv",
    index=False,
)