In [45]:
from datasets import load_dataset
from sentence_transformers import InputExample, SentenceTransformer, losses, evaluation
from torch.utils.data import DataLoader
import logging
from sentence_transformers import LoggingHandler, SentenceTransformer
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from sentence_transformers.evaluation import SimilarityFunction

In [46]:
# MONOLINGUAL

#stsb = load_dataset('mteb/stsbenchmark-sts')

biosses = load_dataset('mteb/biosses-sts')

sick_r = load_dataset('mteb/sickr-sts')

sts12 = load_dataset('mteb/sts12-sts')
sts13 = load_dataset('mteb/sts13-sts')
sts14 = load_dataset('mteb/sts14-sts')
sts15 = load_dataset('mteb/sts15-sts')
sts16 = load_dataset('mteb/sts16-sts')
#sts17 = load_dataset('mteb/sts17-crosslingual-sts', "en-en")
#sts22 = load_dataset('mteb/sts22-crosslingual-sts', "en")

In [47]:
# Charger les datasets multilingues
languages_list = ['de', 'en', 'es', 'fr', 'it', 'nl', 'pl', 'pt', 'ru', 'zh']
datasets_stsbmt = [load_dataset('PhilipMay/stsb_multi_mt', lang) for lang in languages_list]

languages_sts17 = ['ko-ko', 'ar-ar', 'en-ar', 'en-de', 'en-en', 'en-tr', 'es-en', 'es-es', 'fr-en', 'it-en', 'nl-en']
datasets_sts17 = [load_dataset('mteb/sts17-crosslingual-sts', lang) for lang in languages_sts17]

languages_sts22 = ['fr-pl', 'en', 'es-en', 'zh', 'fr', 'de', 'tr', 'ru', 'de-en', 'pl', 'es', 'pl-en', 'es-it', 'zh-en', 'it', 'de-fr', 'de-pl', 'ar']
datasets_sts22 = [load_dataset('mteb/sts22-crosslingual-sts', lang) for lang in languages_sts22]

datasets = {
    "stsb_tr": load_dataset("emrecan/stsb-mt-turkish"),
    "stsb_he": load_dataset("imvladikon/stsb_he"),
    "sts_no": load_dataset("tollefj/sts-concatenated-NOB"),
    "sts_faroese": load_dataset("vesteinn/faroese-sts"),
    "stsb_id": load_dataset("LazarusNLP/stsb_mt_id"),
    "sickr_pl": load_dataset("PL-MTEB/sickr-pl-sts"),
    "cdscr_pl": load_dataset("PL-MTEB/cdscr-sts"),
    "stsb_vi": load_dataset("doanhieung/vi-stsbenchmark"),
    "sickr_vi": load_dataset("nlplabtdtu/sickr-sts-vi"),
    "biosses_vi": load_dataset("nlplabtdtu/biosses-sts-vi"),
    "sts12_vi": load_dataset("nlplabtdtu/sts12-vi"),
    "sts13_vi": load_dataset("nlplabtdtu/sts13-vi"),
    "sts14_vi": load_dataset("nlplabtdtu/sts14-vi"),
    "sts15_vi": load_dataset("nlplabtdtu/sts15-vi"),
    "sts16_vi": load_dataset("nlplabtdtu/sts16-vi"),
    "sts12_fr": load_dataset("Lajavaness/STS12-fr"),
    "sts13_fr": load_dataset("Lajavaness/STS13-fr"),
    "sts14_fr": load_dataset("Lajavaness/STS14-fr"),
    "sts15_fr": load_dataset("Lajavaness/STS15-fr"),
    "sts16_fr": load_dataset("Lajavaness/STS16-fr"),
    "stsb_en2indic": load_dataset("mteb/indic_sts")
}

stsb_ko = load_dataset("supark/ko-stsb")

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/18 [00:00<?, ?it/s]

In [48]:
def prepare_data(dataset):
    examples = []
    for item in dataset:
        if 'score' in item.keys():
            score = float(item['score']) / 5.0  # Normaliser les scores qui sont sur 5
        elif 'relatedness_score' in item.keys():
            score = float(item['relatedness_score']) / 5.0
        elif 'label' in item.keys():
            score = float(item['label']) / 5.0
        elif 'correlation' in item.keys():
            score = float(item['correlation']) / 5.0
        else:
            score = float(item['similarity_score']) / 5.0
        if 'sentence1' in item.keys() and 'sentence2' in item.keys():
            examples.append(InputExample(texts=[item['sentence1'], item['sentence2']], label=score))
        elif 'sentence_A' in item.keys() and 'sentence_B' in item.keys():
            examples.append(InputExample(texts=[item['sentence_A'], item['sentence_B']], label=score))
        elif 'text_1' in item.keys() and 'text_2' in item.keys():
            examples.append(InputExample(texts=[item['text_1'], item['text_2']], label=score))
        elif 'english_sentence' in item.keys() and 'indic_sentence' in item.keys():
            examples.append(InputExample(texts=[item['english_sentence'], item['indic_sentence']], label=score))
    return examples

In [49]:
def prepare_data_without_score(dataset):
    examples = []
    for item in dataset:
        if 'sentence1' in item.keys() and 'sentence2' in item.keys():
            examples.append(InputExample(texts=[item['sentence1'], item['sentence2']], label=float(item['score'])))
    return examples

In [50]:
# MONOLINGUAL

#train_examples_stsb = prepare_data(stsb['train'])
train_examples_sts12 = prepare_data(sts12['train'])
#train_examples_sts22 = prepare_data(sts22['train'])

all_train_data = train_examples_sts12

In [51]:
# MULTILINGUAL

#all_train_data = []

for dataset in datasets_stsbmt:
    all_train_data.extend(prepare_data(dataset['train']))

for dataset in datasets_sts22:
    if 'train' in dataset.keys():
        all_train_data.extend(prepare_data(dataset['train']))

In [52]:
all_train_data = all_train_data + prepare_data(biosses['test']) + prepare_data(sts12['test']) + prepare_data(sts13['test']) + prepare_data(sts14['test']) + prepare_data(sts15['test']) + prepare_data(sts16['test']) + prepare_data(sick_r['test'])

In [53]:
for name, dataset in datasets.items():
    for split in dataset.keys():
        all_train_data.extend(prepare_data(dataset[split]))

In [54]:
for split in stsb_ko.keys():
    all_train_data.extend(prepare_data_without_score(stsb_ko[split]))

In [55]:
# model = SentenceTransformer('sentence-transformers/paraphrase-albert-base-v2')
#model = SentenceTransformer('all-mpnet-base-v2')
#model = SentenceTransformer('aditeyabaral/sentencetransformer-xlm-roberta-base')
#model = SentenceTransformer('sentence-transformers/stsb-xlm-r-multilingual')
#model = SentenceTransformer('BAAI/bge-large-en-v1.5')
model = SentenceTransformer('sentence-transformers/paraphrase-multilingual-mpnet-base-v2')
#model = SentenceTransformer('sentence-transformers/gtr-t5-large')

INFO:sentence_transformers.SentenceTransformer:Use pytorch device_name: cuda
INFO:sentence_transformers.SentenceTransformer:Load pretrained SentenceTransformer: sentence-transformers/paraphrase-multilingual-mpnet-base-v2


In [56]:
train_dataloader = DataLoader(all_train_data, batch_size=256, shuffle=True)
train_loss = losses.AnglELoss(model=model)

In [29]:
# MONO

stsb_validation_data = prepare_data(stsb['validation'])

# Combine all datasets into a single flat list
all_validation_data = stsb_validation_data

evaluator = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(all_validation_data,
                                                                        main_similarity=SimilarityFunction.COSINE,
                                                                        name='sts-dev')

NameError: name 'stsb' is not defined

In [57]:
# MULTI

all_validation_data = []

datasets_stsbmt_validation_data = [prepare_data(dataset['dev']) for dataset in datasets_stsbmt]

for dataset in datasets_stsbmt_validation_data:
    all_validation_data.extend(dataset)

evaluator = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(all_validation_data,
                                                                        main_similarity=SimilarityFunction.COSINE,
                                                                        name='sts-dev')

In [58]:
# Configuration de l'enregistrement pour suivre l'entraînement
logging.basicConfig(level=logging.INFO, handlers=[LoggingHandler()])

# Paramètres d'entraînement
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=evaluator,
    epochs=10,
    warmup_steps=100,
    weight_decay=0.01,
    output_path='output/trained-paraphrase-albert-base-v2'
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




Step,Training Loss,Validation Loss,Sts-dev Pearson Cosine,Sts-dev Spearman Cosine,Sts-dev Pearson Manhattan,Sts-dev Spearman Manhattan,Sts-dev Pearson Euclidean,Sts-dev Spearman Euclidean,Sts-dev Pearson Dot,Sts-dev Spearman Dot,Sts-dev Pearson Max,Sts-dev Spearman Max
500,10.9426,,,,,,,,,,,
885,10.9426,No log,0.911734,0.920240,0.906190,0.916297,0.905947,0.916207,0.729351,0.732066,0.911734,0.920240
1000,9.7184,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log
1500,9.5348,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log
1770,9.5348,No log,0.932814,0.939958,0.920257,0.931890,0.920622,0.932497,0.783756,0.787857,0.932814,0.939958
2000,9.4412,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log
2500,9.3097,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log
2655,9.3097,No log,0.941932,0.948877,0.925361,0.937414,0.926205,0.938538,0.804934,0.810363,0.941932,0.948877
3000,9.2357,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log
3500,9.1594,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log,No Log


INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 1.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9117	Spearman: 0.9202
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9062	Spearman: 0.9163
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9059	Spearman: 0.9162
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.7294	Spearman: 0.7321
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 2.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9328	Spearman: 0.9400
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9203	Spearman: 0.9319
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9206	Spearman: 0.9325
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.7838	Spearman: 0.7879
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 3.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9419	Spearman: 0.9489
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9254	Spearman: 0.9374
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9262	Spearman: 0.9385
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8049	Spearman: 0.8104
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 4.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9470	Spearman: 0.9528
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9270	Spearman: 0.9386
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9273	Spearman: 0.9392
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8437	Spearman: 0.8518
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 5.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9493	Spearman: 0.9553
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9267	Spearman: 0.9385
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9274	Spearman: 0.9394
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8438	Spearman: 0.8525
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 6.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9514	Spearman: 0.9567
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9270	Spearman: 0.9386
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9278	Spearman: 0.9397
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8595	Spearman: 0.8684
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 7.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9531	Spearman: 0.9581
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9264	Spearman: 0.9381
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9271	Spearman: 0.9389
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8684	Spearman: 0.8789
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 8.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9537	Spearman: 0.9582
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9262	Spearman: 0.9379
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9271	Spearman: 0.9390
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8735	Spearman: 0.8838
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 9.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9546	Spearman: 0.9589
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9275	Spearman: 0.9388
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9285	Spearman: 0.9400
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8762	Spearman: 0.8864
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-dev dataset after epoch 10.0:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9551	Spearman: 0.9593
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9270	Spearman: 0.9383
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9278	Spearman: 0.9394
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8760	Spearman: 0.8865
INFO:sentence_transformers.SentenceTransformer:Save model to output/trained-paraphrase-albert-base-v2


Computing widget examples:   0%|          | 0/5 [00:00<?, ?example/s]

In [None]:
# MONO

all_test_data = prepare_data(stsb['test']) + prepare_data(sts17['test']) + prepare_data(sts22['test']) + prepare_data(sick_r['test']) + prepare_data(biosses['test']) + prepare_data(sts12['test']) + prepare_data(sts13['test']) + prepare_data(sts14['test']) + prepare_data(sts15['test']) + prepare_data(sts16['test'])

evaluator_stsb = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(stsb['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
evaluator_sts17 = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(sts17['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
evaluator_sts22 = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(sts22['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
evaluator_sickr = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(sick_r['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
evaluator_biosses = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(biosses['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
evaluator_sts12 = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(sts12['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
evaluator_sts13 = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(sts13['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
evaluator_sts14 = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(sts14['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
evaluator_sts15 = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(sts15['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
evaluator_sts16 = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(sts16['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')

model.evaluate(evaluator_stsb)
model.evaluate(evaluator_sickr)
model.evaluate(evaluator_biosses)
model.evaluate(evaluator_sts12)
model.evaluate(evaluator_sts13)
model.evaluate(evaluator_sts14)
model.evaluate(evaluator_sts15)
model.evaluate(evaluator_sts16)
model.evaluate(evaluator_sts17)
model.evaluate(evaluator_sts22)

In [59]:
# MULTI

all_test_data = []

for dataset in datasets_stsbmt:
    all_test_data.extend(prepare_data(dataset['test']))

evaluator_stsb = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(all_test_data, main_similarity=SimilarityFunction.COSINE, name='sts-test')
model.evaluate(evaluator_stsb)

INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-test dataset:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9480	Spearman: 0.9515
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9252	Spearman: 0.9352
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9258	Spearman: 0.9364
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8443	Spearman: 0.8435


{'sts-test_pearson_cosine': 0.9479585032380113,
 'sts-test_spearman_cosine': 0.9514910354916427,
 'sts-test_pearson_manhattan': 0.925192141913064,
 'sts-test_spearman_manhattan': 0.9351648026362221,
 'sts-test_pearson_euclidean': 0.9258239806908134,
 'sts-test_spearman_euclidean': 0.9363652577900217,
 'sts-test_pearson_dot': 0.8442947652156254,
 'sts-test_spearman_dot': 0.8435104766124126,
 'sts-test_pearson_max': 0.9479585032380113,
 'sts-test_spearman_max': 0.9514910354916427}

In [60]:
i = 0
for dataset in datasets_sts17:
    print(languages_sts17[i])
    evaluator = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(dataset['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
    model.evaluate(evaluator)
    i += 1

i = 0
for dataset in datasets_sts22:
    print(languages_sts22[i])
    evaluator = evaluation.EmbeddingSimilarityEvaluator.from_input_examples(prepare_data(dataset['test']), main_similarity=SimilarityFunction.COSINE, name='sts-test')
    model.evaluate(evaluator)
    i += 1

ko-ko
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-test dataset:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.9725	Spearman: 0.9766
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.9382	Spearman: 0.9487
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Euclidean-Distance:	Pearson: 0.9392	Spearman: 0.9500
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Dot-Product-Similarity:	Pearson: 0.8531	Spearman: 0.8611
ar-ar
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:EmbeddingSimilarityEvaluator: Evaluating the model on the sts-test dataset:
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Cosine-Similarity :	Pearson: 0.8027	Spearman: 0.8124
INFO:sentence_transformers.evaluation.EmbeddingSimilarityEvaluator:Manhattan-Distance:	Pearson: 0.

In [63]:
model.push_to_hub("Gameselo/STS-multilingual-mpnet-base-v2", token="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")

INFO:sentence_transformers.SentenceTransformer:Save model to /tmp/tmpdclc_upj


model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

Upload 3 LFS files:   0%|          | 0/3 [00:00<?, ?it/s]

'https://huggingface.co/Gameselo/STS-multilingual-mpnet-base-v2/commit/88edc4611526a6bea6e8996046ea356dc65b5340'