In [None]:
# Mount Google Drive so that the dataset and model folders under
# /content/drive/MyDrive are reachable from this Colab runtime.
from google.colab import drive
drive.mount("/content/drive")

Mounted at /content/drive


In [None]:
!pip install sentence-transformers --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/86.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m86.0/86.0 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.1/7.1 MB[0m [31m100.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m75.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m20.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m116.8 MB/s[0m eta [36m0:00:00[0m
[?25h  Building wheel for sentence-transformers (setup.py) ... [?25l[?25hdone


In [None]:
# Show which GPU (if any) is attached to this runtime and how much memory is in use.
!nvidia-smi

Fri Apr 28 06:51:15 2023       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 525.85.12    Driver Version: 525.85.12    CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  Tesla T4            Off  | 00000000:00:04.0 Off |                    0 |
| N/A   51C    P8    10W /  70W |      0MiB / 15360MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [None]:
import logging
import math
import sys
import os
import time
import gzip
import csv
import random
import torch
import pandas as pd
from datetime import datetime
from pathlib import Path
from sentence_transformers import models, losses, datasets
from sentence_transformers import LoggingHandler, SentenceTransformer, util, InputExample
from sentence_transformers.evaluation import EmbeddingSimilarityEvaluator
from tqdm.auto import tqdm

In [None]:
# Project folder on the mounted Google Drive.
# NOTE(review): this path is relative, so it resolves against the current working
# directory, whereas the training cell below uses absolute "/content/drive/..." paths.
project_base_path = Path("drive/MyDrive/Promotion/negation-awareness")
# Data directory of the project. The training cell later rebinds `negation_dataset`
# to the path (a str) of one specific TSV file.
negation_dataset = project_base_path / "data/"

# Name of the pretrained model that gets fine-tuned; the part after the "/" is
# reused as the prefix of the output model name.
base_model = "sentence-transformers/all-mpnet-base-v2"
output_model_name = f"{base_model.split('/')[1]}-negation-wmt"  # TODO.
# Time at which this cell ran, formatted so that it can be used inside a path
# (not referenced by any of the cells visible in this notebook section).
timestamp: str = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
# Output location handed to model.fit() and model.save() further down.
model_save_path = str(project_base_path / f"finetuned-models/{output_model_name}")

# Download the gzipped STS benchmark file unless it is already present locally.
sts_dataset_path = 'data/stsbenchmark.tsv.gz'
if not os.path.exists(sts_dataset_path):
  util.http_get('https://sbert.net/datasets/stsbenchmark.tsv.gz', sts_dataset_path)

  0%|          | 0.00/392k [00:00<?, ?B/s]

In [None]:
# Code below adapted from
# https://github.com/UKPLab/sentence-transformers/blob/3e1929fddef16df94f8bc6e3b10598a98f46e62d/examples/training/nli/training_nli_v2.py
# distributed under Apache-2.0 license. See https://github.com/UKPLab/sentence-transformers/blob/master/LICENSE.

In [None]:
# Training hyperparameters shared by the fine-tuning cells below.
# NOTE(review): `train_split` is commented out here, although the ContrastiveLoss
# training cell further down uses it.
#train_split = 0.9  # fraction of training samples from the total number of samples in the dataset
train_batch_size = 64  # The larger you select this, the better the results (usually). But it requires more GPU memory
max_seq_length = 75  # passed to models.Transformer as max_seq_length
num_epochs = 1  # number of passes over the training data given to model.fit()

In [None]:
import gc

# Free GPU memory left over from earlier runs in this kernel.
# Order matters: collect unreachable Python objects first so that the tensors they
# still hold are released; only afterwards can empty_cache() return the now-unused
# cached CUDA blocks.
gc.collect()
torch.cuda.empty_cache()

18

In [None]:
import torch
from torch import nn, Tensor
from typing import Iterable, Dict


class MultipleNegativesRankingLoss(nn.Module):
    """
        This loss expects as input a batch consisting of sentence pairs (a_1, p_1), (a_2, p_2)..., (a_n, p_n)
        where we assume that (a_i, p_i) are a positive pair and (a_i, p_j) for i!=j a negative pair.

        For each a_i, it uses all other p_j as negative samples, i.e., for a_i, we have 1 positive example (p_i) and
        n-1 negative examples (p_j). It then minimizes the negative log-likelihood for softmax normalized scores.

        This loss function works great to train embeddings for retrieval setups where you have positive pairs (e.g. (query, relevant_doc))
        as it will sample in each batch n-1 negative docs randomly.

        The performance usually increases with increasing batch sizes.

        For more information, see: https://arxiv.org/pdf/1705.00652.pdf
        (Efficient Natural Language Response Suggestion for Smart Reply, Section 4.4)

        You can also provide one or multiple hard negatives per anchor-positive pair by structuring the data like this:
        (a_1, p_1, n_1), (a_2, p_2, n_2)

        Here, n_1 is a hard negative for (a_1, p_1). The loss will use for the pair (a_i, p_i) all p_j (j!=i) and all n_j as negatives.

        Example::

            from sentence_transformers import SentenceTransformer, losses, InputExample
            from torch.utils.data import DataLoader

            model = SentenceTransformer('distilbert-base-uncased')
            train_examples = [InputExample(texts=['Anchor 1', 'Positive 1']),
                InputExample(texts=['Anchor 2', 'Positive 2'])]
            train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=32)
            train_loss = losses.MultipleNegativesRankingLoss(model=model)
    """
    def __init__(self, model: SentenceTransformer, scale: float = 20.0, similarity_fct = util.cos_sim):
        """
        :param model: SentenceTransformer model
        :param scale: Output of similarity function is multiplied by scale value
        :param similarity_fct: similarity function between sentence embeddings. By default, cos_sim. Can also be set to dot product (and then set scale to 1)
        """
        super().__init__()
        self.model = model
        self.scale = scale
        self.similarity_fct = similarity_fct
        self.cross_entropy_loss = nn.CrossEntropyLoss()

    def forward(self, sentence_features: Iterable[Dict[str, Tensor]], labels: Tensor):
        """
        Compute the ranking loss for one batch.

        :param sentence_features: one feature dict per text column; the first column holds the anchors,
            every further column (positives, then optional hard negatives) holds candidates
        :param labels: not used; the targets are derived from the batch layout
        :return: cross-entropy loss over the scaled similarity scores
        """
        reps = [self.model(sentence_feature)['sentence_embedding'] for sentence_feature in sentence_features]
        embeddings_a = reps[0]
        # Candidates: positives first, followed by any hard negatives.
        embeddings_b = torch.cat(reps[1:])

        scores = self.similarity_fct(embeddings_a, embeddings_b) * self.scale
        # Example a[i] should match with b[i]: the positives come first in embeddings_b,
        # so the correct column for row i is i. Built directly on the scores' device.
        targets = torch.arange(scores.size(0), dtype=torch.long, device=scores.device)
        return self.cross_entropy_loss(scores, targets)

    def get_config_dict(self):
        """Return the loss settings (scale and the name of the similarity function) as a dict."""
        return {'scale': self.scale, 'similarity_fct': self.similarity_fct.__name__}





In [None]:
#### Just some code to print debug information to stdout
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])
#### /print debug information to stdout

# Here we define our SentenceTransformer model: the pretrained transformer
# (inputs limited to max_seq_length) followed by mean pooling over its outputs.
word_embedding_model = models.Transformer(base_model, max_seq_length=max_seq_length)
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), pooling_mode='mean')
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

# Read the negation dataset file and create the training dataset
logging.info("Read Negation Dataset train dataset")

#"""
# Training file selection. The commented-out lines are alternative datasets.
# NOTE: this rebinds `negation_dataset` from the Path of the data directory
# (configuration cell) to the str path of one TSV file.
#negation_dataset = "/content/drive/MyDrive/Promotion/negation-awareness/data/negation_dataset_v1.1.tsv"
#negation_dataset = "/content/drive/MyDrive/Promotion/negation-awareness/data/train_neg_0_antonym_full.tsv"
negation_dataset = "/content/drive/MyDrive/Promotion/negation-awareness/data/wmt_neg_train.tsv"
def load_data(negation_dataset_path):
    """Build (anchor, positive, hard negative) triplets from a scored sentence-pair TSV.

    The file must be tab-separated with a header row that contains the columns
    ``reference``, ``candidate`` and ``score``. A candidate whose score is
    <= 0.5 counts as a contradiction of its reference, any other candidate as
    an entailment.

    For every reference that has at least one entailment and at least one
    contradiction, two triplets are produced, each with randomly drawn
    candidates: ``[reference, entailment, contradiction]`` and
    ``[entailment, reference, contradiction]``.

    :param negation_dataset_path: path of the TSV file to read
    :return: list of ``InputExample`` objects holding three texts each
    :raises KeyError: if one of the required columns is missing
    :raises ValueError: if a score cannot be parsed as a float
    """
    # reference sentence -> candidates grouped by their relation to it
    train_data = {}
    n_total_samples = 0

    # Read the file the caller asked for. The previous version opened the global
    # `negation_dataset` for the actual parsing, so every call (including the one
    # meant for the dev file) loaded the training data.
    with open(negation_dataset_path, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        for row in reader:
            n_total_samples += 1
            sent1 = row['reference'].strip()
            sent2 = row['candidate'].strip()
            label = "contradiction" if float(row['score']) <= 0.5 else "entailment"
            groups = train_data.setdefault(sent1, {'contradiction': set(), 'entailment': set()})
            groups[label].add(sent2)
    print("Number of train samples:", n_total_samples, "\n")

    train_samples = []
    for sent1, others in train_data.items():
        if others['entailment'] and others['contradiction']:
            # Sort before drawing: set iteration order is not stable between
            # processes, sorted lists make the draw reproducible under a fixed seed.
            entailments = sorted(others['entailment'])
            contradictions = sorted(others['contradiction'])
            train_samples.append(InputExample(texts=[sent1, random.choice(entailments), random.choice(contradictions)]))
            train_samples.append(InputExample(texts=[random.choice(entailments), sent1, random.choice(contradictions)]))
    return train_samples
#"""

"""
def data_to_InputExample(dataset:pd.DataFrame,
                         ref_column:str="reference",
                         cand_column:str="candidate",
                         lab_column:str="score") -> list[InputExample]:
  return [
      InputExample(i, texts=[x[ref_column], x[cand_column]], label=x[lab_column])
        for i, x in tqdm(dataset.iterrows(), total=len(dataset))
  ]

train = pd.read_json(negation_dataset/'train_neg_0_antonym_full.json', lines=True)
train_samples = data_to_InputExample(train)
print("Train samples: {}".format(len(train_samples)))
#eval = pd.read_json(negation_dataset/'eval_neg_0_antonym_full.json', lines=True)
#dev_samples = data_to_InputExample(eval)
#logging.info("Dev samples: {}".format(len(dev_samples)))
"""
def load_scored_pairs(pairs_path):
    """Read a scored sentence-pair TSV into labelled two-sentence ``InputExample`` objects.

    :param pairs_path: tab-separated file with a header row and the columns
        ``reference``, ``candidate`` and ``score`` (the columns ``load_data`` reads)
    :return: one ``InputExample(texts=[reference, candidate], label=float(score))`` per row
    :raises KeyError: if one of the three columns is missing
    :raises ValueError: if a score cannot be parsed as a float
    """
    with open(pairs_path, "r", encoding="utf-8") as f:
        reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
        return [
            InputExample(texts=[row['reference'].strip(), row['candidate'].strip()], label=float(row['score']))
            for row in reader
        ]


# Build the training triplets from the dataset selected above.
train_samples = load_data(negation_dataset)
logging.info("Train samples: {}".format(len(train_samples)))

# Special data loader that avoids duplicates within a batch
train_dataloader = datasets.NoDuplicatesDataLoader(train_samples, batch_size=train_batch_size)


# Our training loss
train_loss = MultipleNegativesRankingLoss(model)


# Development set.
# The similarity evaluator compares the model's similarity for each sentence pair
# with a gold score, so it needs (sentence1, sentence2, score) examples, the same
# shape the STS benchmark dev set has in the ContrastiveLoss cell below. The
# triplets from load_data() carry no label at all, which is why they are not used here.
# NOTE(review): assumes wmt_neg_eval.tsv has the same columns as wmt_neg_train.tsv - confirm.
dev_samples = load_scored_pairs("/content/drive/MyDrive/Promotion/negation-awareness/data/wmt_neg_eval.tsv")
logging.info("Dev samples: {}".format(len(dev_samples)))
dev_evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, batch_size=train_batch_size, name='wmt-dev')

# Configure the training
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)  # 10% of train data for warm-up
logging.info("Warmup-steps: {}".format(warmup_steps))


# Train the model, evaluating on the dev set after every 10% of an epoch
eval_log_steps = int(len(train_dataloader)*0.1)
start_time = time.perf_counter()
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=dev_evaluator,
    epochs=num_epochs,
    evaluation_steps=eval_log_steps,
    warmup_steps=warmup_steps,
    output_path=model_save_path,
    use_amp=True  # Set to True, if your GPU supports FP16 operations
)
elapsed_time = time.perf_counter() - start_time

print("\n\nTraining time (seconds):", elapsed_time)

Number of train samples: 62435 

<function cos_sim at 0x7f346e4bcf70>
Number of train samples: 7805 



Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/358 [00:00<?, ?it/s]





Training time (seconds): 877.1837075560001




In [None]:
# Explicitly write the fine-tuned model to model_save_path (the same location
# that was passed to model.fit() as output_path).
model.save(model_save_path)

In [None]:
# Untouched copy of the pretrained model, used as the baseline in the comparison below.
base_mod = SentenceTransformer(base_model)
model.eval()

def cos_score(reference: str, candidate: str, model:SentenceTransformer) -> float:
    """Return the cosine similarity between the embeddings of two sentences.

    :param reference: first sentence
    :param candidate: second sentence
    :param model: model used to embed both sentences
    :return: cosine similarity as a Python float
    """
    emb_ref = model.encode(reference)
    emb_cand = model.encode(candidate)
    return util.cos_sim(emb_ref, emb_cand).item()

def cos_score_batched(references: list, candidates: list, model: SentenceTransformer, batch_size=8) -> torch.Tensor:
    """Return the cosine similarity of every (references[i], candidates[i]) pair.

    :param references: list of sentences
    :param candidates: list of sentences, same length as ``references``
    :param model: model used to embed the sentences
    :param batch_size: batch size handed to ``model.encode``
    :return: 1-D tensor with one similarity per pair (empty for empty input)
    """
    assert len(references) == len(candidates), "Number of references and candidates must be equal"
    if len(references) == 0:
        return torch.empty(0)
    emb_ref = torch.as_tensor(model.encode(references, batch_size=batch_size))
    emb_cand = torch.as_tensor(model.encode(candidates, batch_size=batch_size))
    # Row-wise similarity: only the N matching pairs are compared, instead of
    # building the full N x N matrix and keeping just its diagonal.
    return torch.nn.functional.cosine_similarity(emb_ref, emb_cand, dim=1)

# Probe pairs: antonym / negation pairs first, then paraphrases, word-order and
# spelling variations, an identical pair and a degenerate candidate.
sent_pairs = [
    ("It's rather hot in here.", "It's rather cold in here."),
    ("This is a red cat with a hat.", "This isn't a red cat with a hat."),
    ("This is a red cat with a hat.", "This is not a red cat with a hat."),
    ("Today is a beautiful day.", "Today is a wonderful day."),
    ("Today is a beautiful day.", "beautiful day today is."),
    ("Today is a beautiful day.", "today today today today is a beautiful day."),
    ("Today is a beautiful day.", "Today is a betiful day."),
    ("Today is a beautiful day.", "Today is a really beautiful day."),
    ("Today is a beautiful day.", "Today is a beautiful day."),
    ("Today is a beautiful day.", "."),
]

for s1, s2 in sent_pairs:
    print(s1)
    print(s2)
    print("Base", cos_score_batched([s1], [s2], base_mod))
    print("FT", cos_score_batched([s1], [s2], model))

Downloading (…)a8e1d/.gitattributes:   0%|          | 0.00/1.18k [00:00<?, ?B/s]

Downloading (…)_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/README.md:   0%|          | 0.00/10.6k [00:00<?, ?B/s]

Downloading (…)0bca8e1d/config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

Downloading (…)ce_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

Downloading (…)e1d/data_config.json:   0%|          | 0.00/39.3k [00:00<?, ?B/s]

Downloading pytorch_model.bin:   0%|          | 0.00/438M [00:00<?, ?B/s]

Downloading (…)nce_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

Downloading (…)cial_tokens_map.json:   0%|          | 0.00/239 [00:00<?, ?B/s]

Downloading (…)a8e1d/tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Downloading (…)okenizer_config.json:   0%|          | 0.00/363 [00:00<?, ?B/s]

Downloading (…)8e1d/train_script.py:   0%|          | 0.00/13.1k [00:00<?, ?B/s]

Downloading (…)b20bca8e1d/vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

Downloading (…)bca8e1d/modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

It's rather hot in here.
It's rather cold in here.
Base tensor([0.6409])
FT tensor([0.4014])
This is a red cat with a hat.
This isn't a red cat with a hat.
Base tensor([0.8470])
FT tensor([0.3507])
This is a red cat with a hat.
This is not a red cat with a hat.
Base tensor([0.8495])
FT tensor([0.3891])
Today is a beautiful day.
Today is a wonderful day.
Base tensor([0.8489])
FT tensor([0.8917])
Today is a beautiful day.
beautiful day today is.
Base tensor([0.8893])
FT tensor([0.9317])
Today is a beautiful day.
today today today today is a beautiful day.
Base tensor([0.8683])
FT tensor([0.8758])
Today is a beautiful day.
Today is a betiful day.
Base tensor([0.6771])
FT tensor([0.7057])
Today is a beautiful day.
Today is a really beautiful day.
Base tensor([0.9344])
FT tensor([0.9597])
Today is a beautiful day.
Today is a beautiful day.
Base tensor([1.0000])
FT tensor([1.])
Today is a beautiful day.
.
Base tensor([0.1430])
FT tensor([0.1982])


In [None]:
# cos_score / cos_score_batched are defined once, in the evaluation cell above.
# The identical copies that used to be repeated here were removed so that the two
# definitions cannot drift apart.

sents1 = ["It's rather hot in here.", "This is a red cat with a hat.", "This is a red cat with a hat.", "Today is a beautiful day.", "You are fat."]
sents2 = ["It's rather cold in here.", "This isn't a red cat with a hat.", "This is not a red cat with a hat.", "Today is a wonderful day.", "You are not thin."]

for s1, s2 in zip(sents1, sents2):
    print(s1)
    print(s2)
    # `model` is the fine-tuned model, so the score is labelled "FT"
    # (it was printed as "Base" before, which mislabelled the numbers).
    print("FT", cos_score_batched([s1], [s2], model))

It's rather hot in here.
It's rather cold in here.
Base tensor([0.3756])
This is a red cat with a hat.
This isn't a red cat with a hat.
Base tensor([0.3306])
This is a red cat with a hat.
This is not a red cat with a hat.
Base tensor([0.3956])
Today is a beautiful day.
Today is a wonderful day.
Base tensor([0.9358])
You are fat.
You are not thin.
Base tensor([0.7832])


---

The code below is identical to the code above, except that it uses `losses.ContrastiveLoss` instead of `losses.MultipleNegativesRankingLoss`. Performance is worse on the STS Benchmark.

I've also tried `losses.OnlineContrastiveLoss` but the score was even lower.

All losses available: https://www.sbert.net/docs/package_reference/losses.html

In [None]:
#### Just some code to print debug information to stdout
logging.basicConfig(format='%(asctime)s - %(message)s',
                    datefmt='%Y-%m-%d %H:%M:%S',
                    level=logging.INFO,
                    handlers=[LoggingHandler()])
#### /print debug information to stdout

# Fraction of the dataset rows used for training. Defined here because the
# hyperparameter cell has it commented out, which made this cell fail with a
# NameError on a fresh kernel.
train_split = 0.9

# Here we define our SentenceTransformer model. It is rebuilt from the pretrained
# checkpoint, so this run does not continue from the previous fine-tuning run.
word_embedding_model = models.Transformer(base_model, max_seq_length=max_seq_length)
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension(), pooling_mode='mean')
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

# Read the negation dataset file and create the training dataset
logging.info("Read Negation Dataset train dataset")

# sentence -> related sentences grouped by relation
train_data = {}

def add_to_samples(sent1, sent2, label):
    """Record sent2 as a contradiction (label == "1") or an entailment (any other label) of sent1."""
    relations = train_data.setdefault(sent1, {'contradiction': set(), 'entailment': set(), 'neutral': set()})
    relation = "contradiction" if label == "1" else "entailment"
    relations[relation].add(sent2)

# Parse the file once; the row count and the samples come from the same pass.
with open(negation_dataset, "r", encoding="utf-8") as f:
    reader = csv.DictReader(f, delimiter='\t', quoting=csv.QUOTE_NONE)
    dataset_columns = set(reader.fieldnames or [])
    dataset_rows = list(reader)

n_total_samples = len(dataset_rows)  # the header is not part of the rows
n_train_samples: int = math.floor(n_total_samples*train_split)
print("Number of train samples:", n_train_samples, "\n")

# `negation_dataset` may point at either of the two file layouts used in this
# notebook: premise/hypothesis/label or reference/candidate/score.
uses_nli_columns = {'premise', 'hypothesis', 'label'} <= dataset_columns
if not uses_nli_columns and not ({'reference', 'candidate', 'score'} <= dataset_columns):
    raise ValueError(
        "Unsupported dataset columns {}: expected premise/hypothesis/label "
        "or reference/candidate/score".format(sorted(dataset_columns))
    )

# Slice instead of breaking at `i == n_train_samples-1`, which stopped one row
# early and used only n_train_samples-1 rows.
for row in dataset_rows[:n_train_samples]:
    if uses_nli_columns:
        add_to_samples(row['premise'].strip(), row['hypothesis'].strip(), row['label'])
    else:
        # Same threshold as load_data(): a score <= 0.5 marks a contradiction.
        is_contradiction = float(row['score']) <= 0.5
        add_to_samples(row['reference'].strip(), row['candidate'].strip(), "1" if is_contradiction else "0")

# One positive (label=1) and one negative (label=0) pair per sentence that has both.
train_samples = []
for sent1, others in train_data.items():
    if len(others['entailment']) > 0 and len(others['contradiction']) > 0:
        # Sorted so that the random draw is reproducible under a fixed seed.
        train_samples.append(InputExample(texts=[sent1, random.choice(sorted(others['entailment']))], label=1))
        train_samples.append(InputExample(texts=[sent1, random.choice(sorted(others['contradiction']))], label=0))

logging.info("Train samples: {}".format(len(train_samples)))

# Special data loader that avoids duplicates within a batch
train_dataloader = datasets.NoDuplicatesDataLoader(train_samples, batch_size=train_batch_size)

# Our training loss
train_loss = losses.ContrastiveLoss(model)

# Read STSbenchmark dataset and use it as development set
logging.info("Read STSbenchmark dev dataset")
dev_samples = []
with gzip.open(sts_dataset_path, 'rt', encoding='utf8') as fIn:
    reader = csv.DictReader(fIn, delimiter='\t', quoting=csv.QUOTE_NONE)
    for row in reader:
        if row['split'] == 'dev':
            score = float(row['score']) / 5.0  # Normalize score to range 0 ... 1
            dev_samples.append(InputExample(texts=[row['sentence1'], row['sentence2']], label=score))

dev_evaluator = EmbeddingSimilarityEvaluator.from_input_examples(dev_samples, batch_size=train_batch_size, name='sts-dev')

# Configure the training
warmup_steps = math.ceil(len(train_dataloader) * num_epochs * 0.1)  # 10% of train data for warm-up
logging.info("Warmup-steps: {}".format(warmup_steps))


# Train the model
# NOTE(review): output_path is the same model_save_path that the
# MultipleNegativesRankingLoss run writes to - confirm that sharing it is intended.
start_time = time.perf_counter()
model.fit(
    train_objectives=[(train_dataloader, train_loss)],
    evaluator=dev_evaluator,
    epochs=num_epochs,
    evaluation_steps=int(len(train_dataloader)*0.1),
    warmup_steps=warmup_steps,
    output_path=model_save_path,
    use_amp=True  # Set to True, if your GPU supports FP16 operations
)
elapsed_time = time.perf_counter() - start_time

print("\n\nTraining time (seconds):", elapsed_time)

Number of train samples: 61902 



Epoch:   0%|          | 0/1 [00:00<?, ?it/s]

Iteration:   0%|          | 0/437 [00:00<?, ?it/s]



Training time (seconds): 162.48789934699994


---

In [None]:
# Folder of an earlier fine-tuning run (timestamped sub-directory) whose model is
# loaded for the comparison below.
model_save_path = str(project_base_path / f"finetuned-models/2023-02-17_15-02-13/{output_model_name}")

finetuned_model = SentenceTransformer(model_save_path)
# `base_model` starts out as the checkpoint name (a str) and is replaced by the
# loaded model here. The guard keeps the cell re-runnable: without it a second
# execution would pass the already-loaded model object to SentenceTransformer().
if isinstance(base_model, str):
    base_model = SentenceTransformer(base_model)

In [None]:
# Hand-written probe pairs, grouped by the expected outcome. The group names are
# printed as section titles by the comparison cell below.
sentences = {
    # Pairs that differ in meaning (negation, antonyms, swapped numbers).
    "disimilar sentences": [
        ("This is a red cat with a hat.", "This isn't a red cat with a hat."),
        ("It is rather cold in here.", "It is rather hot in here."),
        ("He was able to afford a very expensive car.", "He wasn't able to afford a very expensive car."),
        ("He exited the Rutgers faculty in 1994.", "He joined the Rutgers faculty in 1994."),
        ("This approach is used by Stanford's PwdHash.", "This approach is avoided by Stanford's PwdHash."),
        ("I have been to Paris.", "I have never been to Paris."),
        ("3.6 Million More Penguins Live in Antarctica Than Thought","6.3 Million More Penguins Live in Antarctica Than Thought"),
        ("It was the best of times, it was the age of wisdom...", "It was the worst of times, it was the age of foolishness..."),
        ("I like rainy days because they make me feel relaxed.", "I don't like rainy days because they don't make me feel relaxed.")
    ],
    # Pairs that keep the meaning although the wording (including negation words) changes.
    "similar sentences": [
        ("The antemedial, postmedial and subterminal lines are invisible.", "The antemedial, postmedial and subterminal lines are not visible."),
        ("The Public Health Department of Malta was housed in the building until 1998.", "The building has housed Malta's Public Health Department until 1998."),
        ("The movie was very bad.", "The movie was not good at all."),
        ("It should be easy.", "It shouldn't be hard."),
        ("Do you think we will be able to achieve it?", "In your opinion, will we achieve it?")
    ]
}

In [None]:
# Report, for every probe pair, the cosine similarity under the base model and
# under the fine-tuned model, grouped by the sections of `sentences`.
print("***********************")
print("* COSINE SIMILARITIES *")
print("***********************\n")

for group_name, pairs in sentences.items():
    # Section title with an underline of matching length
    print(f"{group_name.title()}")
    print(f"{'-' * len(group_name)}\n")
    for first, second in pairs:
        # Base model first, then the fine-tuned model; each sentence is embedded on its own.
        cos_sim_base = util.cos_sim(base_model.encode(first), base_model.encode(second)).item()
        cos_sim_finetuned = util.cos_sim(finetuned_model.encode(first), finetuned_model.encode(second)).item()

        print(first, "|", second)
        print()
        print("   Base:      ", cos_sim_base)
        print("   Fine-tuned:", cos_sim_finetuned)
        print("\n")

***********************
* COSINE SIMILARITIES *
***********************

Disimilar Sentences
-------------------

This is a red cat with a hat. | This isn't a red cat with a hat.

   Base:       0.8470286726951599
   Fine-tuned: 0.48826050758361816


It is rather cold in here. | It is rather hot in here.

   Base:       0.6452344655990601
   Fine-tuned: 0.2806696891784668


He was able to afford a very expensive car. | He wasn't able to afford a very expensive car.

   Base:       0.6313993334770203
   Fine-tuned: 0.2822864353656769


He exited the Rutgers faculty in 1994. | He joined the Rutgers faculty in 1994.

   Base:       0.8740546107292175
   Fine-tuned: 0.6433200836181641


This approach is used by Stanford's PwdHash. | This approach is avoided by Stanford's PwdHash.

   Base:       0.6105257272720337
   Fine-tuned: 0.30879804491996765


I have been to Paris. | I have never been to Paris.

   Base:       0.690028965473175
   Fine-tuned: 0.4106355905532837


3.6 Million More Pe

---