In [2]:
from sentence_transformers import SentenceTransformer, models, losses, InputExample, evaluation
import json
import torch
from torch.utils.data import DataLoader
from transformers import AutoTokenizer, AutoModel
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

  from .autonotebook import tqdm as notebook_tqdm


# Loading in Datasets

In [4]:
def get_premises_text(gold, raw_data_location):
  for data in gold:
        section = gold[data]["Section_id"]
        primary_id = gold[data]["Primary_id"]
        with open(f'{raw_data_location}/{primary_id}.json') as f:
            primary = json.load(f)

        primary_premise = ' '.join(primary[section])
        gold[data]["Primary_premise"] = primary_premise

        if gold[data]["Type"] == 'Comparison':
            secondary_id = gold[data]["Secondary_id"]
            with open(f'{raw_data_location}/{secondary_id}.json') as f:
                secondary = json.load(f)
            secondary_premise = ' '.join(secondary[section])
            gold[data]["Secondary_premise"] = secondary_premise

with open('..\\data\\raw\\train.json') as f:
    train_data = json.load(f)
with open('..\\data\\raw\\dev.json') as f:
    dev_data = json.load(f)
with open('..\\data\\raw\\test.json') as f:
    test_data = json.load(f)

get_premises_text(train_data, '..\\data\\raw\\CT')
get_premises_text(dev_data, '..\\data\\raw\\CT')
get_premises_text(test_data, '..\\data\\raw\\CT')

In [5]:
def update_data_dict(new_data_dict, old_data_dict, new_key, key_to_use):
    for key in new_data_dict:
        if key_to_use in new_data_dict[key]:
            old_data_dict[key][new_key] = new_data_dict[key][key_to_use]

In [6]:
for fine_tuning_steps in [0, 2, 5, 7, 10]:
    with open(f'..\\data\\raw\\summary_train_{fine_tuning_steps}.json') as f:
        train_data_summarized = json.load(f)

    with open(f'..\\data\\raw\\summary_dev_{fine_tuning_steps}.json') as f:
        dev_data_summarized = json.load(f)

    with open(f'..\\data\\raw\\summary_test_{fine_tuning_steps}.json') as f:
        test_data_summarized = json.load(f)

    update_data_dict(train_data_summarized, train_data, f"Summarized_Primary_premise_{fine_tuning_steps}", "Primary_Premise")
    update_data_dict(dev_data_summarized, dev_data, f"Summarized_Primary_premise_{fine_tuning_steps}", "Primary_Premise")
    update_data_dict(test_data_summarized, test_data, f"Summarized_Primary_premise_{fine_tuning_steps}", "Primary_Premise")

    update_data_dict(train_data_summarized, train_data, f"Summarized_Secondary_premise_{fine_tuning_steps}", "Secondary_Premise")
    update_data_dict(dev_data_summarized, dev_data, f"Summarized_Secondary_premise_{fine_tuning_steps}", "Secondary_Premise")
    update_data_dict(test_data_summarized, test_data, f"Summarized_Secondary_premise_{fine_tuning_steps}", "Secondary_Premise")

In [7]:
for fine_tuning_steps in [0, 2, 5, 7]:
    with open(f'..\\data\\raw\\scifive_train_{fine_tuning_steps}.json') as f:
        train_data_summarized = json.load(f)

    with open(f'..\\data\\raw\\scifive_dev_{fine_tuning_steps}.json') as f:
        dev_data_summarized = json.load(f)

    with open(f'..\\data\\raw\\scifive_test_{fine_tuning_steps}.json') as f:
        test_data_summarized = json.load(f)

    update_data_dict(train_data_summarized, train_data, f"Scifive_Primary_premise_{fine_tuning_steps}", "Primary_Premise")
    update_data_dict(dev_data_summarized, dev_data, f"Scifive_Primary_premise_{fine_tuning_steps}", "Primary_Premise")
    update_data_dict(test_data_summarized, test_data, f"Scifive_Primary_premise_{fine_tuning_steps}", "Primary_Premise")

    update_data_dict(train_data_summarized, train_data, f"Scifive_Secondary_premise_{fine_tuning_steps}", "Secondary_Premise")
    update_data_dict(dev_data_summarized, dev_data, f"Scifive_Seconday_premise_{fine_tuning_steps}", "Secondary_Premise")
    update_data_dict(test_data_summarized, test_data, f"Scifive_Seconday_premise_{fine_tuning_steps}", "Secondary_Premise")

In [8]:
for fine_tuning_steps in [0]:
    with open(f'..\\data\\raw\\combined_train_{fine_tuning_steps}.json') as f:
        train_data_summarized = json.load(f)

    with open(f'..\\data\\raw\\combined_dev_{fine_tuning_steps}.json') as f:
        dev_data_summarized = json.load(f)

    with open(f'..\\data\\raw\\combined_test_{fine_tuning_steps}.json') as f:
        test_data_summarized = json.load(f)

    update_data_dict(train_data_summarized, train_data, f"Combined_Primary_premise_{fine_tuning_steps}", "Primary_Premise")
    update_data_dict(dev_data_summarized, dev_data, f"Combined_Primary_premise_{fine_tuning_steps}", "Primary_Premise")
    update_data_dict(test_data_summarized, test_data, f"Combined_Primary_premise_{fine_tuning_steps}", "Primary_Premise")

    # update_data_dict(train_data_summarized, train_data, f"Summarized_Secondary_premise_{fine_tuning_steps}", "Secondary_Premise")
    # update_data_dict(dev_data_summarized, dev_data, f"Summarized_Secondary_premise_{fine_tuning_steps}", "Secondary_Premise")
    # update_data_dict(test_data_summarized, test_data, f"Summarized_Secondary_premise_{fine_tuning_steps}", "Secondary_Premise")

# Generating Finetuning Dataset

In [37]:
fine_tuning_steps_suffix = "_0"  # "_0" or "_2" or "_5" or "_7" or "_10" or ""
premise_prefix = "Combined_" # "Summarized_" or ""Scifive_" or "" or "Combined_"
premise_combined = premise_prefix == "Combined_"
primary_premise_to_use = premise_prefix + "Primary_premise" + fine_tuning_steps_suffix
secondary_premise_to_use = premise_prefix + "Secondary_premise" + fine_tuning_steps_suffix

In [38]:
print(primary_premise_to_use)
print(secondary_premise_to_use)
print(premise_combined)

Combined_Primary_premise_0
Combined_Secondary_premise_0
True


In [39]:
train_examples = []
for data in train_data:
  statement = train_data[data]["Statement"]
  premise = train_data[data][primary_premise_to_use]
  currLabel = 0 if train_data[data]["Label"] == "Contradiction" else 1

  if not(premise_combined) and ("Secondary_Premise" in train_data[data]):
    premise += train_data[data][secondary_premise_to_use]
  train_examples.append(InputExample(texts=[premise, statement], label=currLabel))

In [40]:
sentences1 = []
sentences2 = []
scores = []

for data in dev_data:
  statement = dev_data[data]["Statement"]
  premise = dev_data[data][primary_premise_to_use]
  currLabel = 0 if dev_data[data]["Label"] == "Contradiction" else 1

  if not(premise_combined) and "Secondary_Premise" in dev_data[data]:
    premise += dev_data[data][secondary_premise_to_use]

  sentences1.append(premise)
  sentences2.append(statement)
  scores.append(currLabel)

# Dataloader for Finetuning and Evaluation of Sentence Transformer

In [44]:
word_embedding_model = models.Transformer('microsoft/deberta-v3-base')
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [45]:
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=16)

# Note here Contrastive Loss and Softmax Loss are both options we can use
train_loss = losses.ContrastiveLoss(model=model)

In [46]:
evaluator = evaluation.EmbeddingSimilarityEvaluator(sentences1, sentences2, scores)

# Sentence Transformer Model Training

In [47]:
model_save_path = f'../Sentence Transformer/{primary_premise_to_use}'
model.fit(train_objectives=[(train_dataloader, train_loss)],
          epochs=10,
          evaluator=evaluator,
          evaluation_steps=500,
          output_path=model_save_path)

Iteration: 100%|██████████| 107/107 [00:22<00:00,  4.76it/s]
Iteration: 100%|██████████| 107/107 [00:21<00:00,  4.98it/s]
Iteration: 100%|██████████| 107/107 [00:21<00:00,  5.07it/s]
Iteration: 100%|██████████| 107/107 [00:21<00:00,  5.01it/s]
Iteration: 100%|██████████| 107/107 [00:21<00:00,  5.02it/s]
Iteration: 100%|██████████| 107/107 [00:21<00:00,  4.97it/s]
Iteration: 100%|██████████| 107/107 [00:22<00:00,  4.75it/s]
Iteration: 100%|██████████| 107/107 [00:22<00:00,  4.75it/s]
Iteration: 100%|██████████| 107/107 [00:22<00:00,  4.73it/s]
Iteration: 100%|██████████| 107/107 [00:22<00:00,  4.79it/s]
Epoch: 100%|██████████| 10/10 [03:51<00:00, 23.15s/it]


# Sentence Embedding for Data

Note that currently since we are running a dummy example the premise_statements function stops the loop early. We need to remove this for the full run

In [48]:
#Mean Pooling - Take attention mask into account for correct averaging
def mean_pooling(model_output, attention_mask):
    token_embeddings = model_output[0] #First element of model_output contains all token embeddings
    input_mask_expanded = attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float()
    sum_embeddings = torch.sum(token_embeddings * input_mask_expanded, 1)
    sum_mask = torch.clamp(input_mask_expanded.sum(1), min=1e-9)
    return sum_embeddings / sum_mask

In [49]:
def premise_statements(dataset):
  premises = []
  statements = []
  outputs = []
  cnt = 1

  for data in dataset:
    # if cnt == 3:
      # break
    statement = dataset[data]["Statement"]
    premise = dataset[data]["Primary_premise"]
    currLabel = 0 if dataset[data]["Label"] == "Contradiction" else 1

    if "Secondary_Premise" in dataset[data]:
      premise += dataset[data]["Secondary_premise"]

    premises.append(premise)
    statements.append(statement)
    outputs.append(currLabel)
    cnt += 1

  return premises, statements, outputs

In [50]:
premises, statements, outputs = premise_statements(train_data)

tokenizer = AutoTokenizer.from_pretrained('../Sentence Transformer/trial_run')
model = AutoModel.from_pretrained('../Sentence Transformer/trial_run')

encoded_premise = tokenizer(premises, padding=True, truncation=True, max_length=300, return_tensors='pt')
encoded_statements = tokenizer(statements, padding=True, truncation=True, max_length=300, return_tensors='pt')

with torch.no_grad():
    premise_outputs = model(**encoded_premise)

with torch.no_grad():
    statement_outputs = model(**encoded_statements)


premise_embeddings = mean_pooling(premise_outputs, encoded_premise['attention_mask'])
statement_embeddings = mean_pooling(statement_outputs, encoded_statements['attention_mask'])

ones = torch.ones((len(premise_embeddings),1))
twos = 2 * torch.ones((len(statement_embeddings),1))
targets = torch.Tensor(outputs)
concat_embeds = torch.cat((premise_embeddings, statement_embeddings, ones, twos), dim=1)

In [51]:
premises_dev, statements_dev, outputs_dev = premise_statements(dev_data)

tokenizer = AutoTokenizer.from_pretrained('../Sentence Transformer/trial_run')
model = AutoModel.from_pretrained('../Sentence Transformer/trial_run')

encoded_premise_dev = tokenizer(premises_dev, padding=True, truncation=True, max_length=300, return_tensors='pt')
encoded_statements_dev = tokenizer(statements_dev, padding=True, truncation=True, max_length=300, return_tensors='pt')

with torch.no_grad():
    premise_outputs_dev = model(**encoded_premise_dev)

with torch.no_grad():
    statement_outputs_dev = model(**encoded_statements_dev)


premise_embeddings_dev = mean_pooling(premise_outputs_dev, encoded_premise_dev['attention_mask'])
statement_embeddings_dev = mean_pooling(statement_outputs_dev, encoded_statements_dev['attention_mask'])

ones_dev = torch.ones((len(premise_embeddings_dev),1))
twos_dev = 2 * torch.ones((len(statement_embeddings_dev),1))
targets_dev = torch.Tensor(outputs_dev)
concat_embeds_dev = torch.cat((premise_embeddings_dev, statement_embeddings_dev, ones_dev, twos_dev), dim=1)

In [52]:
premises_test, statements_test, outputs_test = premise_statements(test_data)

tokenizer = AutoTokenizer.from_pretrained('../Sentence Transformer/trial_run')
model = AutoModel.from_pretrained('../Sentence Transformer/trial_run')

encoded_premise_test = tokenizer(premises_test, padding=True, truncation=True, max_length=300, return_tensors='pt')
encoded_statements_test = tokenizer(statements_test, padding=True, truncation=True, max_length=300, return_tensors='pt')

with torch.no_grad():
    premise_outputs_test = model(**encoded_premise_test)

with torch.no_grad():
    statement_outputs_test = model(**encoded_statements_test)


premise_embeddings_test = mean_pooling(premise_outputs_test, encoded_premise_test['attention_mask'])
statement_embeddings_test = mean_pooling(statement_outputs_test, encoded_statements_test['attention_mask'])

ones_test = torch.ones((len(premise_embeddings_test),1))
twos_test = 2 * torch.ones((len(statement_embeddings_test),1))
targets_test = torch.Tensor(outputs_test)
concat_embeds_test = torch.cat((premise_embeddings_test, statement_embeddings_test, ones_test, twos_test), dim=1)

# Dataloader for FC Head

In [53]:
dataset = TensorDataset(concat_embeds, targets)
train_dataloader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    num_workers=4
)

In [54]:
dev_dataset = TensorDataset(concat_embeds_dev, targets_dev)
dev_dataloader = DataLoader(
    dev_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4
)

In [55]:
test_dataset = TensorDataset(concat_embeds_test, targets_test)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=8,
    shuffle=False,
    num_workers=4
)

# FC Head

In [56]:
class Net(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, num_classes):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(input_size, hidden_size1)
        self.fc2 = nn.Linear(hidden_size1, hidden_size2)
        self.fc3 = nn.Linear(hidden_size2, num_classes)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = torch.sigmoid(self.fc3(x))
        return x

In [57]:
# Feel free to have different hidden sizes
input_size = concat_embeds.shape[1]
hidden_size1 = 1024
hidden_size2 = 512
num_classes = 1

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(device)


classifier = Net(input_size, hidden_size1, hidden_size2, num_classes)
classifier.to(device)

criterion = nn.BCELoss()
optimizer = torch.optim.Adam(classifier.parameters(), lr=0.001)

cuda


In [58]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

num_epochs = 100
for epoch in range(num_epochs):
    for i, (X, y) in enumerate(train_dataloader):
      X = X.to(device)
      y = y.to(device)
      y = y.reshape((-1,1))
      outputs = classifier(X)
      loss = criterion(outputs, y)

      # Backward and optimize
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()

    # Test the model with macro F1 on test data loader
    with torch.no_grad():
      correct = 0
      total = 0
      true_y = []
      pred_y = []
      for X, y in test_dataloader:
        X = X.to(device)
        y = y.to(device)
        y = y.reshape((-1,1))
        outputs = classifier(X)
        predicted = torch.round(outputs)
        total += y.size(0)
        correct += (predicted == y).sum().item()
        true_y.extend(y.tolist())
        pred_y.extend(predicted.tolist())

      macro_f1 = f1_score(true_y, pred_y, average='macro')
      print(f"Epoch {epoch} - Test  Accuracy: {100 * correct / total:0.5f}, \t Test  F1: {macro_f1:0.5f}")

Epoch 0 - Test  Accuracy: 54.40000, 	 Test  F1: 0.52942
Epoch 1 - Test  Accuracy: 55.80000, 	 Test  F1: 0.55557
Epoch 2 - Test  Accuracy: 54.60000, 	 Test  F1: 0.51254
Epoch 3 - Test  Accuracy: 54.00000, 	 Test  F1: 0.49179
Epoch 4 - Test  Accuracy: 54.80000, 	 Test  F1: 0.54447
Epoch 5 - Test  Accuracy: 54.00000, 	 Test  F1: 0.49583
Epoch 6 - Test  Accuracy: 54.00000, 	 Test  F1: 0.53416
Epoch 7 - Test  Accuracy: 55.20000, 	 Test  F1: 0.55128
Epoch 8 - Test  Accuracy: 57.00000, 	 Test  F1: 0.56511
Epoch 9 - Test  Accuracy: 56.40000, 	 Test  F1: 0.52805
Epoch 10 - Test  Accuracy: 54.60000, 	 Test  F1: 0.54425
Epoch 11 - Test  Accuracy: 56.00000, 	 Test  F1: 0.53116
Epoch 12 - Test  Accuracy: 55.80000, 	 Test  F1: 0.54837
Epoch 13 - Test  Accuracy: 53.40000, 	 Test  F1: 0.50892
Epoch 14 - Test  Accuracy: 55.20000, 	 Test  F1: 0.54454
Epoch 15 - Test  Accuracy: 56.40000, 	 Test  F1: 0.56263
Epoch 16 - Test  Accuracy: 55.20000, 	 Test  F1: 0.53834
Epoch 17 - Test  Accuracy: 55.20000, 	 Te