In [None]:
# import libraries and mount the Google Drive
#import evaluate as ev
import re
#import semEval as sem
import torch
import random
from transformers import BertModel
from transformers import AutoTokenizer
from tqdm import tqdm_notebook as tqdm
import json

from google.colab import drive
drive.mount('/content/drive')
# This is so that you don't have to restart the kernel every time you edit an imported module (e.g. hmm.py)
%load_ext autoreload
%autoreload 2

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [None]:
def get_json_data(json_file):
    '''Load the subtask data from a JSON file.

    The file is opened explicitly as UTF-8 so that non-ASCII characters in the
    utterances are decoded the same way regardless of the platform's default
    locale encoding.

    args:
      json_file (string): file path

    returns:
      the parsed JSON content, exactly as produced by json.load
    '''
    with open(json_file, 'r', encoding='utf-8') as fi:
        return json.load(fi)

def get_target_conv_utt_ids(dia_utt, pattern = r'\d+' ):
    '''Split a combined dialogue/utterance ID such as 'dia1utt3' into its two parts.

    args:
      dia_utt (string): ID string holding the conversation id followed by the utterance id
      pattern (string): regex used to extract the groups (by default every run of digits)

    returns:
      tuple of the first two regex matches, as strings: (conversation id, utterance id)
    '''
    matches = re.findall(pattern, dia_utt)
    conversation_id = matches[0]
    utterance_id = matches[1]
    return conversation_id, utterance_id

def get_cause_span_ids(cause_spans):
    ''' Extract the utterance id that prefixes every cause span.

      Each span is a string of the form '<utterance id>_<span text>',
      e.g. '1_I like python'; only the part before the first underscore is used.

      args:
        cause_spans (List[string]): list of cause-span strings

      returns:
        List[int]: the utterance ids, in the same order as cause_spans
    '''
    return [int(span.split('_')[0]) for span in cause_spans]

def get_train_val_test(data_pairs, train_split = .8, val_split = .1, test_split = .1):
    '''Cut the data into consecutive train / validation / test slices.

    The slices are taken in order (no shuffling happens here), so the caller
    is responsible for shuffling data_pairs beforehand.

    args:
      data_pairs (sliceable sequence): the data points to split
      train_split, val_split (float): proportion of the data used for train and validation
      test_split (float): accepted for symmetry but not used; the test slice is
        whatever remains after the train and validation slices

    returns:
      (train, val, test) slices of data_pairs
    '''
    total = len(data_pairs)
    train_end = int(total * train_split)
    val_end = train_end + int(val_split * total)
    return data_pairs[:train_end], data_pairs[train_end:val_end], data_pairs[val_end:]

def count_labels(cause_pair):
    ''' Count how many data pairs carry the label 1.

    args:
      cause_pair (iterable of dicts): data pairs, each with an int under the key 'label'

    returns:
      int: number of pairs whose 'label' equals 1
    '''
    return sum(1 for dp in cause_pair if dp['label'] == 1)


class BatchTokenizer:
    """Thin wrapper around a HuggingFace tokenizer that encodes batches of sentence pairs."""

    def __init__(self, model_name='prajjwal1/bert-small'):
        """Loads the pretrained tokenizer.

        Args:
            model_name (str, optional): Name handed to ``AutoTokenizer.from_pretrained``.
                Defaults to 'prajjwal1/bert-small'.
        """
        self.model_name = model_name
        self.hf_tokenizer = AutoTokenizer.from_pretrained(model_name)

    def get_sep_token(self):
        """Returns the separator token of the wrapped tokenizer."""
        tokenizer = self.hf_tokenizer
        return tokenizer.sep_token

    def __call__(self, prem_batch, hyp_batch):
        """Tokenizes, pairs and pads a batch with the HuggingFace tokenizer.

        The two batches are passed to the tokenizer together, so each premise is
        encoded jointly with the hypothesis at the same index.

        Args:
            prem_batch (List[str]): First sentence of every pair
            hyp_batch (List[str]): Second sentence of every pair

        Returns:
            Dict: The token specification produced by the HuggingFace tokenizer
                (PyTorch tensors, padded, without token type ids)
        """
        encode_options = {
            'padding': True,
            'return_token_type_ids': False,
            'return_tensors': 'pt',
        }
        return self.hf_tokenizer(prem_batch, hyp_batch, **encode_options)


def generate_pairwise_input(dataset):
    """
    Turn a list of datapoints into three parallel lists.

    Every datapoint is a dict with the keys 'pos_cause', 'target' and 'label';
    the values are collected column-wise, preserving the order of the dataset.

    Returns:
        tuple: (pos_causes, targets, labels)
    """
    columns = {'pos_cause': [], 'target': [], 'label': []}
    for datapoint in dataset:
        for key, bucket in columns.items():
            bucket.append(datapoint[key])

    return columns['pos_cause'], columns['target'], columns['label']

def chunk(lst, n):
    """Lazily yield consecutive slices of lst holding n items each (the last one may be shorter)."""
    # Being a generator, this hands out one batch at a time instead of building them all up front.
    yield from (lst[start:start + n] for start in range(0, len(lst), n))

def chunk_multi(lst1, lst2, n):
    """Yield aligned n-sized slices of two sequences; the slice offsets are driven by len(lst1)."""
    for start in range(0, len(lst1), n):
        stop = start + n
        yield lst1[start:stop], lst2[start:stop]


def encode_labels(labels):
    """Converts a batch of integer labels into a tensor.

    Args:
        labels (List): The labels of one batch; nested lists must all have the
            same length (pad them first) so they form a rectangular tensor

    Returns:
        torch.LongTensor: The labels of the batch as 64-bit integers
    """
    label_tensor = torch.LongTensor(labels)
    return label_tensor



class NLIClassifier(torch.nn.Module):
    """Frozen-BERT classifier that tags each of `output_size` positions as inside/outside the cause span.

    The [CLS] embedding of the encoded sentence pair goes through one hidden layer
    and then through two linear heads of width `output_size`: one scoring class 0
    ("not in the cause span") and one scoring class 1 ("in the cause span") for
    every position. The two heads are stacked into a (batch, output_size, 2) tensor
    that is normalised into log-probabilities over the two classes.
    """

    def __init__(self, output_size: int, hidden_size: int, model_name='prajjwal1/bert-small'):
        """Builds the network.

        Args:
            output_size (int): Number of positions each head scores per example
            hidden_size (int): Width of the hidden layer between BERT and the heads
            model_name (str, optional): Pretrained BERT checkpoint to load
        """
        super().__init__()
        self.output_size = output_size
        self.hidden_size = hidden_size

        # Initialize BERT, which we use instead of a single embedding layer.
        self.bert = BertModel.from_pretrained(model_name)

        # Updating all BERT parameters can be slow and memory intensive, so BERT is
        # frozen: only the hidden layer and the two classification heads are trained.
        for param in self.bert.parameters():
            param.requires_grad = False

        self.bert_hidden_dimension = self.bert.config.hidden_size

        # Projects the BERT [CLS] embedding down to hidden_size.
        self.hidden_layer = torch.nn.Linear(self.bert_hidden_dimension, self.hidden_size, bias = True)
        self.relu = torch.nn.ReLU()

        # Log-softmax over dim 1. Not used by forward(); kept so the attribute
        # remains available to any code that refers to it.
        self.log_softmax = torch.nn.LogSoftmax(dim=1)

        # the classifier for 0 (which is not being in the cause span)
        self.classifier_out = torch.nn.Linear(self.hidden_size, self.output_size)
        # the classifier for 1 (which is being in the cause span)
        self.classifier_in = torch.nn.Linear(self.hidden_size, self.output_size)
        # Stacking the two heads adds a trailing class dimension (dim 2); this is the
        # dimension the log-softmax has to normalise over.
        self.log_softmax_binary = torch.nn.LogSoftmax(dim = 2)

    def encode_text(self,symbols):
        """Encode the (batch of) sequence(s) of token symbols with BERT and return the [CLS] representation.

        Args:
            symbols (Dict): The Dict of token specifications provided by the HuggingFace tokenizer

        Returns:
            torch.Tensor: embedding of the first token ([CLS]) of the last hidden
                state, shape (batch_size, bert_hidden_dimension)
        """
        encoded_sequence = self.bert(**symbols)
        # The first position of last_hidden_state is the [CLS] token.
        return encoded_sequence.last_hidden_state[:,0,:]

    def forward(self,symbols):
        """Scores every position as outside (class 0) or inside (class 1) the cause span.

        Args:
            symbols (Dict): The Dict of token specifications provided by the HuggingFace tokenizer

        Returns:
            torch.Tensor: log-probabilities of shape (batch_size, output_size, 2);
                for every position the two class entries exponentiate to a sum of 1
        """
        encoded_sents = self.encode_text(symbols)
        hidden = self.relu(self.hidden_layer(encoded_sents))
        output_out = self.classifier_out(hidden)
        output_in = self.classifier_in(hidden)

        # (batch, output_size) x 2 -> (batch, output_size, 2)
        output = torch.stack([output_out, output_in], dim = -1)

        # Normalise over the class dimension. Using the dim=1 log-softmax here would
        # normalise across the positions instead, so the values would not be class
        # log-probabilities and NLLLoss could not be driven below ~log(output_size).
        return self.log_softmax_binary(output)


# For making predictions at test time
def predict(model, sents):
    """Predict a hard 0/1 label for every position of every example in a batch.

    The batch is moved to the device the model's parameters live on, so the
    function does not depend on a global `device`. Inference runs with gradient
    tracking disabled and with the model in eval mode (dropout off); the model's
    previous train/eval mode is restored afterwards so that calling this in the
    middle of training does not change how training continues.

    Args:
        model (torch.nn.Module): model whose output has the classes on dim 2
        sents: batch accepted by the model; must provide ``.to(device)``

    Returns:
        torch.Tensor: argmax over dim 2 of the model output
    """
    first_param = next(model.parameters(), None)
    if first_param is not None:
        sents = sents.to(first_param.device)

    was_training = model.training
    model.eval()
    try:
        with torch.no_grad():
            log_probs = model(sents)
    finally:
        model.train(was_training)
    return torch.argmax(log_probs, dim=2)

def training_loop(
    num_epochs,
    train_features,
    train_labels,
    dev_sents,
    dev_labels,
    optimizer,
    model,
):
    """Train the model and report the F1 score for label 1 on the dev set after every epoch.

    Args:
        num_epochs (int): number of passes over the training batches
        train_features: list of tokenized input batches (each must provide ``.to(device)``)
        train_labels: list of label tensors, one per training batch
            (assumes 2-D LongTensors of shape (batch, n_tokens) - TODO confirm)
        dev_sents: list of tokenized dev input batches
        dev_labels: list of dev label tensors, one per dev batch
        optimizer: torch optimizer updating the model parameters
        model (torch.nn.Module): model whose output has the positions on dim 1
            and the classes on dim 2

    Returns:
        the trained model (the same object that was passed in)
    """
    print("Training...")
    loss_func = torch.nn.NLLLoss()

    # Run on whatever device the model already lives on instead of assuming 'cuda:0',
    # so the loop also works on a CPU-only runtime.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    # The batches are shuffled once, before the first epoch.
    batches = list(zip(train_features, train_labels))
    random.shuffle(batches)
    for epoch in range(num_epochs):
        losses = []
        for features, labels in tqdm(batches):

            # Empty the dynamic computation graph
            optimizer.zero_grad()
            preds = model(features.to(device))

            # Bring the labels to the width of the predictions: shorter label rows
            # are right-padded with 0; a negative pad amount crops longer rows.
            padded_labels = torch.nn.functional.pad(
                labels, pad=(0, preds.shape[1] - labels.size(1))
            ).to(device)

            # NLLLoss wants (N, C) input with an (N,) target, so the loss is computed
            # per example (positions act as N, classes as C) and summed over the batch.
            loss = sum(
                loss_func(sample_preds, sample_labels)
                for sample_preds, sample_labels in zip(preds, padded_labels)
            )

            # Backpropogate the loss through our model
            loss.backward()
            optimizer.step()
            losses.append(loss.item())

        # Guard against an empty list of training batches.
        avg_loss = sum(losses) / len(losses) if losses else float('nan')
        print(f"epoch {epoch}, avg. loss of the batches: {avg_loss}")

        # Estimate the f1 score for the development set
        print("Evaluating dev...")
        all_preds = []
        all_labels = []
        for sents, labels in tqdm(zip(dev_sents, dev_labels), total=len(dev_sents)):
            pred = predict(model, sents)
            # Pad against this batch's own prediction width rather than relying on
            # `preds` left over from the last training batch.
            padded_dev_labels = torch.nn.functional.pad(
                labels, pad=(0, pred.shape[1] - labels.size(1))
            )

            all_preds.extend(list(pred.cpu().numpy()))
            all_labels.extend(list(padded_dev_labels.cpu().numpy()))

        dev_f1_score = f1_score(all_preds, all_labels, which_label = 1)

        print(f"Dev F1 Score {dev_f1_score}")

    # Return the trained model
    return model

import numpy as np
from numpy import sum as t_sum
from numpy import logical_and

# problem with which device the data is on
# predicted_labels and true_labels are a list of numpy arrays? but then why are
def precision(predicted_labels, true_labels, which_label=1):
    """
    Precision is True Positives / All Positive Predictions.

    Both arguments are sequences of labels (scalars, or equal-length arrays of
    labels); every entry is compared element-wise against which_label.
    Returns 0. when nothing was predicted as which_label.
    """
    predicted_mask = np.array([pred == which_label for pred in predicted_labels])
    gold_mask = np.array([lab == which_label for lab in true_labels])
    n_predicted = t_sum(predicted_mask)
    if not n_predicted:
        return 0.
    true_positives = t_sum(logical_and(predicted_mask, gold_mask))
    return true_positives/n_predicted


def recall(predicted_labels, true_labels, which_label=1):
    """
    Recall is True Positives / All Positive Labels.

    Both arguments are sequences of labels (scalars, or equal-length arrays of
    labels); every entry is compared element-wise against which_label.
    Returns 0. when which_label never occurs in true_labels.
    """
    predicted_mask = np.array([pred == which_label for pred in predicted_labels])
    gold_mask = np.array([lab == which_label for lab in true_labels])
    n_gold = t_sum(gold_mask)
    if not n_gold:
        return 0.
    true_positives = t_sum(logical_and(predicted_mask, gold_mask))
    return true_positives/n_gold

# error here
def f1_score(
    predicted_labels,
    true_labels,
    which_label
):
    """
    F1 score is the harmonic mean of precision and recall for which_label.

    Returns 0. when either precision or recall is zero.
    """
    prec = precision(predicted_labels, true_labels, which_label=which_label)
    rec = recall(predicted_labels, true_labels, which_label=which_label)

    if not (prec and rec):
        return 0.
    return 2*prec*rec/(prec+rec)


def macro_f1(
    predicted_labels,
    true_labels,
    possible_labels,
    label_map=None
):
    """
    Macro F1: the unweighted mean of the per-label F1 scores over possible_labels.

    When label_map is given, each prediction is first translated through
    label_map[int(prediction)] before scoring.
    """
    if label_map:
        converted_prediction = [label_map[int(x)] for x in predicted_labels]
    else:
        converted_prediction = predicted_labels

    per_label_scores = []
    for label in possible_labels:
        per_label_scores.append(f1_score(converted_prediction, true_labels, label))
    # Macro, so every label contributes equally to the average.
    return sum(per_label_scores) / len(per_label_scores)


def balance_data_pairs(data_pairs):
  """Downsample the non-1 labels so that there are at most as many of them as 1 labels.

  Every pair labeled 1 is kept; pairs with any other label are kept in order of
  appearance until their number reaches the number of 1-labeled pairs.
  The relative order of the kept pairs is preserved.
  """
  positives_total = count_labels(data_pairs)
  kept = []
  negatives_kept = 0
  for pair in data_pairs:
    if pair['label'] == 1:
      kept.append(pair)
    elif negatives_kept < positives_total:
      kept.append(pair)
      negatives_kept += 1
  return kept

def print_test_metrics(all_preds, all_labels, i = 20):
  """Print the first i (prediction, label) pairs, then recall and precision for label 1."""
  for pred, label in zip(all_preds[:i], all_labels[:i]):
    print(pred,label)

  recall_value = recall(all_preds, all_labels)
  print('Recall %.2f'%recall_value)

  precision_value = precision(all_preds, all_labels)
  print('Precision %.2f'%precision_value)

# Participants need to provide the position indexes of the cause span in the prediction file!!!
# You can use this function to obtain the position indexes of golden annotations.
def get_span_position(span, utterance):
    """Locate the first occurrence of span inside utterance, counted in whitespace tokens.

    Returns [begin_id, end_id] with begin_id inclusive and end_id exclusive
    (token indexes start at 0). [0, 0] is returned when the span does not occur.
    """
    cause_token = span.split()
    utterance_token = utterance.split()
    width = len(cause_token)
    # Only start positions that leave room for the whole span can match.
    n_starts = min(len(utterance_token), len(utterance_token) - width + 1)
    for start in range(n_starts):
        stop = start + width
        if utterance_token[start:stop] == cause_token:
            return [start, stop]
    return [0, 0]


def balance_data_pairs_label_list(data_pairs):
  """Downsample the all-negative pairs of a sequence-labeled data set.

  A pair counts as positive when its label list contains a 1. Every positive
  pair is kept; pairs without any 1 are kept in order of appearance until there
  are as many of them as positive pairs. The relative order is preserved, so the
  kept negatives sit towards the front of the result.
  """
  # number of pairs that contain at least one positive token
  positives_total = count_labels_label_list(data_pairs)
  kept = []
  # number of completely negative pairs kept so far
  negatives_kept = 0
  for pair in data_pairs:
    has_positive = 1 in pair['label']
    if has_positive:
      kept.append(pair)
    elif negatives_kept < positives_total:
      kept.append(pair)
      negatives_kept += 1
  return kept


def count_labels_label_list(cause_pair):
    ''' Count the positive pairs, i.e. those whose label list contains at least one 1.

    args:
      cause_pair (iterable of dicts): data pairs, each with a list of ints under the key 'label'

    returns:
      int: number of pairs with a 1 somewhere in their label list
    '''
    return sum(1 for dp in cause_pair if 1 in dp['label'])

def get_max_length_batch(batch):
  """Return the length of the longest label list in the batch (0 for an empty batch)."""
  return max((len(labels) for labels in batch), default=0)


def pad_label_batch(batch):
  """Right-pad every label list with 0 up to the length of the longest list in the batch.

  New lists are returned; the lists in batch are left untouched.
  """
  target_length = get_max_length_batch(batch)
  return [labels + [0] * (target_length - len(labels)) for labels in batch]

def get_binary_labels_data_pairs(data):
  """Build utterance-level data pairs {'pos_cause', 'target', 'label'} with a 0/1 label.

  For every record, each utterance of the conversation is paired with the text
  of the target (emotion) utterance; the label is 1 when the utterance id occurs
  among the record's cause spans and 0 otherwise.
  """
  pairs = []
  for record in data:
      # the target utterance id is 1-based, the conversation list is 0-based
      _, target_utt_id = get_target_conv_utt_ids(record['emotion_utterance_ID'])
      conversation = record['conversation']
      target_text = conversation[int(target_utt_id) - 1]['text']
      cause_ids = get_cause_span_ids(record['cause_spans'])
      for utterance in conversation:
          if utterance['utterance_ID'] in cause_ids:
              label = 1
          else:
              label = 0
          pairs.append({'pos_cause': utterance['text'],'target': target_text,'label': label})
  return pairs

def get_sequence_labels_data_pairs(data):
  """Build token-level data pairs {'pos_cause', 'target', 'label'}.

  For every record, each utterance of the conversation is paired with the text
  of the target (emotion) utterance. The label is a list with one entry per
  whitespace token of the utterance: 1 when the token lies inside a cause span
  of that utterance, 0 otherwise (e.g. [0, 0, 1, 1, 1]).

  Cause spans are strings of the form '<utterance id>_<span text>'. They are
  looked up by utterance id, so the result does not depend on the order of the
  spans, and several spans belonging to the same utterance are all marked.
  """
  nli_data_pairs = []
  for d in data:
      # the target utterance id is 1-based, the conversation list is 0-based
      _, target_utt_id = get_target_conv_utt_ids(d['emotion_utterance_ID'])
      target_text = d['conversation'][int(target_utt_id) - 1]['text']

      # utterance id -> texts of all cause spans inside that utterance.
      # partition() splits on the first underscore only, so a span text that
      # itself contains an underscore is kept whole.
      spans_by_utterance = {}
      for span in d['cause_spans']:
          utterance_id, _, span_text = span.partition('_')
          spans_by_utterance.setdefault(int(utterance_id), []).append(span_text)

      for utterance in d['conversation']:
          text = utterance['text']
          label = [0] * len(text.split())
          for span_text in spans_by_utterance.get(utterance['utterance_ID'], []):
              # [begin, end) in tokens; [0, 0] when the span is not found, which
              # leaves the label untouched
              begin, end = get_span_position(span_text, text)
              for i in range(begin, min(end, len(label))):
                  label[i] = 1
          nli_data_pairs.append({'pos_cause': text,'target': target_text,'label': label})
  return nli_data_pairs

In [None]:
# Read the SemEval training data. The JSON file was copied into Google Drive so
# that it is reachable from the Colab runtime.
data = get_json_data('drive/My Drive/Subtask_1_1_train.json')

# Use the GPU when the runtime provides one and fall back to the CPU otherwise,
# so the notebook also runs on a CPU-only runtime.
print(torch.cuda.is_available())
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

True


In [None]:
# Generate the token-level data pairs and create the train / val / test split.
# TODO: rewrite balance data pairs
data_pairs = get_sequence_labels_data_pairs(data)
# Fixed seed so that both shuffles below give the same order on every run.
random.seed(42)
# First shuffle: decides which all-negative pairs survive the balancing step,
# because balancing keeps negatives in order of appearance.
random.shuffle(data_pairs)
# (binary-label variant, no longer used: balance_data_pairs)
data_pairs = balance_data_pairs_label_list(data_pairs)
# Second shuffle: balancing stops keeping negatives once their quota is reached
# but keeps every positive, so the kept negatives cluster towards the front of
# the list. Since get_train_val_test slices in order, the list has to be mixed
# again to get a similar label distribution in train, val and test.
random.shuffle(data_pairs)
train, val, test = get_train_val_test(data_pairs)

In [None]:
# Check the split sizes and the label distribution. Each line prints
# (share of all data pairs in the split, share of positive pairs within the split);
# the last line prints the share of positive pairs in the whole data set.
# If the second number is far from .5, rerun the cell that creates train / val / test.

print(len(train) / len(data_pairs), count_labels_label_list(train) / len(train))
print(len(val) / len(data_pairs), count_labels_label_list(val)/ len(val))
print(len(test) / len(data_pairs), count_labels_label_list(test) / len(test))
print( count_labels_label_list(data_pairs) / len(data_pairs))

0.8 0.5036885245901639
0.1 0.47650273224043715
0.1 0.49398907103825135
0.5


In [None]:
# Look at one data pair: a dict with the keys 'pos_cause', 'target' and 'label'.
data_pairs[5]

{'pos_cause': 'No .',
 'target': 'Hi ! I am Dr . Drake Remoray and I have a few routine questions I need to ask you .',
 'label': [0, 0]}

In [None]:
# Try out the tokenizer on two (premise, hypothesis) pairs: the first list holds
# the premises, the second the hypotheses at the same index.
tokenizer = BatchTokenizer()
x = tokenizer(*[["this is the first premise", "This is the second premise"], ["This is first hypothesis", "This is the second hypothesis"]])
print(x)
# Decode the ids back to text to see the special tokens and the padding.
tokenizer.hf_tokenizer.batch_decode(x["input_ids"])

{'input_ids': tensor([[  101,  2023,  2003,  1996,  2034, 18458,   102,  2023,  2003,  2034,
         10744,   102,     0],
        [  101,  2023,  2003,  1996,  2117, 18458,   102,  2023,  2003,  1996,
          2117, 10744,   102]]), 'attention_mask': tensor([[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0],
        [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]])}


['[CLS] this is the first premise [SEP] this is first hypothesis [SEP] [PAD]',
 '[CLS] this is the second premise [SEP] this is the second hypothesis [SEP]']

# We can batch the train, validation, and test data, and then run it through the tokenizer

In [None]:
# Split every data set into three parallel lists (possible causes, targets, labels).
# At this point the data is still untokenized, unbatched and of varying length.
train_causes, train_targets, train_labels = generate_pairwise_input(train)
validation_causes, validation_targets, validation_labels = generate_pairwise_input(val)
test_causes, test_targets, test_labels = generate_pairwise_input(test)

# Number of data pairs per batch (also used by the test cell further down).
batch_size = 16

# Notice that since we use huggingface, we tokenize and
# encode in all at once!
tokenizer = BatchTokenizer()

# Batch the (cause, target) pairs, then tokenize + encode every batch.
train_input_batches = [b for b in chunk_multi(train_causes, train_targets, batch_size)]
train_input_batches = [tokenizer(*batch) for batch in train_input_batches]

# Batch the labels, pad each batch to its longest label list and turn it into a tensor.
train_label_batches = [b for b in chunk(train_labels, batch_size)]
train_label_batches = [pad_label_batch(batch) for batch in train_label_batches]
train_label_batches = [encode_labels(batch) for batch in train_label_batches]

# A second tokenizer instance with the same default model; later cells refer to
# it by this name.
batch_tokenizer = BatchTokenizer()

# create batches of validation data
# Tokenize + encode inputs
validation_input_batches = [b for b in chunk_multi(validation_causes, validation_targets, batch_size)]
validation_input_batches = [batch_tokenizer(*batch) for batch in validation_input_batches]

# batch and pad labels
validation_batch_labels = [b for b in chunk(validation_labels, batch_size)]
validation_batch_labels= [pad_label_batch(batch) for batch in validation_batch_labels]
validation_batch_labels = [encode_labels(batch) for batch in validation_batch_labels]


config.json:   0%|          | 0.00/286 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

In [None]:
# Run the training loop.
# Best hyperparameters noted so far: {epochs: 20, LR: .001, hidden_size = 32, batch_size: 16}
# You can increase epochs if need be
epochs = 100
# TODO: Find a good learning rate and hidden size
LR = 0.001
hidden_size = 32

#possible_labels = set(train_labels)
# output_size is the number of positions the classifier scores per example;
# the cells that load and test the model use the same value (64).
model = NLIClassifier(output_size=64, hidden_size=hidden_size)
# Move the model parameters to the selected device.
model.to(device)
# AdamW optimizer from torch; it receives the model parameters and the learning rate.
optimizer = torch.optim.AdamW(model.parameters(), LR)
# The trailing semicolon suppresses the repr of the returned model.
training_loop(
    epochs,
    train_input_batches,
    train_label_batches,
    validation_input_batches,
    validation_batch_labels,
    optimizer,
    model,
);

pytorch_model.bin:   0%|          | 0.00/116M [00:00<?, ?B/s]

Training...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for features, labels in tqdm(batches):


  0%|          | 0/915 [00:00<?, ?it/s]

epoch 0, avg. loss of the batches: 66.04193728087378
Evaluating dev...


Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for sents, labels in tqdm(zip(dev_sents, padded_dev_labels), total=len(dev_sents)):


  0%|          | 0/115 [00:00<?, ?it/s]

Dev F1 Score 0.3312807881773399


  0%|          | 0/915 [00:00<?, ?it/s]

epoch 1, avg. loss of the batches: 65.57678947240277
Evaluating dev...


  0%|          | 0/115 [00:00<?, ?it/s]

Dev F1 Score 0.3385322316970795


  0%|          | 0/915 [00:00<?, ?it/s]

KeyboardInterrupt: ignored

In [None]:
# Save the trained weights to Drive, or load previously saved weights back.
PATH =  "drive/MyDrive/#model_bert1"
save_not_load = False
if save_not_load:
  torch.save(model.state_dict(), PATH)
  # The following cells evaluate `model_from_drive`; after saving, that is simply
  # the model that was just written.
  model_from_drive = model
else:
  # The model class must be defined, and the constructor arguments must match
  # the ones the saved weights were trained with.
  model_from_drive = NLIClassifier(output_size=64, hidden_size=hidden_size)
  # map_location lets weights saved on a GPU be loaded on the current device.
  model_from_drive.load_state_dict(torch.load(PATH, map_location=device))
model_from_drive


In [None]:
##########################
#        Testing         #
##########################
# Tokenize + encode inputs
test_input_batches = [b for b in chunk_multi(test_causes, test_targets, batch_size)]
test_input_batches = [batch_tokenizer(*batch) for batch in test_input_batches]

# batch and pad labels
test_batch_labels = [b for b in chunk(test_labels, batch_size)]
test_batch_labels= [pad_label_batch(batch) for batch in test_batch_labels]
test_batch_labels = [encode_labels(batch) for batch in test_batch_labels]

all_preds_test = []
all_labels_test = []

# Move the model to the device once instead of on every iteration.
model_from_drive = model_from_drive.to(device)

# The predictions have one entry per model output position, so the labels are
# padded to the model's own output_size instead of a hard-coded number.
label_width = model_from_drive.output_size
padded_test_labels =[torch.nn.functional.pad(batch_labels, pad=(0, label_width - batch_labels.size(1))) for batch_labels in test_batch_labels]

for sents, labels in tqdm(zip(test_input_batches, padded_test_labels), total=len(padded_test_labels)):
    pred = predict(model_from_drive, sents)

    all_preds_test.extend(list(pred.cpu().numpy()))
    all_labels_test.extend(list(labels.cpu().numpy()))

# F1 score for label 1 (token inside the cause span) over all test positions.
test_f1 = f1_score(all_preds_test, all_labels_test,  which_label = 1)
print(test_f1)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for sents, labels in tqdm(zip(test_input_batches, padded_test_labels), total=len(padded_test_labels)):


  0%|          | 0/115 [00:00<?, ?it/s]

0.44724626053068944


In [None]:
# Scratch cell for inspecting the test predictions and labels.
# Only the last expression is displayed as the cell output.
#dev_sents[0] #, dev_labels
#su#m(np.array(all_preds) - np.array(all_labels))
all_preds_test
all_labels_test
# presumably the number of tokens in the first encoded test example - TODO confirm
len(test_input_batches[0][0])

52

In [None]:
# Collect the predictions and labels for the dev batches.
# NOTE(review): dev_sents and dev_labels are not defined by any cell visible in
# this notebook (they only appear as parameters of training_loop), so this cell
# presumably relies on leftover kernel state - confirm before re-running.
all_preds = []
all_labels = []
# The dev labels are not padded to the prediction width here.
for sents, labels in tqdm(zip(dev_sents, dev_labels), total=len(dev_sents)):

  pred = predict(model, sents)

  all_preds.extend(list(pred.cpu().numpy()))
  all_labels.extend(list(labels.cpu().numpy()))

#all_preds = [tensor.cpu() for tensor in all_preds]
  # fix this please!  numpy array?

#dev_macro_f1 = macro_f1(all_preds, all_labels, possible_labels =  list(set(all_labels)) )
#dev_f1_score = f1_score(all_preds, all_labels, which_label = 1) # possible_labels =  list(set(all_labels))  )

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for sents, labels in tqdm(zip(dev_sents, dev_labels), total=len(dev_sents)):


  0%|          | 0/229 [00:00<?, ?it/s]

In [None]:
# Step-by-step reproduction of precision() on the collected predictions, used to debug the metric.
which_label = 1
predicted_labels = all_preds
true_labels = all_labels
# One boolean mask per example: True where the entry equals which_label.
pred_which = np.array([pred == which_label for pred in predicted_labels])
# NOTE(review): the recorded run failed on the next line with a ValueError -
# presumably because all_labels holds arrays of different lengths; TODO confirm.
true_which = np.array([lab == which_label for lab in true_labels])
#pred_which
# Total number of positions predicted as 1 (the predictions only hold 0s and 1s).
denominator = t_sum([t_sum(p) for p in predicted_labels])

# Per-example true positives.
logical = [logical_and(p, t) for p,t in zip(pred_which, true_which)]
#print(t_sum([t_sum(logic) for logic in logical]) / denominator)

#logical

#if denominator:
 #   return t_sum(logical_and(pred_which, true_which))/denominator
#else:
 #   return 0

  true_which = np.array([lab == which_label for lab in true_labels])


ValueError: ignored

In [None]:
# Scratch cell for checking which device the collected predictions live on.
# NOTE(review): the recorded run ended in an AttributeError - presumably because
# the entries of all_preds are no longer torch tensors at this point; TODO confirm.
#np.array(all_preds) - np.array(all_labels)
for ten in all_preds[:2]:
  print(ten.device)
  for t in [[tensor.cpu() for tensor in ten]]:
    print(t)

AttributeError: ignored

In [None]:
# Scratch cell left over from debugging the shape mismatch between `preds` and
# `labels` for NLLLoss (the labels have to be padded to the prediction width).
# NOTE(review): `preds` is not defined by any cell visible in this notebook (it is
# a local of training_loop), so this cell presumably relies on leftover kernel
# state - confirm before re-running.
loss_func = torch.nn.NLLLoss()
# Attempts that were tried here and abandoned (all of them were commented out):
#   - loss_func(preds.T, labels.to(device))
#   - wrapping every single label in its own list before building the LongTensor
#   - reducing preds[:,:30, :] with torch.max(..., dim = -1).values before computing the loss
#   - loss_func(preds, padded_labels.to(device))
# Size of dim 1 of the predictions; the recorded output was 128.
preds.shape[1]

128

In [None]:
# Minimal, self-contained NLLLoss example on random data: the input is
# (batch_size, num_classes) log-probabilities, the target is (batch_size,) class ids.
# Note: this cell reassigns the notebook-level names `batch_size` and `loss`.
batch_size = 16
num_classes = 5
preds_ex = torch.randn(batch_size, num_classes)  # Random predictions
target_ex = torch.randint(0, num_classes, (batch_size,))  # Random target labels

# Apply log softmax over the class dimension (dim=1)
preds_log_softmax = torch.nn.functional.log_softmax(preds_ex, dim=1)

# Initialize NLLLoss
criterion = torch.nn.NLLLoss()
print(preds_log_softmax, target_ex)
# Calculate loss
loss = criterion(preds_log_softmax, target_ex)

# Print the loss
print(loss.item())

In [None]:
# let's take a look at what is going on in the validation predictions
# NOTE(review): the recorded run of this cell ended in a ValueError. The labels
# are passed to encode_labels without pad_label_batch here, so presumably the
# tensor construction fails on label lists of different lengths - TODO confirm.
validation_input_batches = [b for b in chunk_multi(validation_causes, validation_targets, batch_size)]

# Tokenize + encode
validation_input_batches = [batch_tokenizer(*batch) for batch in validation_input_batches]
validation_batch_labels = [b for b in chunk(validation_labels, batch_size)]
validation_batch_labels = [encode_labels(batch) for batch in validation_batch_labels]


all_preds = []
all_labels = []

for sents, labels in tqdm(zip(validation_input_batches, validation_batch_labels), total=len(validation_batch_labels)):
    #pred = predict(model, sents).cpu()

    pred = predict(model, sents)

    all_preds.extend(pred)
    all_labels.extend(list(labels.cpu().numpy()))

# F1 for label 1, and macro F1 over the labels that occur in all_labels.
f1_dev = f1_score(all_preds, all_labels, which_label= 1)
f1_macro_dev = macro_f1(all_preds, all_labels, possible_labels =  list(set(all_labels))  )
print(f1_dev, f1_macro_dev)
# Share and number of positive predictions.
print(sum(all_preds)/ len(all_preds), sum(all_preds) )
print_test_metrics(all_preds, all_labels)

ValueError: ignored

In [None]:

# Same inspection as for the validation data, but on the training data.
# NOTE(review): all_preds / all_labels are not reset here, so the results are
# appended to whatever an earlier cell left in them. The labels are also encoded
# without pad_label_batch; the recorded output (one 0/1 per example) presumably
# stems from the earlier binary-label version of the data - TODO confirm.
# batch and tokenize the train causes and targets
batch_size = 16

# Notice that since we use huggingface, we tokenize and
# encode in all at once!

train_input_batches = [b for b in chunk_multi(train_causes, train_targets, batch_size)]
#print(train_input_batches)
# Tokenize + encode
train_input_batches = [tokenizer(*batch) for batch in train_input_batches]

# batch the labels
train_label_batches = [b for b in chunk(train_labels, batch_size)]
train_label_batches = [encode_labels(batch) for batch in train_label_batches]


for sents, labels in tqdm(zip(train_input_batches, train_label_batches), total=len(train_label_batches)):
    #pred = predict(model, sents).cpu()

    pred = predict(model, sents)

    all_preds.extend(pred)
    all_labels.extend(list(labels.cpu().numpy()))

# Macro F1 over the labels that occur in all_labels (the name test_f1 is reused here).
#dev_f1 = macro_f1_score(all_preds, all_labels)
test_f1 = macro_f1(all_preds, all_labels, possible_labels =  list(set(all_labels))  )

# Share and number of positive predictions.
print(sum(all_preds)/ len(all_preds), sum(all_preds) )
print_test_metrics(all_preds, all_labels)

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  for sents, labels in tqdm(zip(train_input_batches, train_label_batches), total=len(train_label_batches)):


  0%|          | 0/695 [00:00<?, ?it/s]

0.3789587413059626 5612
0 0
0 0
1 1
0 0
0 0
0 1
0 1
0 0
0 0
1 1
0 0
1 1
0 0
0 1
0 1
1 1
0 1
1 1
0 0
0 0
Recall 0.63
Precision 0.83
