In [54]:
import pickle
import time

import numpy as np
import torch
from sklearn.metrics import matthews_corrcoef
from torch.utils.data import TensorDataset, DataLoader, SequentialSampler
from transformers import BertForSequenceClassification




In [57]:
# This notebook tests the first version of the model: classification with context.

# Base location shared by both artefacts below; change it here only.
SAVINGS_DIR = '/home/daril_kw/data/savings_for_60_rows'

# Model location handed to `BertForSequenceClassification.from_pretrained`.
# NOTE(review): the name says "before_training" - confirm this is the checkpoint
# that is meant to be evaluated.
PRETRAINED_MODEL_NAME = SAVINGS_DIR + '/model_before_training_opti_full_for_para_60'

# Path of the serialized test DataLoader (a file, despite the `_DIR` suffix).
DATALOADER_DIR = SAVINGS_DIR + "/test_dataloader_60.pt"


In [60]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the serialized test DataLoader.
# `map_location="cpu"` lets the file load on a CPU-only machine even if its
# tensors were saved from a GPU; every batch is moved to `device` before use.
# SECURITY: `torch.load` unpickles arbitrary objects - only load trusted files.
# NOTE(review): recent PyTorch releases default to `weights_only=True`, which
# rejects pickled DataLoader objects; pass `weights_only=False` there if needed.
prediction_dataloader = torch.load(DATALOADER_DIR, map_location="cpu")

# Load the classifier, move it to the device and switch it to evaluation mode.
model = BertForSequenceClassification.from_pretrained(PRETRAINED_MODEL_NAME)
model.to(device)
print("we evaluate")
model.eval()

# Tracking variables filled by the evaluation loop at the end of the notebook.
predictions, true_labels, list_inputs_test = [], [], []


we evaluate


In [65]:
# Pull a single batch out of the test dataloader to experiment with.
# A batch is a tuple of 3 tensors: input_ids, attention_mask, labels.
batch_iterator = iter(prediction_dataloader)
first_batch = next(batch_iterator)
# Put every tensor of the batch on the selected device.
first_batch = tuple(tensor.to(device) for tensor in first_batch)


In [66]:
# Display the first batch after the move to `device`: (input_ids, attention_mask, labels).
first_batch

(tensor([[  101, 29308,   128,  ...,     0,     0,     0],
         [  101, 29174,   128,  ...,     0,     0,     0],
         [  101, 29125,   128,  ...,     0,     0,     0],
         ...,
         [  101, 29282,   128,  ...,     0,     0,     0],
         [  101, 29850,   128,  ...,     0,     0,     0],
         [  101, 29725,   128,  ...,     0,     0,     0]], device='cuda:0'),
 tensor([[1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         ...,
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.],
         [1., 1., 1.,  ..., 0., 0., 0.]], device='cuda:0'),
 tensor([39, 34, 18, 14, 12, 48, 10,  0,  9, 40, 42, 36], device='cuda:0'))

In [69]:
# Unpack the batch into token ids, attention mask and class labels.
b_input_ids, b_input_mask, b_labels = first_batch


In [70]:
# Token ids of the first batch, one row per input sequence.
b_input_ids

tensor([[  101, 29308,   128,  ...,     0,     0,     0],
        [  101, 29174,   128,  ...,     0,     0,     0],
        [  101, 29125,   128,  ...,     0,     0,     0],
        ...,
        [  101, 29282,   128,  ...,     0,     0,     0],
        [  101, 29850,   128,  ...,     0,     0,     0],
        [  101, 29725,   128,  ...,     0,     0,     0]], device='cuda:0')

In [71]:

# Explicit per-tensor move to the device. This repeats what
# `tuple(t.to(device) for t in first_batch)` already did for the whole batch,
# and is kept only to show the long form.
b_input_ids, b_input_mask, b_labels = (
    tensor.to(device) for tensor in (b_input_ids, b_input_mask, b_labels)
)

In [72]:
# Same values as before: the batch was already on `device`, so the extra move changed nothing.
b_input_ids

tensor([[  101, 29308,   128,  ...,     0,     0,     0],
        [  101, 29174,   128,  ...,     0,     0,     0],
        [  101, 29125,   128,  ...,     0,     0,     0],
        ...,
        [  101, 29282,   128,  ...,     0,     0,     0],
        [  101, 29850,   128,  ...,     0,     0,     0],
        [  101, 29725,   128,  ...,     0,     0,     0]], device='cuda:0')

Inference

In [83]:
# Measure the wall-clock time of one forward pass on the first batch.
# CUDA kernels are launched asynchronously, so the GPU must be synchronized
# before reading the clock on both sides; otherwise only the launch overhead
# is measured. `time` is imported in the notebook's import cell.
# Note: the very first forward pass also pays one-off warm-up costs.
if device.type == "cuda":
    torch.cuda.synchronize(device)
t0 = time.perf_counter()
with torch.no_grad():
    outputs = model(b_input_ids, token_type_ids=None, attention_mask=b_input_mask)
    # No labels are passed, so item 0 of the outputs is the logits tensor.
    logits = outputs[0]
if device.type == "cuda":
    torch.cuda.synchronize(device)
t1 = time.perf_counter()
infer_time = t1 - t0
print("inference time: ", infer_time)

inference time:  0.03991341590881348


In [84]:
# Rough cost for one trajectory: the batch time divided by the number of
# trajectories in the batch.
batch_size = len(b_input_ids)
trajectory_inference_time = infer_time / batch_size
# A trajectory of 300 points is estimated at 300 times the single-step cost.
print(f"One trajectory inference time: {trajectory_inference_time}\n Total inference time for 300 points: {300*trajectory_inference_time}")

One trajectory inference time: 0.003326117992401123
 Total inference time for 300 points: 0.9978353977203369


Details of the outputs


In [96]:
# The model outputs logits, i.e. one unnormalized score per class for every input.
# For each input, the class with the highest score is taken as the prediction.
logits  # e.g. with 10 classes there would be 10 scores for each input


tensor([[ 0.1756,  0.0554, -0.2076,  ..., -0.3755,  0.3280, -0.0929],
        [ 0.1270,  0.0911, -0.2356,  ..., -0.4460,  0.2182, -0.0972],
        [ 0.1986,  0.1220, -0.2504,  ..., -0.4339,  0.1873,  0.0108],
        ...,
        [ 0.1416,  0.1100, -0.2492,  ..., -0.4008,  0.2238, -0.1127],
        [ 0.1544,  0.2864, -0.3465,  ..., -0.3810,  0.1673, -0.1443],
        [ 0.1566,  0.1921, -0.4025,  ..., -0.3802,  0.1699, -0.0809]],
       device='cuda:0')

In [97]:
logits[0]  # scores of the first input, one value per class

tensor([ 1.7556e-01,  5.5411e-02, -2.0764e-01, -5.3356e-02, -4.8283e-01,
         3.9959e-01,  2.4936e-01, -6.1580e-02,  1.2208e+00,  8.8233e-01,
         7.9652e-02, -9.1809e-01, -6.1648e-01,  2.2795e-02, -4.9137e-01,
         2.5167e-02,  5.0348e-01,  8.9886e-01,  5.3918e-01,  4.7088e-01,
        -2.3605e-01,  3.0360e-01,  6.9202e-01, -2.3386e-01,  4.7819e-01,
         6.2067e-01, -2.8057e-01,  5.3282e-01, -1.3414e-01,  7.5720e-02,
         2.0141e-01, -7.7584e-01, -3.0211e-01,  3.8304e-02, -2.9572e-01,
        -3.7977e-01, -2.4700e-01, -6.0180e-01,  6.9280e-01, -7.9707e-01,
        -1.0991e+00, -6.2875e-02,  2.0101e-01,  2.4983e-01,  1.3069e-01,
        -1.1322e-01, -2.4029e-01, -4.3113e-02, -4.4805e-01,  1.3670e-01,
         5.0791e-02,  5.8126e-03,  1.4891e-01,  1.6357e-01, -6.0650e-03,
        -7.7350e-02, -3.3025e-01, -3.7892e-01, -2.1078e-01,  1.2388e-01,
        -7.3392e-01,  5.1020e-01, -3.2051e-02, -3.1992e-01, -5.2206e-01,
        -6.3849e-01,  4.4133e-01, -4.0169e-01, -3.0

In [None]:
# (this cell contained only stray keystrokes - nothing to run)

In [98]:
# Every column of `logits` is one class, so its second dimension is the class count.
num_classes = logits.size(1)
num_classes

892

In [108]:
# (number of inputs in the batch, number of classes)
logits.shape

torch.Size([12, 892])

In [100]:
# Predicted class of each input = index of its largest logit along the class dimension.
current_prediction = torch.max(logits, dim=1).indices
current_prediction

tensor([  8,   8, 827, 725, 725, 827, 827, 827, 725, 827, 725, 725],
       device='cuda:0')

In [101]:
# One predicted class per input of the batch.
len(current_prediction)

12

In [103]:
# Now we append these predictions to the initial inputs and try to predict the next point.
# We move the [SEP] token (102) one position to the right and put the predicted class at its former position.
# For example, given the input [101, 5, 7, 8, 9, 102, 0, 0, 0, 0, 0] and the predicted class 3, the new input is [101, 5, 7, 8, 9, 3, 102, 0, 0, 0, 0].


def add_prediction_to_input(input_ids, prediction, sep_token_id=102):
    """Insert `prediction` where [SEP] is and shift [SEP] one position right.

    Example: [101, 5, 7, 8, 9, 102, 0, 0] with prediction 3 becomes
    [101, 5, 7, 8, 9, 3, 102, 0].

    Args:
        input_ids: 1-D tensor of token ids for a single sequence. It is
            modified IN PLACE; pass a `.clone()` to keep the original.
        prediction: value written at the former [SEP] position (an int or a
            0-dim tensor).
        sep_token_id: id of the separator token to look for.

    Returns:
        The same `input_ids` tensor. It is returned unchanged when it contains
        no [SEP] token or when [SEP] already sits on the last position, since
        there is then no room to shift it.

    NOTE(review): the predicted class index is written as-is in place of a
    token id - confirm that class indices and token ids share one vocabulary.
    """
    # Positions of every separator token in the sequence.
    sep_positions = (input_ids == sep_token_id).nonzero(as_tuple=True)[0]

    # Nothing to move, or no free slot to the right: leave the row untouched
    # instead of overwriting [SEP] and then failing with an IndexError.
    if sep_positions.numel() == 0 or int(sep_positions.max()) >= input_ids.shape[0] - 1:
        return input_ids

    # Put the prediction where [SEP] was ...
    input_ids[sep_positions] = prediction
    # ... and re-create [SEP] one position further right.
    input_ids[sep_positions + 1] = sep_token_id
    return input_ids

In [None]:
# Demo on the first row only. The helper edits its argument in place, so it is
# given a copy: otherwise `b_input_ids[0]` would already contain the prediction
# when the whole batch is updated later, and it would be inserted twice.
add_prediction_to_input(b_input_ids[0].clone(), current_prediction[0])

In [134]:
def add_prediction_to_input_batch(input_ids, predictions, sep_token_id=102):
    """Apply `add_prediction_to_input` to every row of a batch.

    Args:
        input_ids: 2-D tensor of token ids, one row per sequence. It is
            modified IN PLACE (no copy is made, to save memory).
        predictions: one predicted value per row.
        sep_token_id: id of the separator token.

    Returns:
        The same `input_ids` tensor. A row is left unchanged when its
        prediction equals `sep_token_id`, when it contains no [SEP] token, or
        when [SEP] already sits on its last position.
    """
    for i in range(len(predictions)):
        # A prediction equal to the separator id is not inserted.
        if predictions[i] == sep_token_id:
            continue

        # Locate [SEP] explicitly: comparing the raw position tensor inside an
        # `if` raises when a row holds zero or several separators.
        sep_positions = (input_ids[i] == sep_token_id).nonzero(as_tuple=True)[0]
        if sep_positions.numel() == 0 or int(sep_positions.max()) >= len(input_ids[i]) - 1:
            continue

        # Forward `sep_token_id` so that a non-default separator is honoured.
        input_ids[i] = add_prediction_to_input(input_ids[i], predictions[i], sep_token_id)
    return input_ids

In [135]:
# Sanity check: after a prediction is inserted, the [SEP] token (id 102) must
# sit exactly one position further right than before.
# The check runs on a copy, so `b_input_ids` itself is left untouched and the
# real update is applied only once, by `add_prediction_to_input_batch`.
preview_ids = b_input_ids.clone()
old_positions = [(row == 102).nonzero(as_tuple=True)[0] for row in b_input_ids]
new_positions = [
    (add_prediction_to_input(preview_ids[i], current_prediction[i]) == 102).nonzero(as_tuple=True)[0]
    for i in range(len(preview_ids))
]
print(f"{old_positions}\n {new_positions}\n")

# The positions shouldn't be the same ...
bools = [old == new for old, new in zip(old_positions, new_positions)]
print(f"Are the positions the same?\n {bools}\n")
# ... they should differ by exactly one.
supposed_bools = [old == new - 1 for old, new in zip(old_positions, new_positions)]
print(f"Are the positions shifted by one?\n {supposed_bools}\n")

print("And of supposed_bools, are they all True? ", all(supposed_bools))




[tensor([59], device='cuda:0'), tensor([58], device='cuda:0'), tensor([34], device='cuda:0'), tensor([45], device='cuda:0'), tensor([49], device='cuda:0'), tensor([42], device='cuda:0'), tensor([39], device='cuda:0'), tensor([40], device='cuda:0'), tensor([36], device='cuda:0'), tensor([46], device='cuda:0'), tensor([71], device='cuda:0'), tensor([37], device='cuda:0')]
 [tensor([60], device='cuda:0'), tensor([59], device='cuda:0'), tensor([35], device='cuda:0'), tensor([46], device='cuda:0'), tensor([50], device='cuda:0'), tensor([43], device='cuda:0'), tensor([40], device='cuda:0'), tensor([41], device='cuda:0'), tensor([37], device='cuda:0'), tensor([47], device='cuda:0'), tensor([72], device='cuda:0'), tensor([38], device='cuda:0')]

Are the positions the same?
 [tensor([False], device='cuda:0'), tensor([False], device='cuda:0'), tensor([False], device='cuda:0'), tensor([False], device='cuda:0'), tensor([False], device='cuda:0'), tensor([False], device='cuda:0'), tensor([False], de

In [113]:
# Insert the predictions into the whole batch; the helper edits `b_input_ids` in place and returns it.
# NOTE(review): `b_input_mask` is not updated here - confirm the shifted [SEP] position is still attended to.
b_input_ids = add_prediction_to_input_batch(b_input_ids, current_prediction)

In [None]:
# predict again the next point with the new input



In [None]:

# Evaluate the model on every batch of the test dataloader and report the
# Matthews correlation coefficient (MCC), the accuracy and the mean loss.

# (Re)initialise the accumulators here so that re-running this cell does not
# append to the results of a previous run.
predictions, true_labels, list_inputs_test = [], [], []
losses = 0
print("We predict")
for batch in prediction_dataloader:
    # A batch is (input_ids, attention_mask, labels); move it to the device.
    b_input_ids, b_input_mask, b_labels = tuple(t.to(device) for t in batch)

    # No gradients are needed for evaluation: this saves memory and time.
    with torch.no_grad():
        # The labels are passed so that the model also returns the loss.
        # Without them the first output item is the logits, and averaging it
        # would report a mean logit instead of a loss.
        outputs = model(
            b_input_ids,
            token_type_ids=None,
            attention_mask=b_input_mask,
            labels=b_labels,
        )

    # With labels supplied, item 0 is the loss and item 1 the logits.
    loss, logits = outputs[0], outputs[1]
    losses += loss.item()

    # Move logits and labels to the CPU as numpy arrays.
    logits = logits.detach().cpu().numpy()
    label_ids = b_labels.to("cpu").numpy()

    # Store the raw logits (one score per class); the predicted class is
    # their argmax, which is taken further down.
    predictions.append(logits)
    true_labels.append(label_ids)

    # Store the inputs.
    list_inputs_test.append(b_input_ids.tolist())

print("DONE.")

assert true_labels, "prediction_dataloader yielded no batches"

# Evaluate each test batch using Matthews correlation coefficient.
print("Calculating Matthews Corr. Coef. for each batch...")
matthews_set = []
pred_label = []
for batch_logits, batch_labels in zip(predictions, true_labels):
    # `batch_logits` has one column per class: the predicted class of a row
    # is the column holding its highest score.
    pred_labels_i = np.argmax(batch_logits, axis=1).flatten()
    pred_label.append(pred_labels_i)
    matthews_set.append(matthews_corrcoef(batch_labels, pred_labels_i))

# Flatten the per-batch results into one array of predicted classes and one
# array of true classes.
flat_predictions = np.concatenate(pred_label)
flat_true_labels = np.concatenate(true_labels)

# Combine the inputs of every batch into a single list.
flat_list_inputs_test = [item for sublist in list_inputs_test for item in sublist]

# MCC over the whole test set.
mcc = matthews_corrcoef(flat_true_labels, flat_predictions)
print("MCC: %.3f" % mcc)

# Accuracy: share of inputs whose predicted class equals the true class.
accuracy = (flat_true_labels == flat_predictions).mean()
print("accuracy: %.3f" % accuracy)

# Mean of the per-batch losses.
print("loss: %.3f" % (losses / len(true_labels)))


# save flat_list_inputs_test
