In [21]:
!pip install torchmetrics
!pip install portalocker
!pip install torcheval



In [22]:
from torchtext.models import T5_BASE_GENERATION
from torchtext.prototype.generate import GenerationUtils
from torchtext.datasets import IMDB
from torchvision.transforms import ToTensor
from functools import partial
from torch.utils.data import DataLoader
import torch
from torch import tensor
from torchmetrics.classification import BinaryCalibrationError
from torchmetrics.classification import MulticlassCalibrationError
from sklearn.isotonic import IsotonicRegression
from torcheval.metrics.functional import multiclass_f1_score

In [23]:
def batch_prefix(task, x):
	return {
	    "article": [f'{task}: ' + y for y in x["article"]],
	    "abstract": x["abstract"]
}

def apply_prefix(task, x):
	return f"{task}: " + x[0], x[1]


def process_labels(labels, x):
	  return x[1], labels[str(x[0])]

In [24]:
imdb_batch_size = 64
imdb_datapipe = IMDB(split="test")
task = "sst2 sentence"
labels = {"1": "negative", "2": "positive"}


# imdb_datapipe = imdb_datapipe.map(partial(process_labels, labels))
# imdb_datapipe = imdb_datapipe.map(partial(apply_prefix, task))
# imdb_datapipe = imdb_datapipe.batch(imdb_batch_size)
# imdb_datapipe = imdb_datapipe.rows2columnar(["text", "label"])
# imdb_dataloader = DataLoader(imdb_datapipe, batch_size=None, shuffle=True)

test_data = IMDB(
    split="test"
)
imdb_dataloader = DataLoader(test_data, batch_size=imdb_batch_size, shuffle=True)

In [25]:
t5_base = T5_BASE_GENERATION
transform = t5_base.transform()
model = t5_base.get_model(freeze_model=True)
model.eval()

T5_POSITIVE_LOGITS = 1465
T5_NEGATIVE_LOGITS = 2841

# sequence_generator = GenerationUtils(model)

padding_idx = 0
eos_idx = 1
max_seq_len = 512

In [28]:
targets = None
logits = None

data_count = 0

for batch in iter(imdb_dataloader):
	data_count += 1
	print(imdb_batch_size * data_count)

	# Datapipe Implementation:
	# input_text = batch["text"]
	# target = batch["label"]


	# Direct DataLoader Implementation:
	target_tensor = torch.zeros(len(batch[0]), 2)
	for i, data in enumerate(batch[0]):
		if(data == 1):
			target_tensor[i, :] = torch.tensor([1.0, 0.0])
		else:
			target_tensor[i, :] = torch.tensor([0.0, 1.0])

	if targets == None:
		targets = target_tensor
	else:
		targets = torch.cat((targets, target_tensor), dim = 0)

	input_text = list(batch[1])

	model_input = transform(input_text)
	temp = model(model_input)

	pos_logit = temp["decoder_output"][:,:,T5_POSITIVE_LOGITS]
	neg_logit = temp["decoder_output"][:,:,T5_NEGATIVE_LOGITS]

	z = torch.zeros(neg_logit.shape[0], 2)
	# Mention adaptation in report
	z[:, 0] += neg_logit[:, 0] / (neg_logit[:, 0] + pos_logit[:, 0])
	z[:, 1] += pos_logit[:, 0] / (neg_logit[:, 0] + pos_logit[:, 0])

	if logits == None:
		logits = z
	else:
		logits = torch.cat((logits, z), dim = 0)

	# if logits == None:
	# 	logits = logits
	# else:
	# 	targets = torch.cat((targets, target), dim = 0)

	# beam_size = 1
	# model_output = sequence_generator.generate(model_input, eos_idx=eos_idx, num_beams=beam_size)
	# output_text = transform.decode(model_output.tolist())

	# if(data_count * imdb_batch_size >= 88):
	# 	break

	# print(logits.shape)
	# print(logits)





64
128
192
256
320
384
448
512
576
640
704
768
832
896
960
1024
1088
1152
1216
1280
1344
1408
1472
1536
1600
1664
1728
1792
1856
1920
1984
2048
2112
2176
2240
2304
2368
2432
2496
2560
2624
2688
2752
2816
2880
2944
3008
3072
3136
3200
3264
3328
3392
3456
3520
3584
3648
3712
3776
3840
3904
3968
4032
4096
4160
4224
4288
4352
4416
4480
4544
4608
4672
4736
4800
4864
4928
4992
5056
5120
5184
5248
5312
5376
5440
5504
5568
5632
5696
5760
5824
5888
5952
6016
6080
6144
6208
6272
6336
6400
6464
6528
6592
6656
6720
6784
6848
6912
6976
7040
7104
7168
7232
7296
7360
7424
7488
7552
7616
7680
7744
7808
7872
7936
8000
8064
8128
8192
8256
8320
8384
8448
8512
8576
8640
8704
8768
8832
8896
8960
9024
9088
9152
9216
9280
9344
9408
9472
9536
9600
9664
9728
9792
9856
9920
9984
10048
10112
10176
10240
10304
10368
10432
10496
10560
10624
10688
10752
10816
10880
10944
11008
11072
11136
11200
11264
11328
11392
11456
11520
11584
11648
11712
11776
11840
11904
11968
12032
12096
12160
12224
12288
12352
12416
12480
12

In [54]:
metric = BinaryCalibrationError()
preds = torch.argmax(targets, dim=1)
calibration_error = metric(logits, targets)
f1_score = multiclass_f1_score(logits, preds, num_classes = 2)

print("Histogram Binning ECE:", str(round(100 * calibration_error.item(), 1)) + "%")
print("Histogram Binning F1 Score:", str(round(f1_score.item(), 4)))

# print(preds.shape)
targets_1d = torch.argmax(targets, dim=1)
# print(targets_1d.shape)
print(logits.shape)
train_size = 8 * targets_1d.shape[0] // 10
iso_logits = torch.zeros((logits.shape[0]))
arg_logits = torch.argmax(logits, dim = 1)
for i in range(logits.shape[0]):
    if(arg_logits[i] == 0):
        iso_logits[i] += logits[i, 0]
    else:
        iso_logits[i] -= logits[i, 1]
iso_model = IsotonicRegression().fit(iso_logits[0:train_size], targets_1d[0:train_size])
calibrated_preds = torch.Tensor(iso_model.predict(iso_logits[train_size:]))
calibration_error = metric(calibrated_preds, targets_1d[train_size:])
f1_score =  multiclass_f1_score(calibrated_preds, targets_1d[train_size:], num_classes = 2)

print("Isotonic Regression ECE:", str(100 * calibration_error.item()) + "%")
print("Isotonic Regression F1 Score:", str(round(f1_score.item(), 4)))

# Result of temperature optimization on local device
T = 0.8576
temperature_logits = logits / T

calibration_error = metric(temperature_logits, targets)
f1_score = multiclass_f1_score(temperature_logits, preds, num_classes = 2)

print("Temperature Scaling ECE:", str(round(100 * calibration_error.item(), 1)) + "%")
print("Temperature Scaling F1 Score:", str(round(f1_score.item(), 4)))

Histogram Binning ECE: 1.2%
Histogram Binning F1 Score: 0.0229
torch.Size([12500, 2])
Isotonic Regression ECE: 0.0%
Isotonic Regression F1 Score: 1.0
Temperature Scaling ECE: 8.8%
Temperature Scaling F1 Score: 0.0229
