<a href="https://colab.research.google.com/github/SAR2652/ML-Project/blob/main/T5_Inference.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [17]:
!pip install transformers datasets sentencepiece



In [18]:
import pandas as pd
import numpy as np
import random
import torch
from torch.utils.data import Dataset, DataLoader
from transformers import T5Tokenizer, T5ForConditionalGeneration, AdamW, get_linear_schedule_with_warmup
from nltk.translate.bleu_score import sentence_bleu
from tqdm import tqdm, trange
from datasets import load_dataset
import torch.nn.functional as F
import csv
import os

In [19]:
tokenizer = T5Tokenizer.from_pretrained('t5-base')

In [20]:
# data_path = '/content/drive/MyDrive/NYU courses/Sem 2/ML/Project/ml_data'
# data_path = '/content/drive/My Drive/failing'
data_path = '/content/drive/My Drive/test_data'
# data_path = '/content/drive/MyDrive/validation'
data = load_dataset(data_path)

Using custom data configuration test_data-6307297f2be303ed


Downloading and preparing dataset csv/test_data to /root/.cache/huggingface/datasets/csv/test_data-6307297f2be303ed/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519...


Downloading data files:   0%|          | 0/1 [00:00<?, ?it/s]

Extracting data files:   0%|          | 0/1 [00:00<?, ?it/s]

Dataset csv downloaded and prepared to /root/.cache/huggingface/datasets/csv/test_data-6307297f2be303ed/0.0.0/433e0ccc46f9880962cc2b12065189766fbb2bee57a221866138fb9203c83519. Subsequent calls will reuse this data.


  0%|          | 0/1 [00:00<?, ?it/s]

In [23]:
data

DatasetDict({
    test: Dataset({
        features: ['Problem', 'Rationale'],
        num_rows: 250
    })
})

In [24]:
class MathQAData(Dataset):  
    def __init__(self, control_code, max_length=1024):

        self.problems = tokenizer(control_code['Problem'], max_length = max_length, padding = 'max_length', truncation = True, return_tensors = "pt")
        self.rationales = tokenizer(control_code['Rationale'], max_length = max_length, padding = 'max_length', truncation = True, return_tensors = "pt")
        # self.rationales = []
        # for item in control_code['Rationale']:
        #     self.rationales.append(tokenizer.encode(item, max_length = max_length, padding = 'max_length', truncation = True, return_tensors = "pt"))

        self.count = len(self.problems['input_ids'])
        
    def __len__(self):
        return self.count

    def __getitem__(self, idx):
        example = dict()
        example['input'] = dict()
        example['output'] = dict()
        example['input']['input_ids'] = self.problems['input_ids'][idx]
        example['input']['attention_mask'] = self.problems['attention_mask'][idx]
        example['output']['input_ids'] = self.rationales['input_ids'][idx]
        example['output']['attention_mask'] = self.rationales['attention_mask'][idx]
        return example
    
dataset = MathQAData(data['test'])      

In [25]:
dataset.__len__()

250

In [26]:
model = T5ForConditionalGeneration.from_pretrained('t5-base')
model.load_state_dict(torch.load("/content/drive/My Drive/ml_models/t5_base_epoch_28.pth"))

<All keys matched successfully>

In [27]:
device = torch.device('cuda')

In [28]:
eval_dataset = MathQAData(data['test'])      
eval_dataloader = DataLoader(eval_dataset, batch_size=1)

In [29]:
model = model.to(device)

In [30]:
def generate_rationale(prompt):
    attention_mask = prompt['input']['attention_mask'].to(device)
    input_ids = prompt['input']['input_ids'].to(device)
    generated_ids = model.generate(input_ids = input_ids, attention_mask = attention_mask,
                                   max_length = 1024,
                                   num_beams = 1)
    
    return [tokenizer.decode(gen_id, skip_special_tokens = True) for gen_id in generated_ids]

In [34]:
count = 0
for entry in eval_dataloader:
    words = generate_rationale(entry)
    print("Problem: ")
    print(data['test']['Problem'][count])
    print("Rationale: ")
    print(data['test']['Rationale'][count])
    count += 1
    print("Generated Rationale: ")
    print(" ".join(words))
    if count > 4:
        break

Problem: 
how long does a train 110 m long traveling at 60 kmph takes to cross a bridge of 390 m in length ?
Rationale: 
"d = 110 + 390 = 500 m s = 60 * 5 / 18 = 50 / 3 t = 500 * 3 / 50 = 30 sec answer : d"
Generated Rationale: 
how long does a train 110 m long traveling at 60 kmph take to cross a bridge of 390 m in length take to cross a bridge of 390 m in length?
Problem: 
find the average of all the numbers between 6 and 34 which is divisible by 5
Rationale: 
"avg = ( 10 + 15 + 20 + 25 + 30 ) / 5 = 100 / 5 = > 20 answer e"
Generated Rationale: 
6 and 34. Find the average of all the numbers between 6 and 34 which is divisible by 5. Find the average of all the numbers between 6 and 34 which is divisible by 5. Find the average of all the numbers between 6 and 34 which is divisible by 5. Find the average of all the numbers between 6 and 34 which is divisible by 5. Find the average of all the numbers between 6 and 34 which is divisible by 5. Find the average of all the numbers between 6 

In [35]:
bleu_scores = []
count = 0
for entry in eval_dataloader:
    words = generate_rationale(entry)
    ref = data['test']['Rationale'][count]
    sent = ' '.join(words)
    count += 1
    bleu_scores.append(sentence_bleu(ref, sent))

Corpus/Sentence contains 0 counts of 2-gram overlaps.
BLEU scores might be undesirable; use SmoothingFunction().


In [36]:
import statistics
statistics.mean(bleu_scores)

0.5920296182955308