# Fine-tune FLAN-T5 for CG Classification from Belief

## 1. Setup Development Environment

In [None]:
!pip install pytesseract transformers==4.28.1 datasets evaluate rouge-score nltk tensorboard py7zr
# from huggingface_hub import notebook_login
# notebook_login()

### Connect to Drive

In [2]:
import pandas as pd
import matplotlib.pyplot as plt
import os
import re
import glob
from datasets import load_dataset
import datasets

# Mount Google Drive at /content/drive so that paths under /content/drive/MyDrive
# (such as the saved model directory used later) are readable from this runtime.
from google.colab import drive
drive.mount('/content/drive')

## 2. Load and prepare dataset

In [34]:
import pickle

# Read the evaluation table from the Colab runtime's local disk.
# Columns referenced later in this notebook: 'Event', 'Bel(A)', 'Bel(B)',
# 'Predicted Bel(A)', 'Predicted Bel(B)' and 'CG(A)'.
df = pd.read_csv(filepath_or_buffer="/content/48BelA_36BelB_2previous.csv")

In [35]:
# Display the loaded frame as the cell output (pandas abbreviates it to the first and last rows).
df

Unnamed: 0.1,Unnamed: 0,Speaker,Sentence_Number,Sentence,Event,Target_Event,Predicted Bel(A),Predicted Bel(B),Bel(A),Bel(B),CG(A),CG(B)
0,0,B,1,B: %um I took them to %uh &Jill’s and they spe...,Previous Sentences: \nTarget Sentence: B took ...,B took the kids to Jill's,1,1,1,1,1,1
1,1,B,1,,Previous Sentences: B took the kids to Jill's ...,The kids spent two days at Jill's,1,1,1,1,1,1
2,2,B,1,,Previous Sentences: B took the kids to Jill's ...,B guesses Jill couldn't take the kids,1,1,1,1,1,1
3,3,B,1,,Previous Sentences: B took the kids to Jill's ...,Jill couldn't take the kids,3,3,3,3,1,1
4,4,B,1,,Previous Sentences: B took the kids to Jill's ...,The kid's mom and dad came,1,1,1,1,1,1
...,...,...,...,...,...,...,...,...,...,...,...,...
315,315,A,144,,Previous Sentences: A asks B if B knows what A...,A's sons never treat one another like A and B'...,1,1,3,3,1,1
316,316,B,145,B: True. I don’t think my kids will be that wa...,Previous Sentences: B doesn't know what A said...,B doesn't think B's kids will be like A and B'...,1,1,1,1,1,1
317,317,B,145,,Previous Sentences: B doesn't know what A said...,B's kids will be like A and B's mom and dad,3,3,3,3,1,1
318,318,A,146,A: And he just looked at me. [channel noise],Previous Sentences: A said A hopes that A's so...,A and B's dad just looked at A,1,1,1,1,1,1


## 3. Load Model and Test Pretrained Model

In [36]:
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Directory on the mounted Drive that holds both the tokenizer files and the
# seq2seq model weights; the two loads below are independent of each other.
save_directory = (
    "/content/drive/MyDrive/Common Ground Docs/Models/"
    "FlanT5_Final_Model_CG_Classification_82CGA_82CGB"
)
model = AutoModelForSeq2SeqLM.from_pretrained(save_directory)
tokenizer = AutoTokenizer.from_pretrained(save_directory)

In [None]:
import torch

# Prefer the GPU, but fall back to the CPU so this cell does not raise on a runtime without CUDA.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# The trailing semicolon keeps the cell from echoing the module repr returned by `.to()`.
model.to(device);

In [40]:
from tqdm.auto import tqdm

# Predict CG(A) for every row, building the prompt from the 'Bel(A)' / 'Bel(B)' columns.
# Results are collected in `predictions_list` (decoded model output) and
# `labels_list` (stringified 'CG(A)' values) for the metrics cell that follows.
samples_number = len(df)
predictions_list = []
labels_list = []

# Iterate over the column values positionally: the label-based lookup `df[col][i]`
# only works while the frame still has its default 0..n-1 index.
rows = zip(df['Bel(A)'], df['Bel(B)'], df['Event'], df['CG(A)'])
# Wrapping the iterable lets tqdm advance and close the bar on its own.
progress_bar = tqdm(rows, total=samples_number)
for bel_a, bel_b, event, cg_a in progress_bar:
  text = f"Bel(A)={bel_a},Bel(B)={bel_b}\n\nInput Event with Context:\n{event}"
  # Send the encoded inputs to whichever device the model is on instead of hard-coding 'cuda'.
  inputs = tokenizer.encode_plus(text, padding='max_length', max_length=512, return_tensors='pt').to(model.device)
  outputs = model.generate(inputs['input_ids'], attention_mask=inputs['attention_mask'], max_length=150, num_beams=4, early_stopping=True)
  prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
  predictions_list.append(prediction)
  # Gold labels are stringified so they are comparable with the decoded text predictions.
  labels_list.append(str(cg_a))

  0%|          | 0/320 [00:00<?, ?it/s]

In [41]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the decoded predictions against the
# stringified 'CG(A)' labels; zero_division=0 reports 0 for undefined ratios.
report = classification_report(
    y_true=labels_list,
    y_pred=predictions_list,
    zero_division=0,
)
print(report)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.92      0.96      0.94       241
           2       0.61      0.47      0.53        30
           3       0.78      1.00      0.88        35

    accuracy                           0.88       320
   macro avg       0.58      0.61      0.59       320
weighted avg       0.84      0.88      0.85       320



In [42]:
from tqdm.auto import tqdm

# Predict CG(A) for every row, building the prompt from the 'Predicted Bel(A)' /
# 'Predicted Bel(B)' columns rather than the 'Bel(A)' / 'Bel(B)' columns.
# Results are collected in `predictions_list` (decoded model output) and
# `labels_list` (stringified 'CG(A)' values) for the metrics cell that follows.
samples_number = len(df)
predictions_list = []
labels_list = []

# Iterate over the column values positionally: the label-based lookup `df[col][i]`
# only works while the frame still has its default 0..n-1 index.
rows = zip(df['Predicted Bel(A)'], df['Predicted Bel(B)'], df['Event'], df['CG(A)'])
# Wrapping the iterable lets tqdm advance and close the bar on its own.
progress_bar = tqdm(rows, total=samples_number)
for bel_a, bel_b, event, cg_a in progress_bar:
  text = f"Bel(A)={bel_a},Bel(B)={bel_b}\n\nInput Event with Context:\n{event}"
  # Send the encoded inputs to whichever device the model is on instead of hard-coding 'cuda'.
  inputs = tokenizer.encode_plus(text, padding='max_length', max_length=512, return_tensors='pt').to(model.device)
  outputs = model.generate(inputs['input_ids'], attention_mask=inputs['attention_mask'], max_length=150, num_beams=4, early_stopping=True)
  prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
  predictions_list.append(prediction)
  # Gold labels are stringified so they are comparable with the decoded text predictions.
  labels_list.append(str(cg_a))

  0%|          | 0/320 [00:00<?, ?it/s]

In [43]:
from sklearn.metrics import classification_report

# Per-class precision / recall / F1 of the decoded predictions against the
# stringified 'CG(A)' labels; zero_division=0 reports 0 for undefined ratios.
report = classification_report(
    y_true=labels_list,
    y_pred=predictions_list,
    zero_division=0,
)
print(report)

              precision    recall  f1-score   support

           0       0.00      0.00      0.00        14
           1       0.81      0.96      0.88       241
           2       0.54      0.23      0.33        30
           3       0.45      0.29      0.35        35

    accuracy                           0.78       320
   macro avg       0.45      0.37      0.39       320
weighted avg       0.71      0.78      0.73       320

