# In [None]:
import pandas as pd
import numpy as np
import torch
from torch.utils.data import DataLoader, TensorDataset
from transformers import BertTokenizer, BertConfig, BertForSequenceClassification
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import wandb
import pickle
from tqdm import tqdm
import nlp_utils as nu
import matplotlib.pyplot as plt
import string
# Environment setup: report the PyTorch version, pick the compute device and
# log in to Weights & Biases.
print("PyTorch version:", torch.__version__)
# Use the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device
wandb.login()

# Relative locations of the raw data, the cached tensors and the saved models.
train_data_path = '../data/Subtask_1_train.json'
test_data_path = '../data/Subtask_1_test.json'
pickle_save_path = '../data/pickle/'
model_save_path = '../models/'

# Full training frame; the bare expression below is a notebook-style display.
dataset = pd.read_json(train_data_path)
dataset
# Reuse the frame parsed above instead of reading and parsing the same JSON
# file a second time.
conversations_df = dataset['conversation']
conversations_df[0]
# One inner list per conversation, holding the preprocessed text of each of its
# utterances in their original order. Only the 'text' field is needed here, so
# the previously unused speaker lookup is gone.
all_conversations = [
    [nu.preprocess_text(sentence['text']) for sentence in conv]
    for conv in conversations_df
]
start_token = '<CLS>'
sep_token = '<SEP>'
all_conversations
def find_subsentence_indices(full_sentence, sub_sentence):
    """Locate the first occurrence of ``sub_sentence`` inside ``full_sentence``.

    Args:
        full_sentence: The string to search in.
        sub_sentence: The string to search for.

    Returns:
        A ``(start_index, end_index)`` tuple of inclusive character offsets,
        so that ``full_sentence[start_index:end_index + 1]`` is the match.
        When ``sub_sentence`` does not occur, ``(-1, -1)`` is returned; that
        pair slices to an empty string for any non-empty ``full_sentence``.
    """
    start_index = full_sentence.find(sub_sentence)
    if start_index == -1:
        # str.find reports "not found" as -1. Deriving an end offset from it
        # would yield a range that looks valid but is unrelated to the text,
        # so return an explicit sentinel pair instead.
        print(-1, -1, '')
        return -1, -1
    end_index = start_index + len(sub_sentence) - 1
    # Trace output: the offsets and the text they select.
    print(start_index, end_index, full_sentence[start_index:end_index + 1])
    return start_index, end_index

def finding(dataset):
    """Collect, per conversation, the annotated pairs with cause-span offsets.

    Each row of ``dataset.values`` is read positionally: index 0 is the
    conversation id, index 1 the list of utterance dicts (with
    ``"utterance_ID"`` and ``"text"`` keys) and index 2 the list of pairs.
    Both elements of a pair are strings of the form ``"<int>_<rest>"``.

    Args:
        dataset: A DataFrame whose first three columns follow the layout above.

    Returns:
        A list with one dict per row: ``{"convo": <id>, "pairs": [...]}``.
        Each entry of ``"pairs"`` is ``[num1, num2, start, end]``, where
        ``num1`` / ``num2`` are the integer prefixes of the two pair elements
        and ``start`` / ``end`` come from ``find_subsentence_indices`` applied
        to the preprocessed text of utterance ``num2`` and the preprocessed
        remainder of the second pair element.
    """
    results = []
    for row in dataset.values:
        conversation_id, utterances, annotated_pairs = row[0], row[1], row[2]
        entry = {"convo": conversation_id, "pairs": []}
        results.append(entry)
        for pair in annotated_pairs:
            first_part, second_part = pair[0], pair[1]
            first_prefix = first_part.split("_")[0]
            second_prefix = second_part.split("_")[0]
            first_id = int(first_prefix)
            second_id = int(second_prefix)
            # Everything after "<id>_" in the second element is the span text.
            span_text = second_part[len(second_prefix) + 1:]
            for utterance in utterances:
                if utterance["utterance_ID"] != second_id:
                    continue
                start, end = find_subsentence_indices(
                    nu.preprocess_text(utterance["text"]),
                    nu.preprocess_text(span_text),
                )
                entry["pairs"].append([first_id, second_id, start, end])

    return results

liso = finding(dataset)

# cause_labels[k] maps the first ID of each pair in the k-th conversation
# (presumably the emotion utterance - confirm against the data) to the list of
# second IDs recorded with it. The per-conversation dict is no longer named
# `map`, so the builtin stays usable for the rest of the file.
cause_labels = []
for conv in liso:
    causes_by_target = {}
    for arr in conv['pairs']:
        causes_by_target.setdefault(arr[0], []).append(arr[1])
    cause_labels.append(causes_by_target)
def is_cause(target, utt, conv_id):
    """Return 1 if ``utt`` is listed under ``target`` in ``cause_labels[conv_id]``, else 0.

    Args:
        target: Key to look up in the conversation's cause mapping.
        utt: Utterance ID to search for among the values stored for ``target``.
        conv_id: Index of the conversation in the module-level ``cause_labels``.
    """
    recorded_causes = cause_labels[conv_id].get(target, [])
    return 1 if utt in recorded_causes else 0
def get_span(target, utt_id, utt, conv_id):
    """Return the slice of ``utt`` stored for the pair ``(target, utt_id)``.

    The pairs of ``liso[conv_id]`` are scanned in order; the first entry whose
    first two items equal ``target`` and ``utt_id`` supplies the inclusive
    start/end offsets (items 2 and 3) used to slice ``utt``.

    Args:
        target: First ID of the pair to look for.
        utt_id: Second ID of the pair to look for.
        utt: The text to slice.
        conv_id: Index of the conversation in the module-level ``liso``.

    Returns:
        The selected substring, or ``''`` when no pair matches.
    """
    matching_pairs = (
        pair for pair in liso[conv_id]['pairs']
        if pair[0] == target and pair[1] == utt_id
    )
    first_match = next(matching_pairs, None)
    if first_match is None:
        return ''
    return utt[first_match[2]:first_match[3] + 1]
# Build one example for every ordered (target utterance, candidate utterance)
# combination inside each conversation, including an utterance paired with
# itself.
all_tc_pairs = []
all_tc_labels = []
all_tc_spans = []

for conv_id, conv in enumerate(all_conversations):
    for target_id, target in enumerate(conv):
        for utt_id, utt in enumerate(conv):
            # The annotation lookups use position + 1 (utterance IDs are
            # presumably 1-based - confirm against the data). The label is
            # computed once and reused rather than calling is_cause twice per
            # pair.
            label = is_cause(target_id + 1, utt_id + 1, conv_id)
            all_tc_pairs.append([target, utt])
            all_tc_labels.append(label)
            # Only positive pairs carry a span; negatives get an empty string.
            all_tc_spans.append(
                get_span(target_id + 1, utt_id + 1, utt, conv_id) if label else ''
            )

all_tc_pairs = np.array([np.array(x) for x in all_tc_pairs], dtype='object')
all_tc_labels = np.array(all_tc_labels)
all_tc_spans = np.array(all_tc_spans, dtype='object')
all_tc_spans[15:20]
all_tc_pairs
print(all_tc_labels[:20])
# Fixed random_state keeps the 80/20 train/dev split reproducible.
train_tc_pairs, dev_tc_pairs, train_tc_labels, dev_tc_labels, train_tc_spans, dev_tc_spans = train_test_split(
    all_tc_pairs, all_tc_labels, all_tc_spans, test_size=0.2, random_state=42
)
# Load the previously saved input tensors and labels for both splits.
# NOTE(review): these are presumably the tokenized train_tc_pairs / dev_tc_pairs
# in the same row order - confirm against the code that wrote the files.
X_train_context_input_ids = torch.load(pickle_save_path + 'X_train_tc_input_ids.pt')
X_train_context_attention_masks = torch.load(pickle_save_path + 'X_train_tc_attention_masks.pt')
train_labels = torch.load(pickle_save_path + 'train_labels_tc.pt')

X_dev_context_input_ids = torch.load(pickle_save_path + 'X_dev_tc_input_ids.pt')
X_dev_context_attention_masks = torch.load(pickle_save_path + 'X_dev_tc_attention_masks.pt')
dev_labels = torch.load(pickle_save_path + 'dev_labels_tc.pt')

# Two-label sequence classifier restored from a local checkpoint directory.
config = BertConfig.from_pretrained('../models/crc-bert-m1', num_labels=2)
model = BertForSequenceClassification.from_pretrained('../models/crc-bert-m1', config=config)
model.to(device)

train_dataset = TensorDataset(X_train_context_input_ids, X_train_context_attention_masks, train_labels)
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)

dev_dataset = TensorDataset(X_dev_context_input_ids, X_dev_context_attention_masks, dev_labels)
# The dev loader must keep dataset order: later code indexes the collected
# predictions by position against dev_tc_pairs / dev_tc_labels, and shuffling
# would silently pair each prediction with the wrong example.
dev_loader = DataLoader(dev_dataset, batch_size=16, shuffle=False)
# Evaluate the pair classifier on the dev loader and report weighted metrics.
model.eval()
utt_predictions = []
true_labels = []

with torch.no_grad():
    for batch in tqdm(dev_loader, desc="Eval Minibatch"):
        input_ids, attention_mask, labels = batch
        # Tensor.to returns a new tensor rather than moving the original in
        # place, so the result has to be rebound; otherwise the inputs stay on
        # the CPU while the model may live on the GPU.
        input_ids = input_ids.to(device)
        attention_mask = attention_mask.to(device)
        outputs = model(input_ids, attention_mask=attention_mask)
        # Predicted class = index of the largest logit in each row.
        utt_predictions.append(np.argmax(outputs.logits.to('cpu').numpy(), axis=1))
        # Labels are only consumed as numpy values, so they never need to
        # leave the CPU.
        true_labels.extend(labels.cpu().numpy())
utt_predictions = np.concatenate(utt_predictions, axis=0)
accuracy = accuracy_score(true_labels, utt_predictions)
f1 = f1_score(true_labels, utt_predictions, average='weighted')
precision = precision_score(true_labels, utt_predictions, average='weighted')
recall = recall_score(true_labels, utt_predictions, average='weighted')
print(classification_report(true_labels, utt_predictions))
print(f'Accuracy: {accuracy}')
print(f'Precision: {precision}')
print(f'Recall: {recall}')
print(f'F1 Score: {f1}')
from transformers import AutoModelForQuestionAnswering

# Place the model on the device selected at the top of the file rather than a
# hard-coded 'cuda', which raises on hosts without a GPU.
span_model = AutoModelForQuestionAnswering.from_pretrained("../models/span_nz").to(device)
# Column 0 of each dev pair is the target utterance (used as the question),
# column 1 the candidate utterance (used as the context).
questions = dev_tc_pairs[:, 0]
contexts = dev_tc_pairs[:, 1]
utt_predictions[utt_predictions == 1].shape
# answers[i] is the gold label where the classifier predicted 1, and 0
# everywhere else; i.e. 1 only when prediction and gold label are both 1.
answers = [
    dev_tc_labels[i] if utt_predictions[i] == 1 else 0
    for i, _ in enumerate(questions)
]

len(answers)
dev_tc_labels[dev_tc_labels == 1].shape
utt_predictions[utt_predictions == 1].shape
from transformers import pipeline

question_answerer = pipeline("question-answering", model="../models/span_nz", tokenizer="../models/span_nz", device=device)
# Run span extraction over every dev pair in one call.
predictions = question_answerer(question=questions.tolist(), context=contexts.tolist())
predictions
import evaluate

def findEvalMetrics(true_labels, predictions):
    """Compute BLEU (max n-gram order 1 through 4) and METEOR for the predictions.

    Args:
        true_labels: Reference texts, passed as ``references`` to both metrics.
        predictions: Predicted texts, passed as ``predictions`` to both metrics.

    Returns:
        A two-element list ``[bleu_results, meteor_result]`` where
        ``bleu_results`` holds the four BLEU results ordered by ``max_order``
        1, 2, 3, 4 and ``meteor_result`` is the METEOR result.
    """
    bleu = evaluate.load("bleu")
    # One BLEU computation per maximum n-gram order, in ascending order.
    results_bleu = [
        bleu.compute(predictions=predictions, references=true_labels, max_order=order)
        for order in range(1, 5)
    ]

    meteor = evaluate.load("meteor")
    results_meteor = meteor.compute(predictions=predictions, references=true_labels)

    return [results_bleu, results_meteor]
# Score the extracted spans against the gold spans of the dev split.
len(predictions)
# Gold spans: '' for every pair that was not labelled as a cause.
true_labels = dev_tc_spans
true_labels.shape
true_labels.tolist()
answers
# Keep the extracted answer only where `answers` holds 1; every other pair is
# assigned an empty span.
pred_labels = [
    prediction['answer'] if answers[idx] == 1 else ''
    for idx, prediction in enumerate(predictions)
]
pred_labels
metrics = findEvalMetrics(true_labels, pred_labels)
metrics
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report, f1_score
# The span strings themselves are handed to sklearn's classification metrics
# as the labels to compare.
accuracy = accuracy_score(true_labels, pred_labels)
precision = precision_score(
    true_labels, pred_labels, average='weighted', zero_division=1
)
recall = recall_score(
    true_labels, pred_labels, average='weighted', zero_division=1
)
f1 = f1_score(
    true_labels, pred_labels, average='weighted', zero_division=1
)
accuracy, precision, recall, f1
# First hand-written example. Its question/context are overwritten by the
# second example below before anything is run.
question, context = (
    'oh i embarrass you',
    'all right look you are not really gonna buy that are you do not you think you have embarrassed me enough for one day',
)
ans = 'you have embarrassed me enough for one day'
# Second hand-written example; this is the one fed to the pipeline.
question, context = (
    'Ohh , you are about to get a little luckier .',
    'You look amazing . I am the luckiest man in the world .',
)
answer = 'You look amazing . I am the luckiest man in the world .'
from transformers import pipeline

question_answerer = pipeline(
    "question-answering",
    model="../models/span_nz",
    tokenizer="../models/span_nz",
    device=device,
)
test_predictions = question_answerer(question=question, context=context)
test_predictions
# Manual question-answering inference on the single example defined above,
# without going through the pipeline helper.
import torch
from transformers import AutoModelForQuestionAnswering
from transformers import AutoTokenizer

# NOTE(review): this loads "my_awesome_qa_model" while the rest of the file
# uses "../models/span_nz" - confirm this is the intended checkpoint.
tokenizer = AutoTokenizer.from_pretrained("my_awesome_qa_model")
inputs = tokenizer(question, context, return_tensors="pt")

# NOTE(review): this rebinds `model`, which earlier in the file refers to the
# sequence classifier.
model = AutoModelForQuestionAnswering.from_pretrained("my_awesome_qa_model")
with torch.no_grad():
    outputs = model(**inputs)
# The answer span runs from the highest-scoring start position to the
# highest-scoring end position, inclusive.
answer_start_index = torch.argmax(outputs.start_logits)
answer_end_index = torch.argmax(outputs.end_logits)
predict_answer_tokens = inputs["input_ids"][0, answer_start_index : answer_end_index + 1]
tokenizer.decode(predict_answer_tokens)