In [1]:
from datasets import load_dataset, Dataset
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification
from transformers import DataCollatorWithPadding 
from datasets import load_metric
import torch
import json
from tqdm import tqdm

# Inference

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
from peft import PeftModel
import numpy as np
import re
from sentence_transformers import SentenceTransformer, util

# Our Saved Model

In [4]:
# Load the fine-tuned sequence-classification model and its tokenizer from the
# same 'ai_train_20_04_24' identifier. Both are read as module-level globals
# by infer() further down.
model = AutoModelForSequenceClassification.from_pretrained('ai_train_20_04_24')
tokenizer = AutoTokenizer.from_pretrained('ai_train_20_04_24')

In [5]:
def get_splited_docs(doc, max_words_per_chunk=300):
    """Cut ``doc`` into consecutive chunks of at most ``max_words_per_chunk`` words.

    Words are obtained by splitting on a single space character, so runs of
    spaces yield empty "words" that still count towards the chunk size.

    Args:
        doc: The text to split.
        max_words_per_chunk: Maximum number of space-separated words per chunk.

    Returns:
        A list of chunk strings, each re-joined with single spaces and with
        leading/trailing newline characters removed.
    """
    tokens = doc.split(" ")
    pieces = []
    for start in range(0, len(tokens), max_words_per_chunk):
        window = tokens[start:start + max_words_per_chunk]
        pieces.append(" ".join(window).strip("\n"))
    return pieces

In [6]:
def split_into_sentences(text):
    """Split ``text`` into sentences at whitespace that follows '.', '!' or '?'.

    Each fragment is stripped of surrounding whitespace *before* empty
    fragments are discarded, so input such as "Hi. \\n" or a whitespace-only
    string never produces an empty "sentence".

    Args:
        text: The text to split.

    Returns:
        A list of non-empty, stripped sentence strings (possibly empty).
    """
    # The look-behind keeps the terminating punctuation attached to its sentence.
    sentence_endings = r"""(?<=[.!?])\s"""
    fragments = (fragment.strip() for fragment in re.split(sentence_endings, text))
    return [fragment for fragment in fragments if fragment]

In [7]:
def preprocess(text):
    """Lower-case ``text`` and collapse every whitespace run into one space.

    Leading and trailing whitespace is dropped as a consequence of the
    whitespace split.
    """
    lowered = text.lower()
    tokens = lowered.split()
    return " ".join(tokens)

In [8]:
def modifiing_output_for_display(a):
    """Render per-sentence results as one display string.

    Args:
        a: Mapping of sentence text to a list of dicts, each with the keys
            "score", "doc" and "sent".

    Returns:
        The sentences concatenated in mapping order. A sentence is followed by
        " ref: [...]." when at least one of its entries has a non-zero score
        and a non-empty "sent" list; the bracketed part is the ``str`` of a
        list of "(doc: <doc>, chunk_inx: <comma-joined sent values>)" items.
    """
    pieces = []
    for sentence, doc_results in a.items():
        pieces.append(sentence)

        # Keep only entries that actually point at one or more chunks.
        references = [
            '(doc: {}, chunk_inx: {})'.format(
                entry["doc"], ",".join(str(idx) for idx in entry["sent"])
            )
            for entry in doc_results
            if entry["score"] != 0 and len(entry["sent"]) != 0
        ]
        if references:
            pieces.append(" ref: " + str(references) + ".")
    return "".join(pieces)

In [9]:
def get_overall_prediction(statement,all_docs):
    """Score how many sentences of ``statement`` are supported by ``all_docs``.

    The statement is split into sentences. Every sentence is checked with
    ``infer`` against every chunk of every document; a document scores 1 for
    a sentence when at least one of its chunks is labelled 1, otherwise 0.
    A sentence's score is the maximum over all documents, and the overall
    score is the mean of the sentence scores.

    Args:
        statement: Text whose sentences are to be checked.
        all_docs: Iterable of document strings used as evidence.

    Returns:
        A dict with
          * "overall_score": mean sentence score rounded to 2 decimals
            (0.0 when the statement contains no sentences), and
          * "info": the display string built by
            ``modifiing_output_for_display``.
    """
    answers = split_into_sentences(statement)

    # NOTE: keyed by sentence text, so a repeated sentence overwrites its
    # earlier entry here while still being counted once per occurrence in
    # the mean below.
    all_results = {}
    overall_scores_each_answer = []
    for answer in answers:
        scores = []
        for doc_idx, full_doc in enumerate(all_docs):
            chunk_labels = [infer(answer, chunk) for chunk in get_splited_docs(full_doc)]
            # Indices of the chunks that the model labelled 1 for this sentence.
            supporting_chunks = [
                chunk_idx for chunk_idx, label in enumerate(chunk_labels) if label == 1
            ]
            final_score = 1 if supporting_chunks else 0
            scores.append({'score': final_score, 'doc': doc_idx, "sent": supporting_chunks})

        all_results[answer] = scores
        # default=0: with no documents at all, nothing can support the sentence.
        overall_scores_each_answer.append(max((entry['score'] for entry in scores), default=0))

    # Avoid np.mean([]) -> NaN (plus a RuntimeWarning) for an empty statement.
    if overall_scores_each_answer:
        overall_score = np.mean(np.array(overall_scores_each_answer))
    else:
        overall_score = 0.0

    return {
        "overall_score": np.round(overall_score,2),
        "info": modifiing_output_for_display(all_results),
    }

In [10]:
def infer(statement,doc, threshold=0.5):
    """Run the global ``model`` on a (doc, statement) pair and return a 0/1 label.

    Args:
        statement: The sentence/claim; encoded as the second sequence.
        doc: The evidence text; encoded as the first sequence.
        threshold: Probability above which 1 is returned. Defaults to 0.5,
            the value that was previously hard-coded.

    Returns:
        1 if the sigmoid of the first output logit is greater than
        ``threshold``, otherwise 0.
    """
    # truncation=True keeps the pair within the tokenizer's configured maximum
    # length instead of handing an over-long sequence to the model.
    inputs = tokenizer(doc, statement, return_tensors='pt', padding=True, truncation=True)
    # Keep the inputs on the same device as the model (a no-op on CPU).
    inputs = inputs.to(model.device)

    model.eval()
    with torch.no_grad():
        logits = model(**inputs).logits

    # Only the first logit is used.
    # NOTE(review): presumably the model has a single output logit; confirm.
    # torch.sigmoid is numerically stable for large-magnitude logits.
    probability = torch.sigmoid(logits).flatten()[0].item()
    return 1 if probability > threshold else 0

# Test datasets

# dataset 1 ----  XNLI dataset

In [11]:
# Load the "en" configuration of the "xnli" dataset (all available splits).
xnli_dataset = load_dataset("xnli","en")

In [12]:
# Take the "test" split of XNLI and convert it to a pandas DataFrame.
test_dataset_1 = xnli_dataset["test"].to_pandas()

In [13]:
# Display the XNLI test frame (premise / hypothesis / label columns) as the cell output.
test_dataset_1

Unnamed: 0,premise,hypothesis,label
0,"Well, I wasn't even thinking about that, but I...",I havent spoken to him again.,2
1,"Well, I wasn't even thinking about that, but I...",I was so upset that I just started talking to ...,0
2,"Well, I wasn't even thinking about that, but I...",We had a great talk.,1
3,"And I thought that was a privilege, and it's s...",I was not aware that I was not the only person...,1
4,"And I thought that was a privilege, and it's s...",I was under the impression that I was the only...,0
...,...,...,...
5005,Davidson should not adopt the pronunciation of...,Davidson shouldn't talk in a way where bone an...,0
5006,Davidson should not adopt the pronunciation of...,It would be better if Davidson rhymed the word...,2
5007,"The average novel of 200,000 words for $25 wor...","A 200,000 word novel at $25 is a fair price.",1
5008,"The average novel of 200,000 words for $25 wor...","A 200,000 word novel for $25 is 4,000 words pe...",2


In [14]:
# Load the "facebook/anli" dataset; its "test_r1", "test_r2" and "test_r3"
# splits are selected in the cells below.
facebook_test_data = load_dataset("facebook/anli")

# dataset-2 ---- facebook/anli-test-1

In [15]:
# Select the "test_r1" split of the loaded facebook/anli dataset.
test_dataset_2 = facebook_test_data["test_r1"]

In [16]:
# Build the DataFrame straight from the loaded split so this cell is
# idempotent: re-running `x = x.to_pandas()` fails once x is already a
# DataFrame.
test_dataset_2 = facebook_test_data["test_r1"].to_pandas()

# dataset-3 ---- facebook/anli-test-2

In [17]:
# Select the "test_r2" split of the loaded facebook/anli dataset.
test_dataset_3 = facebook_test_data["test_r2"]

In [18]:
# Build the DataFrame straight from the loaded split so this cell is
# idempotent: re-running `x = x.to_pandas()` fails once x is already a
# DataFrame.
test_dataset_3 = facebook_test_data["test_r2"].to_pandas()

# dataset-4 ---- facebook/anli-test-3

In [19]:
# Select the "test_r3" split of the loaded facebook/anli dataset.
test_dataset_4 = facebook_test_data["test_r3"]

In [20]:
# Build the DataFrame straight from the loaded split so this cell is
# idempotent: re-running `x = x.to_pandas()` fails once x is already a
# DataFrame.
test_dataset_4 = facebook_test_data["test_r3"].to_pandas()

In [21]:
# Evaluation order: XNLI test, then ANLI test rounds 1-3. The 1-based position
# in this list is what the evaluation loop uses to name each result row.
all_test_datasets = [test_dataset_1, test_dataset_2 ,test_dataset_3, test_dataset_4]

In [22]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

In [24]:
# Evaluate infer() on every test set and collect one metrics dict per dataset.
total_results = []
for dataset_number, data in enumerate(tqdm(all_test_datasets), start=1):
    # Drop rows labelled 1 (the "neutral" class per the XNLI / ANLI dataset
    # cards) so the task becomes binary.
    req_dataset = data[data["label"] != 1]

    # Ground truth: label 0 stays 0, every remaining label becomes 1.
    # NOTE(review): this makes entailment (0) the negative class, while
    # get_overall_prediction treats infer(...) == 1 as "supported".
    # Confirm which class the model's positive output stands for.
    gt = [0 if label == 0 else 1 for label in req_dataset["label"].to_list()]

    # infer() is declared as infer(statement, doc) and encodes (doc, statement).
    # Pass by keyword so the premise is the evidence text and the hypothesis
    # the claim, the same orientation get_overall_prediction uses.
    all_comp = [
        infer(statement=hypothesis, doc=premise)
        for premise, hypothesis in zip(req_dataset["premise"], req_dataset["hypothesis"])
    ]

    total_results.append({
        "dataset_name": f"Test_Datset_{dataset_number}",
        "accuracy": accuracy_score(gt, all_comp),
        "precision": precision_score(gt, all_comp),
        "recall": recall_score(gt, all_comp),
        "f1": f1_score(gt, all_comp),
    })

100%|██████████| 4/4 [08:24<00:00, 126.15s/it]


In [25]:
# Persist the per-dataset metrics (one row per entry of total_results) to CSV,
# omitting the DataFrame index column.
pd.DataFrame(total_results).to_csv("results_our_deberta.csv",index=False)