In [1]:
from datasets import load_dataset, Dataset
import pandas as pd
import numpy as np
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoModelForCausalLM
from transformers import DataCollatorWithPadding 
from datasets import load_metric
import torch
import json
from tqdm import tqdm
from sklearn.metrics import accuracy_score, precision_score, recall_score
from peft import PeftModel
import re
from sentence_transformers import SentenceTransformer, util

In [2]:
model = AutoModelForSequenceClassification.from_pretrained('ai_train_04_03_2024')
tokenizer = AutoTokenizer.from_pretrained('ai_train_04_03_2024')

In [3]:
def get_splited_docs(text, max_chunk_words=50):
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    current_chunk = ''
    current_chunk_word_count = 0

    for sentence in sentences:
        words_in_sentence = len(sentence.split())
        if current_chunk_word_count + words_in_sentence > max_chunk_words:
            chunks.append(current_chunk)
            current_chunk = sentence
            current_chunk_word_count = words_in_sentence
        else:
            if current_chunk:
                current_chunk += ' '
            current_chunk += sentence
            current_chunk_word_count += words_in_sentence
    if current_chunk:
        chunks.append(current_chunk)

    return chunks

In [4]:
def split_into_sentences(text):
    sentence_endings = r"""(?<=[.!?])\s"""
    sentences = re.split(sentence_endings, text)
    sentences = [s.strip() for s in sentences if s]  
    return sentences

In [5]:
def preprocess(text):
    return " ".join(text.lower().split())

In [6]:
def modifiing_output_for_display(a):
    total_string = ''
    for key,value in a.items():
        total_string += key

        all_ref = []
        for i in value:
            if i["score"] == 0 or len(i["sent"]) == 0:
                continue
            else:
                sents = ",".join(map(str, i["sent"]))
                ref = '(doc: {}, chunk_inx: {})'.format(i["doc"], sents)
                all_ref.append(ref)
        if len(all_ref) == 0:
            continue
        else:
            total_string += " ref: " + str(all_ref) + "."
    return total_string

In [7]:
def get_overall_prediction(statement,all_docs):
    answers = split_into_sentences(statement)
 
    all_results = {}
    overall_scores_each_answer = []
    for answer in answers:
        scores = []
        for i,full_doc in enumerate(all_docs):
            splitted_docs = get_splited_docs(full_doc)
#             _, semantic_sentences_idx = get_semantic_sentences(answer,full_doc)
            all_scores = [infer(answer,doc) for doc in splitted_docs]
            semantic_sentences_idx = [i for i, j in enumerate(all_scores) if j == 1]
            if (1 in all_scores):
                final_score = 1
            else:
                final_score = 0
            scores.append({'score': final_score, 'doc': i, "sent": semantic_sentences_idx}) 
        
        all_results[answer] = scores
        overall_scores_each_answer.append(max([i['score'] for i in scores]))
    
    overall_score = np.mean(np.array(overall_scores_each_answer))
    
    all_results = modifiing_output_for_display(all_results)
    
    return {"overall_score": np.round(overall_score,2), "info": all_results}

In [70]:
def infer(statement,doc):
    pairs = [[doc, statement]]

    inputs = tokenizer.batch_encode_plus(pairs, return_tensors='pt', padding=True)

    model.eval()
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits.cpu().detach().numpy()
        # convert logits to probabilities
        scores = 1 / (1 + np.exp(-logits)).flatten()
#     return scores
    if scores[0] > 0.5:
        return 1
    else:
        return 0

In [9]:
desc1 = """ Nikhil is a passionate AI enthusiast currently pursuing his master's degree in Artificial Intelligence. With a unique blend of experience as a former SDE at LTIMindtree, where he collaborated with an international bank to develop a mobile application, he brings in-depth expertise in software development. His tenure at LTIMindtree honed his skills in writing efficient code, utilizing tools like Git, JIRA, Postman API, MySQL, and PostgreSQL, and fostering cross-team collaboration, communication, and teamwork."""

desc2 = """During his undergraduate years, he delved into the realm of machine learning as an intern at NHAI, where he focused on predicting factors leading to road rutting. His exposure to Google Cloud Platform (GCP) and its suite of tools such as BigQueryML, LookML, AutoML, and VertexAPI further augmented his skill set. He can proficiently handle tasks ranging from creating virtual machines and VPNs to executing complex data analyses using BigQueryML and developing insightful visualizations with Tableau."""

desc3 = """With a keen interest in harnessing the power of data, he is adept at statistical analysis, numerical optimization, and implementing machine learning and deep learning algorithms. His expertise spans Python, C, R, PyTorch, TensorFlow, Keras, Selenium, Hadoop, Spark, PostgreSQL, MySQL, and web scraping, enabling him to tackle diverse challenges in data engineering, machine learning, and data analysis. As he embark on the next phase of his career journey, he is eagerly seeking internship opportunities that align with his passion for data-driven insights and innovation."""

desc4 = """He had the opportunity to work on diverse projects that showcase his skills and interests in AI and data science. For instance, one of his projects involved enhancing LLM Reliability through an Automated Fact-Checking System using a Cross-Encoder Model. He trained the model with a vast dataset of one million instances, deployed it on Google Cloud Platform for efficient real-time fact verification, and implemented a validation mechanism to ensure transparency and trustworthiness in information dissemination."""

desc = [desc1, desc2, desc3, desc4]

In [10]:
st1 = "Nikhil is pursuing masters."
st2 = "Nikhil knows python."
st3 = "Nikhil is pursuing masters in Cybersecurity."

sts = [st1, st2, st3]
for st in sts:
    print('statement: ',st)
    result = get_overall_prediction(st, desc)
    print('fact_result: ',result, '\n\n')

statement:  Nikhil is pursuing masters.
fact_result:  {'overall_score': 1.0, 'info': "Nikhil is pursuing masters. ref: ['(doc: 0, chunk_inx: 0)']."} 


statement:  Nikhil knows python.
fact_result:  {'overall_score': 1.0, 'info': "Nikhil knows python. ref: ['(doc: 2, chunk_inx: 1)']."} 


statement:  Nikhil is pursuing masters in Cybersecurity.
fact_result:  {'overall_score': 0.0, 'info': 'Nikhil is pursuing masters in Cybersecurity.'} 




In [11]:
infer(st1, desc1)

1

In [12]:
infer(st3, desc1)

0

## Demo data

In [13]:
st4 = "Nikhil completed his masters"
print('statement: ',st4)
result = get_overall_prediction(st4, desc)
print('fact_result: ',result)

statement:  Nikhil completed his masters
fact_result:  {'overall_score': 0.0, 'info': 'Nikhil completed his masters'}


In [14]:
st5 = "Nikhil worked on various projects"
print('statement: ',st5)
result = get_overall_prediction(st5, desc)
print('fact_result: ',result)

statement:  Nikhil worked on various projects
fact_result:  {'overall_score': 1.0, 'info': "Nikhil worked on various projects ref: ['(doc: 0, chunk_inx: 0,1)', '(doc: 2, chunk_inx: 1)', '(doc: 3, chunk_inx: 0)']."}


## Evaluation Metrics

In [15]:
from datasets import load_dataset

paws_dataset = load_dataset("paws", "labeled_final")

In [16]:
paws_test = paws_dataset["test"].to_pandas()

In [17]:
from sentence_transformers import InputExample
paws_test_examples = []
for i, row in paws_test.iterrows():
    paws_test_examples.append(InputExample(texts=[row['sentence2'], row['sentence1']], label= int(row['label'])))

In [21]:
predictions = []

for example in paws_test_examples:
    prediction = infer(example.texts[0], example.texts[1])  
    predictions.append(prediction)

In [22]:
true_labels = [example.label for example in paws_test_examples]

accuracy = accuracy_score(true_labels, predictions)
precision = precision_score(true_labels, predictions)
recall = recall_score(true_labels, predictions)

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

Accuracy: 0.9755
Precision: 0.9651810584958217
Recall: 0.979920814479638


In [71]:
test_data = pd.read_json("test.jsonl", lines=True)
test_data.head()

Unnamed: 0,id,premise,hypothesis,gold,genre,pairID
0,259676,"In the past, I have found that there is no poi...",You should prepare a speech.,entailment,generated,244447
1,88755,There is a persistent myth that the Egyptian m...,The Egyptian military was involved in the assa...,neutral,generated,365127
2,270193,The party of the proletariat is the party of t...,The party of the proletariat is the party of t...,neutral,generated,310832
3,164269,"If you're a good swimmer, it's a good idea to ...",The shallow end of the pool is good for swimming.,entailment,generated,36248
4,6435,"I was not in a position to take any action, bu...",The man did not have the power to take any act...,entailment,generated,52946


In [72]:
test_data = test_data[test_data["gold"]!= "neutral"]

In [73]:
test_data_labels = []
for i in test_data["gold"]:
    if i == "entailment":
        test_data_labels.append(1)
    else:
        test_data_labels.append(0)

In [74]:
test_data["label"] = test_data_labels
test_data.head()

Unnamed: 0,id,premise,hypothesis,gold,genre,pairID,label
0,259676,"In the past, I have found that there is no poi...",You should prepare a speech.,entailment,generated,244447,1
3,164269,"If you're a good swimmer, it's a good idea to ...",The shallow end of the pool is good for swimming.,entailment,generated,36248,1
4,6435,"I was not in a position to take any action, bu...",The man did not have the power to take any act...,entailment,generated,52946,1
6,15293,"""I'll be sure to leave the lights on for you,""...",She'll be sure to leave the lights off for you.,contradiction,generated_revised,339475,0
7,264807,This small town has been known for its dairy p...,"This town is known for its dairy products, and...",entailment,generated,272068,1


In [75]:
test_examples = []
for i, row in test_data.iterrows():
    test_examples.append(InputExample(texts=[row['premise'], row['hypothesis']], label= int(row['label'])))

In [76]:
test_predictions = []

for example in test_examples:
    prediction = infer(example.texts[0], example.texts[1])  
    test_predictions.append(prediction)

In [80]:
from sklearn.metrics import f1_score
true_labels = [example.label for example in test_examples]

precision = precision_score(true_labels, test_predictions)
recall = recall_score(true_labels, test_predictions)
f1 = f1_score(true_labels, test_predictions)

print("Precision:", precision)
print("Recall:", recall)
print("F1-Score:", f1)

Precision: 0.9376558603491272
Recall: 0.6071044133476857
F1-Score: 0.7370140476968311
