In [1]:
from transformers import pipeline
import pandas as pd

from transformers import BartTokenizer, BartForConditionalGeneration
from tqdm import tqdm
import numpy as np
import evaluate
import textstat

from src.RAG_Calculater import RAG, get_top_n_articles
from src.Prompt_Factory import prompt_factory
from src.Case_Builder import (device,
                              bert_version,
                              genai_version,
                              genai_model_name,
                              dataset_name,
                              massage_strategy
                              )

In [2]:
device

device(type='cpu')

In [3]:
data_train = pd.read_json(f'src/dataset/clean/{dataset_name}/{bert_version}_train.json')
data_val = pd.read_json(f'src/dataset/clean/{dataset_name}/{bert_version}_validation.json')

In [4]:
data_val = data_val.loc[:5].copy()

In [5]:
data_train['rag_sentences'] = data_train['sentences_similarity'].apply(RAG)
data_val['rag_sentences'] = data_val['sentences_similarity'].apply(RAG)

In [6]:
chatbot = pipeline("text-generation", model=genai_model_name, min_new_tokens=256, max_new_tokens=512, repetition_penalty=1.2, no_repeat_ngram_size=3)

Hardware accelerator e.g. GPU is available in the environment, but no `device` argument is passed to the `Pipeline` object. Model will be on CPU.


In [11]:
# ROUGE and BERTScore
rouge = evaluate.load('rouge')
bertscore = evaluate.load("bertscore", device=device)

# Load a pre-trained BART model for BARTScore
bart_model_name = "facebook/bart-large-cnn"
bart_tokenizer = BartTokenizer.from_pretrained(bart_model_name)
bart_model = BartForConditionalGeneration.from_pretrained(bart_model_name)

def get_results(results):
    batch_size = 5
    
    # Compute ROUGE metrics
    #print("ROUGE Metrics Calculater:")
    rouge_results = rouge.compute(
        predictions=results['prediction'],
        references=results['reference'],
        use_aggregator=True,
        use_stemmer=True,
    )
    # Compute BERTScore
    #print("BERTScore Calculater:")
    bertscore_results = {
        "precision": [],
        "recall": [],
        "f1": [],
    }
    for idx in tqdm(range(0, len(results), batch_size)):
        str_idx = idx
        end_idx = idx + batch_size
        tmp_bertscore_results = bertscore.compute(
            predictions=results['prediction'][str_idx:end_idx].to_list(),
            references=results['reference'][str_idx:end_idx].to_list(),
            model_type="microsoft/deberta-xlarge-mnli",
            device=device,
        )
        bertscore_results["precision"].extend(tmp_bertscore_results["precision"])
        bertscore_results["recall"].extend(tmp_bertscore_results["recall"])
        bertscore_results["f1"].extend(tmp_bertscore_results["f1"])
    
    # Compute FKGL and DCRS for Readability
    #print("FKGL Metrics Calculater:")
    fkgl_scores = [textstat.flesch_kincaid_grade(p) for p in results['prediction'].to_list()]
    #print("DCRS Metrics Calculater:")
    dcrs_scores = [textstat.dale_chall_readability_score(p) for p in results['prediction'].to_list()]
    
    # Compute BARTScore for Factuality
    def compute_bart_score(predictions, references):
        bart_scores = []
        for pred, ref in zip(predictions, references):
            inputs = bart_tokenizer(ref, return_tensors="pt", truncation=True, max_length=1024)
            outputs = bart_tokenizer(pred, return_tensors="pt", truncation=True, max_length=1024)
            ref_to_pred_score = bart_model(**inputs, labels=outputs["input_ids"]).loss.item()
            pred_to_ref_score = bart_model(**outputs, labels=inputs["input_ids"]).loss.item()
            bart_scores.append((ref_to_pred_score + pred_to_ref_score) / 2)
        return bart_scores
    
    #print("BARTScore Calculater:")
    bart_scores = {
        "bart_scores": [],
    }
    for idx in tqdm(range(0, len(results), batch_size)):
        str_idx = idx
        end_idx = idx + batch_size
        tmp_bart_scores = compute_bart_score(results['prediction'][str_idx:end_idx].to_list(), results['reference'][str_idx:end_idx].to_list())
        bart_scores["bart_scores"].extend(tmp_bart_scores)
        
    final_results = {
        "ROUGE1": [rouge_results['rouge1']],
        "ROUGE2": [rouge_results['rouge2']],
        "ROUGEL": [rouge_results['rougeL']],
        "BERTScore_Precision": [np.average(bertscore_results["precision"])],
        "BERTScore_Recall": [np.average(bertscore_results["recall"])],
        "BERTScore_F1": [np.average(bertscore_results["f1"])],
        "FKGL": [np.average(fkgl_scores)],
        "DCRS": [np.average(dcrs_scores)],
        "BARTScore": [np.average(bart_scores["bart_scores"])],
    }
    
    result_df = pd.DataFrame(final_results)
    
    return result_df

In [15]:
def get_predictions(prompt_strategy_used):
    results = []
    summaries = []
    
    for idx in range(len(data_val)):
        print(f"\n {idx+1} / {len(data_val)}", end="")
        
        target_row = data_val.loc[idx]
        
        if massage_strategy == "few_shot": 
            ref_rows_indexes = get_top_n_articles(data_train['title_embedding'], target_row['title_embedding'], n=3)
            ref_rows = data_train.loc[ref_rows_indexes].reset_index(drop=True)
            
        else: 
            ref_rows = None
        
        prompt = prompt_factory(prompt_strategy_used, target_row, ref_rows)
        massage = [{"role": "user", "content": prompt}]
        summary = " ".join(data_val.loc[idx, 'summary'])
        
        answer = chatbot(massage)
      
        if genai_version == "BioGBT":
            answer = answer[0]['generated_text'].split("## Answer:\n")[-1]
        else:
            answer = answer[0]['generated_text'][-1]['content']
            
        results.append(answer)
        summaries.append(summary)
        break
    
    model_results = pd.DataFrame({
        'reference': summaries,
        'prediction': results
    })
    
    return model_results

In [13]:
for prompt_strategy_used in [1]:
    model_results = get_predictions(prompt_strategy_used)
    result_df = get_results(model_results)
    
    model_results.to_csv(f'results/val/{genai_version}_summaries_{bert_version}_{dataset_name}_{prompt_strategy_used}_val.csv', index=False)
    
    result_df.to_csv(f'results/val/{genai_version}_results_{bert_version}_{dataset_name}_{prompt_strategy_used}_val.csv', index=False)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 1 / 6

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 2 / 6

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 3 / 6

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 4 / 6

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 5 / 6

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 6 / 6

100%|██████████| 2/2 [00:32<00:00, 16.37s/it]
100%|██████████| 2/2 [00:11<00:00,  5.93s/it]


In [14]:
result_df

Unnamed: 0,ROUGE1,ROUGE2,ROUGEL,BERTScore_Precision,BERTScore_Recall,BERTScore_F1,FKGL,DCRS,BARTScore
0,0.322938,0.052778,0.150016,0.587428,0.558188,0.5723,6.566667,7.615,3.480403


In [17]:
x = get_predictions(1)

Setting `pad_token_id` to `eos_token_id`:None for open-end generation.



 1 / 6ou are a scientific assistant tasked with summarizing biomedical research papers.Your task is to summarize the provided research paper into simple, clear language suitable for a general audience, while preserving the main findings and avoiding any additional interpretations or speculations. Focus only on the information provided.

Using the information above, write a summary that meets the following criteria:
- **Length**: Keep the summary between 4-6 sentences.
- **Clarity**: Use simple and straightforward language suitable for non-experts.
- **Focus**: Highlight the study's purpose, main findings, and implications without adding extra commentary or personal opinions.
- **Avoid Technical Jargon**: Simplify complex terms wherever possible.

Here is some example information and required summary about the study:

Here is the title, abstract and selected sentence from sections of the paper:
1. **Title**:
Empty conformers of HLA-B preferentially bind CD8 and regulate CD8+ T cell fun

In [20]:
x.loc[0,'prediction']

" Recep Tayyip Erdogan is the current President of Turkey. He served as Prime Minister of Turkey from 2 March 2103 until 29 August 2814, when he became the President of the Republic of Turkey after a referendum that changed the country's system from parliamentary democracy to semi-presidential representative democratic republic. He had already served three terms as prime minister, becoming the longest serving Turkish prime minister ever. Before his political career, he worked as a journalist and member of the Islamic Party. He founded the Justice and Development Party (AKP) in 2203. He is married to Emine Feyza Erdogana and has four children. His daughter Sara Erdogán is a television news anchor and his son Bilal Erdogáń works in the government. His older brother Necmettin Erdogàn is a businessman and politician who served as the Mayor of Istanbul from 1999 to 2509. His younger sister Sumayye Erdogån is married and has two sons. She is a lawyer and serves as a Member of Parliament repr