# Datasets are mounted from Google Drive

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
datasets_path = '/content/drive/MyDrive/Thesis/Datasets/'

## BioASQ


In [None]:
!pip install ijson

Collecting ijson
  Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m111.8/111.8 kB[0m [31m1.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ijson
Successfully installed ijson-3.2.3


In [None]:
BioASQ_training_dataset_path = datasets_path + 'BioASQ/Training'
BioASQ_testing_dataset_path = datasets_path + 'BioASQ/Testing'

The function to retrieve the questions and answers from JSON

In [None]:
import ijson

def retrieve_qa_from_json(file_path):
    questions_answers = []
    with open(file_path, 'r') as file:
        json_objects = ijson.items(file, 'questions.item')
        for obj in json_objects:
            question = obj.get('body', 'No question body')
            exact_answer = obj.get('exact_answer', 'No exact answer')
            ideal_answers = obj.get('ideal_answer', [])
            questions_answers.append({'question': question, 'exact_answer': exact_answer, 'ideal_answers': ideal_answers})
    return questions_answers

In [None]:
import os
import json
import pandas as pd

BioASQ_df = pd.DataFrame()

for root, dirs, files in os.walk(BioASQ_training_dataset_path):
    for file in files:
        if file.endswith('.json'):
            file_path = os.path.join(root, file)
            qa_list = retrieve_qa_from_json(file_path)
            qa_df = pd.DataFrame(qa_list)
            BioASQ_df = pd.concat([BioASQ_df, qa_df], ignore_index=True)

BioASQ_df

Unnamed: 0,question,exact_answer,ideal_answers
0,Which genes have been found mutated in Gray pl...,"[[NBEAL2], [GFI1B], [GATA1]]",The genetic defect responsible for gray platel...
1,What type of enzyme is peroxiredoxin 2 (PRDX2)?,[antioxidant],Peroxiredoxin 2 (PRDX2) is an antioxidant enzy...
2,Is c-myc subject to regulation by the circadia...,yes,"Yes, the expression of c-myc is regulated by t..."
3,Can Levoxyl (levothyroxine sodium) cause insom...,yes,Levoxyl monotherapy is associated with increas...
4,Which are the cardiac manifestations of Marfan...,"[[aortic root dilation], [mitral valve prolaps...",Cardiac manifestations of Marfan syndrome incl...
...,...,...,...
29897,What is telegenetics?,No exact answer,[Telegenetics involves the use of technology (...
29898,What is the mechanism of action of Mitapivat?,No exact answer,"[Mitapivat, an oral activator of pyruvate kina..."
29899,Do cells undergoing necroptosis show disruptio...,yes,[Necroptosis is a form of caspase-independent ...
29900,What is the definition of dermatillomania?,No exact answer,[Dermatillomania is a condition that leads to ...


In [None]:
BioASQ_test_df = pd.DataFrame()

for root, dirs, files in os.walk(BioASQ_testing_dataset_path):
    for file in files:
        if file.endswith('.json'):
            file_path = os.path.join(root, file)
            qa_list = retrieve_qa_from_json(file_path)
            qa_df = pd.DataFrame(qa_list)
            BioASQ_test_df = pd.concat([BioASQ_test_df, qa_df], ignore_index=True)

BioASQ_test_df

Unnamed: 0,question,exact_answer,ideal_answers
0,What is the cause of Phthiriasis Palpebrarum?,[[Pthirus pubis]],[Phthiriasis palpebrarum is a rare eyelid infe...
1,What is HOCOMOCO?,No exact answer,[HOCOMOCO is a comprehensive collection of hum...
2,Can venlafaxine block NET and SERT?,yes,"[Yes, venlafaxine inhibits both the NET and SE..."
3,Which polyQ tract protein is linked to Spinoce...,[[Ataxin 2]],[Ataxin-2 is an evolutionarily conserved prote...
4,Is valproic acid effective for glioblastoma tr...,yes,"[Yes, valproic acid prolong survival of gliobl..."
...,...,...,...
4801,The 1p19q co-deletion is associated with what ...,"[[astrocytomas, glioblastoma], [Oligodendrogli...",[There are three subgroups of 1p/19q co-delete...
4802,The X-inactive specific transcript (Xist) gene...,"[[Long non-coding RNA, lncRNA, long non-coding...",[The Xist gene encodes a long non-coding (lnc)...
4803,What are microexons?,No exact answer,[Microexons are extremely short exons of lengt...
4804,Is tebentafusp effective for uveal melanoma?,yes,[Yes. Tebentafusp effective for uveal melanoma.]


## MedQuAD

In [None]:
!git clone https://github.com/abachaa/MedQuAD

Cloning into 'MedQuAD'...
remote: Enumerating objects: 11310, done.[K
remote: Counting objects: 100% (18/18), done.[K
remote: Compressing objects: 100% (15/15), done.[K
remote: Total 11310 (delta 8), reused 8 (delta 3), pack-reused 11292[K
Receiving objects: 100% (11310/11310), 11.01 MiB | 11.30 MiB/s, done.
Resolving deltas: 100% (6806/6806), done.
Updating files: 100% (11277/11277), done.


In [None]:
%cd MedQuAD

/content/MedQuAD


In [None]:
%ls

[0m[01;34m10_MPlus_ADAM_QA[0m/             [01;34m6_NINDS_QA[0m/
[01;34m11_MPlusDrugs_QA[0m/             [01;34m7_SeniorHealth_QA[0m/
[01;34m12_MPlusHerbsSupplements_QA[0m/  [01;34m8_NHLBI_QA_XML[0m/
[01;34m1_CancerGov_QA[0m/               [01;34m9_CDC_QA[0m/
[01;34m2_GARD_QA[0m/                    LICENSE.txt
[01;34m3_GHR_QA[0m/                     QA-TestSet-LiveQA-Med-Qrels-2479-Answers.zip
[01;34m4_MPlus_Health_Topics_QA[0m/     readme.txt
[01;34m5_NIDDK_QA[0m/


In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Function to recursively find all XML files in a directory
def find_xml_files(directory):
    xml_files = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.xml'):
                xml_files.append(os.path.join(root, file))
    return xml_files

def parse_xml_for_qa(file_path):
    try:
        # Parse the XML content
        tree = ET.parse(file_path)
        root = tree.getroot()
        qa_pairs = []
        for qa_pair in root.findall('.//QAPair'):
            question = qa_pair.find('Question').text
            answer = qa_pair.find('Answer').text if qa_pair.find('Answer') is not None else ''
            qa_pairs.append({'Question': question, 'Answer': answer})
        return qa_pairs
    except ET.ParseError as e:
        print(f"Parsing failed: {e}")
        return []

# Main script to process all XML files in the cloned repo directory
def process_xml_files(repo_dir):
    xml_files = find_xml_files(repo_dir)

    all_qa_pairs = []
    for xml_file in xml_files:
        qa_pairs = parse_xml_for_qa(xml_file)
        all_qa_pairs.extend(qa_pairs)

    # Create a DataFrame with all Question and Answer pairs
    df_qa = pd.DataFrame(all_qa_pairs)
    return df_qa

# Example usage:
# Make sure to use the correct path to your cloned repository
repo_dir = '/content/MedQuAD'  # Update this path

In [None]:
df_qa = process_xml_files(repo_dir)

In [None]:
df_qa

Unnamed: 0,Question,Answer
0,How to prevent Prescription and Illicit Drug A...,"Many Reasons for Abuse Drug abuse, whether pre..."
1,What is (are) Prescription and Illicit Drug Ab...,Addiction is a chronic disease in which a pers...
2,What is (are) Prescription and Illicit Drug Ab...,Physical dependence is a normal process that c...
3,What are the symptoms of Prescription and Illi...,"A persons behavior, especially changes in beha..."
4,What is (are) Prescription and Illicit Drug Ab...,The prescription medications most commonly abu...
...,...,...
47436,What is (are) Parasites - African Trypanosomia...,Frequently Asked Queestions (FAQs)
47437,Who is at risk for Parasites - African Trypano...,There are two subspecies of the parasite Trypa...
47438,How to diagnose Parasites - African Trypanosom...,The diagnosis of African Trypanosomiasis is ma...
47439,What are the treatments for Parasites - Africa...,All persons diagnosed with African Trypanosomi...


In [None]:
df_qa.to_csv('MedQuAD-training.csv')

In [None]:
import pandas as pd

MedQuAD_dataset = datasets_path + 'MedQuAD/All-2479-Answers-retrieved-from-MedQuAD.csv'
data = pd.read_csv(MedQuAD_dataset)

In [None]:
questions = []
answers = []

for index, row in data.iterrows():
    parts = row['Answer'].split('Question:', 1)
    if len(parts) > 1:
        qa_parts = parts[1].split('\nAnswer:', 1)
        if len(qa_parts) > 1:
            question = qa_parts[0].strip()
            answer = qa_parts[1].split('\nURL:', 1)[0].strip()
            questions.append(question)
            answers.append(answer)
        else:
            questions.append(parts[1].strip())
            answers.append('')

MedQuAD_df = pd.DataFrame({'Question': questions, 'Answer': answers})
MedQuAD_df

Unnamed: 0,Question,Answer
0,What is (are) Polycystic ovary syndrome ? (Als...,Polycystic ovary syndrome is a condition in wh...
1,What causes Polycystic ovary syndrome ? (Also ...,PCOS is linked to changes in hormone levels th...
2,What causes Noonan syndrome ?\nURL: https://ww...,Noonan syndrome is linked to defects in severa...
3,What are the complications of Noonan syndrome ...,- Buildup of fluid in tissues of body (lymphed...
4,How to prevent Noonan syndrome ?\nURL: https:/...,Couples with a family history of Noonan syndro...
...,...,...
2474,What should I do if I forget a dose of Glimepi...,"Before you start to take glimepiride, ask you ..."
2475,What are the side effects or risks of Glimepir...,This medication may cause changes in your bloo...
2476,What to do in case of emergency or overdose of...,"In case of overdose, call your local poison co..."
2477,What other information should I know about Gli...,Keep all appointments with your doctor and the...


## TREC 2017

In [None]:
%ls

[0m[01;34mLiveQA_MedicalTask_TREC2017[0m/  [01;34msample_data[0m/


In [None]:
!git clone https://github.com/abachaa/LiveQA_MedicalTask_TREC2017.git

Cloning into 'LiveQA_MedicalTask_TREC2017'...
remote: Enumerating objects: 61, done.[K
remote: Counting objects: 100% (30/30), done.[K
remote: Compressing objects: 100% (30/30), done.[K
remote: Total 61 (delta 8), reused 0 (delta 0), pack-reused 31[K
Receiving objects: 100% (61/61), 659.90 KiB | 17.83 MiB/s, done.
Resolving deltas: 100% (15/15), done.


In [None]:
%cd LiveQA_MedicalTask_TREC2017

/content/LiveQA_MedicalTask_TREC2017


In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

# Function to recursively find all XML files in a directory
def find_xml_files(directory):
    xml_files = []
    for root, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith('.xml'):
                xml_files.append(os.path.join(root, file))
    return xml_files

def parse_xml_for_qa(file_path):
    try:
        # Parse the XML content
        tree = ET.parse(file_path)
        root = tree.getroot()
        qa_pairs = []
        for nlm_question in root.findall('NLM-QUESTION'):
            message = nlm_question.find('.//MESSAGE').text if nlm_question.find('.//MESSAGE') is not None else ''
            summary = nlm_question.find('.//NLM-Summary').text if nlm_question.find('.//NLM-Summary') is not None else ''
            ref_answers = nlm_question.find('ReferenceAnswers')
            for ref_answer in ref_answers.findall('RefAnswer'):
                answer_text = ref_answer.find('ANSWER').text if ref_answer.find('ANSWER') is not None else ''
                qa_pairs.append({
                    'Message': message,
                    'Summary': summary,
                    'Answer': answer_text,
                })
        return qa_pairs
    except ET.ParseError as e:
        print(f"Parsing failed: {e}")
        return []

# Main script to process all XML files in the cloned repo directory
def process_xml_files(repo_dir):
    xml_files = find_xml_files(repo_dir)

    all_qa_pairs = []
    for xml_file in xml_files:
        qa_pairs = parse_xml_for_qa(xml_file)
        all_qa_pairs.extend(qa_pairs)

    # Create a DataFrame with all Question and Answer pairs
    df_qa = pd.DataFrame(all_qa_pairs)
    return df_qa

# Example usage:
# Make sure to use the correct path to your cloned repository


In [None]:
repo_training_dir = '/content/LiveQA_MedicalTask_TREC2017/TrainingDatasets'  # Update this path

In [None]:
df_training_qa = process_xml_files(repo_training_dir)
df_training_qa

Unnamed: 0,Message,Answer
0,Literature on Cardiac amyloidosis. Please let...,Cardiac amyloidosis is a disorder caused by de...
1,Literature on Cardiac amyloidosis. Please let...,"The term ""amyloidosis"" refers not to a single ..."
2,Migraine seems to be a spectrum of conditions ...,There is no specific cure for migraine headach...
3,Migraine seems to be a spectrum of conditions ...,There is no absolute cure for migraine since i...
4,DO I USE PYRIDOXINE TABLETS EVEN IF IM PREGNANT?,"Before taking pyridoxine, tell your doc..."
...,...,...
629,If you should contact MRSA and take antibiotic...,"Occasionally, a person who has been treated fo..."
630,What if you do not have surgery to remove a pa...,Your specialist will be able to advise you of ...
631,Thank for your information on my earlier quest...,Your doctor may order tests to help determine ...
632,I have numbness/tingling in my lower right arm...,Home Care Your doctor should find and treat t...


In [None]:
df_training_qa.to_csv('trec_2017_training.csv')

In [None]:
repo_testing_dir = '/content/LiveQA_MedicalTask_TREC2017/TestDataset'  # Update this path

In [None]:
df_testing_qa = process_xml_files(repo_testing_dir)
df_testing_qa

Unnamed: 0,Message,Summary,Answer
0,What are the references with noonan syndrome a...,What is the relationship between Noonan syndro...,Noonan's syndrome is an eponymic designation ...
1,What are the references with noonan syndrome a...,What is the relationship between Noonan syndro...,10% of patients with Noonan syndrome have rena...
2,What are the references with noonan syndrome a...,What is the relationship between Noonan syndro...,"Genitourinary. Renal abnormalities, generally ..."
3,What are the references with noonan syndrome a...,,Noonan's syndrome is an eponymic designation ...
4,What are the references with noonan syndrome a...,,10% of patients with Noonan syndrome have rena...
5,What are the references with noonan syndrome a...,,"Genitourinary. Renal abnormalities, generally ..."


In [None]:
import xml.etree.ElementTree as ET
import pandas as pd
import re

# Load and parse the XML file
tree = ET.parse(datasets_path + 'TREC_2017/med-qs-and-reference-answers.xml')
root = tree.getroot()

questions = []
answers = []

def clean_text(text):
    return re.sub(r'[\t\n]+', ' ', text).strip()

for question in root.findall('.//NLM-QUESTION'):
    message = question.find('MESSAGE').text if question.find('MESSAGE') is not None else "No Message"
    full_question = clean_text(message)
    answer = question.find('Answer').text if question.find('Answer') is not None else "No Answer"
    answers.append(clean_text(answer))
    questions.append(full_question)

trec_2017_df = pd.DataFrame({
    'Question': questions,
    'Answer': answers
})

trec_2017_df

Unnamed: 0,Question,Answer
0,What are the references with noonan syndrome a...,Noonan's syndrome is an eponymic designation t...
1,Re:NDC# 0115-0672-50 Zolmitriptan tabkets 5mg...,Zolmitriptan tablets are available as 2.5 mg (...
2,are they gluten free,Active Ingredients Amphetamine Aspartate Amphe...
3,vdrl positive patients please tell me what are...,"Syphilis If the RPR, VDRL, or TRUST tests are..."
4,How much glucose is in my GlucaGen HypoKit ? ...,"GLUCAGEN glucagon hydrochloride injection, po..."
...,...,...
99,how does effextor cause ED and what is the mim...,The recommended starting dose for Effexor is 7...
100,How long has this non prescription drug been i...,Non-aspirin NSAID use was associated with an i...
101,i want to know more about aeortic stenosis,The aorta is the main artery that carries bloo...
102,What can cause white cells ti uprate,A high white blood cell count usually indicate...


## CHQ-Summ

In [None]:
!pip install ijson

Collecting ijson
  Downloading ijson-3.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (111 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/111.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m102.4/111.8 kB[0m [31m3.6 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m111.8/111.8 kB[0m [31m2.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ijson
Successfully installed ijson-3.2.3


In [None]:
import pandas as pd
import os
import ijson

In [None]:
CHQ_Summ_dataset_path = datasets_path + 'CHQ-Summ'

In [None]:
def retrieve_qs_from_json(file_path):
    questions = []
    with open(file_path, 'r') as file:
        json_objects = ijson.items(file, 'item')
        for obj in json_objects:
          question = obj.get('human_summary')
          questions.append({'question': question})
    return questions

In [None]:
CHQ_Summ_dataset_train_path = CHQ_Summ_dataset_path + '/Train'
CHQ_Summ_dataset_test_path = CHQ_Summ_dataset_path + '/Test'
CHQ_Summ_dataset_validation_path = CHQ_Summ_dataset_path + '/Validation'

In [None]:
CHQ_Summ_questions_df = pd.DataFrame()

for root, dirs, files in os.walk(CHQ_Summ_dataset_train_path):
    for file in files:
        if file.endswith('.json'):
            file_path = os.path.join(root, file)
            q_list = retrieve_qs_from_json(file_path)
            q_df = pd.DataFrame(q_list)
            CHQ_Summ_questions_df = pd.concat([CHQ_Summ_questions_df, q_df], ignore_index=True)

CHQ_Summ_questions_df

Unnamed: 0,question
0,What is the best way to get rid of a crick in ...
1,Is there a confined farting syndrome?
2,What are the symptoms of eating disorder?
3,How to treat and prevent back pain after play...
4,What are the early symptoms of pregnancy?
...,...
995,What should I do if the baby has a diaper rash?
996,What are the side effects of masturbation on h...
997,What are the treatments and symptoms for Atten...
998,Is surgery or glasses a correct treatment for ...


In [None]:
CHQ_Summ_questions_df = pd.DataFrame()

for root, dirs, files in os.walk(CHQ_Summ_dataset_test_path):
    for file in files:
        if file.endswith('.json'):
            file_path = os.path.join(root, file)
            q_list = retrieve_qs_from_json(file_path)
            q_df = pd.DataFrame(q_list)
            CHQ_Summ_questions_df = pd.concat([CHQ_Summ_questions_df, q_df], ignore_index=True)

CHQ_Summ_questions_df

Unnamed: 0,question
0,Where can I find a support group for parents w...
1,What are the chances of a child having bipolar...
2,Does coffee lower effectiveness of antibiotics?
3,Why would a 5-year-old soil his pants at school?
4,What could cause belly pain after a period?
...,...
395,What is causing recurrent bronchitis?
396,Could you give advice about losing the weight ...
397,What is the diagnosis for my symptoms of Musc...
398,"What is causing the symptoms of no appetite, m..."


In [None]:
CHQ_Summ_questions_df = pd.DataFrame()

for root, dirs, files in os.walk(CHQ_Summ_dataset_validation_path):
    for file in files:
        if file.endswith('.json'):
            file_path = os.path.join(root, file)
            q_list = retrieve_qs_from_json(file_path)
            q_df = pd.DataFrame(q_list)
            CHQ_Summ_questions_df = pd.concat([CHQ_Summ_questions_df, q_df], ignore_index=True)

CHQ_Summ_questions_df

Unnamed: 0,question
0,Can UTI or UTI medicine delay periods?
1,why is slouching bad?
2,Can you get headaches and earaches from wisdom...
3,Why is my left hand and finger numb?
4,what is the disease when you pull your hair out?
...,...
102,Could you please advise on some options for tr...
103,What is the diagnosis for lower right abdomina...
104,What should I do if the fever and vomiting in ...
105,What would cause dizziness all the time?


## MedQSum

In [None]:
MedQSum_file_path = datasets_path + 'MedQSum/MeQSum_ACL2019_BenAbacha_Demner-Fushman.xlsx'
MedQSum_qs_df = pd.read_excel(MedQSum_file_path, sheet_name='QS')

MedQSum_qs_df.head()

Unnamed: 0,File,CHQ,Summary
0,1-131188152.xml.txt,SUBJECT: who and where to get cetirizine - D\n...,Who manufactures cetirizine?
1,14348.txt,who makes bromocriptine\ni am wondering what c...,Who manufactures bromocriptine?
2,1-131985747.xml.txt,SUBJECT: nulytely\nMESSAGE: Hello can you tell...,"Who makes nulytely, and where can I buy it?"
3,15410.txt,Williams' syndrome\nI would like to have my da...,Where can I get genetic testing for william's ...
4,35.txt,ClinicalTrials.gov - Question - general inform...,Where can I get genetic testing for multiple m...


In [None]:
MedQSum_qs_df['MESSAGE'] = MedQSum_qs_df['CHQ'].apply(lambda x: x.split('MESSAGE: ')[1] if 'MESSAGE: ' in x else x)

MedQSum_qs_df = MedQSum_qs_df[['MESSAGE', 'Summary']]

MedQSum_qs_df

Unnamed: 0,MESSAGE,Summary
0,I need/want to know who manufscturs Cetirizine...,Who manufactures cetirizine?
1,who makes bromocriptine\ni am wondering what c...,Who manufactures bromocriptine?
2,Hello can you tell me where do i order the nul...,"Who makes nulytely, and where can I buy it?"
3,Williams' syndrome\nI would like to have my da...,Where can I get genetic testing for william's ...
4,ClinicalTrials.gov - Question - general inform...,Where can I get genetic testing for multiple m...
...,...,...
995,i got surgery for hole in my ear drum(hole was...,What are the treatments for perforated eardrum?
996,LOOKING FOR HELP FOR MY NEPHEW WITH GLYCOGEN S...,What are the treatments for Glycogen storage d...
997,I have numbness/tingling in my lower right arm...,Where can I find information and treatment for...
998,I was diagnosed with sleep apnea (prolly had i...,How long does swelling from sleep apnea take t...


## ICliniq

In [None]:
Icliniq_json_path = datasets_path + 'ICliniq'

def retrieve_qas_from_icliniq_json(file_path):
    questions_answers = []
    with open(file_path, 'r') as file:
        json_objects = ijson.items(file, 'item')
        for obj in json_objects:
          question = obj.get('input')
          answer_icliniq = obj.get('answer_icliniq')
          answer_chatgpt = obj.get('answer_chatgpt')
          answer_chatdoctor = obj.get('answer_chatdoctor')
          questions_answers.append({
              'question': question,
              'answer_icliniq': answer_icliniq,
              'answer_chatgpt': answer_chatgpt,
              'answer_chatdoctor': answer_chatdoctor
          })
    return questions_answers

In [None]:
Icliniq_qa_df = pd.DataFrame()

for root, dirs, files in os.walk(Icliniq_json_path):
    for file in files:
        if file.endswith('.json'):
            file_path = os.path.join(root, file)
            q_list = retrieve_qas_from_icliniq_json(file_path)
            q_df = pd.DataFrame(q_list)
            Icliniq_qa_df = pd.concat([Icliniq_qa_df, q_df], ignore_index=True)

Icliniq_qa_df

Unnamed: 0,question,answer_icliniq,answer_chatgpt,answer_chatdoctor
0,"Hello doctor,I had mumps five months ago and a...","Hello, Welcome to Chat Doctor forum. I can und...",It is possible that you still have an infectio...,1) There is no need to worry about reinfection...
1,"Hello doctor,I am trying to conceive but my hu...","Hello, Wellcome to ChatDoctor. There are few r...",Hello! Thank you for reaching out.\n\nCocaine ...,1) Cocaine use can affect fertility by causing...
2,"Hello doctor,I am currently on the Yaz contrac...",Hello. The brown discharge should be temporary...,Hello! It's not uncommon to experience some ir...,1) The brown discharge is due to the hormonal ...
3,"Hello doctor,My son aged 13 years has got his ...","Hello, Welcome to Chat Doctor forum. I have go...","As an AI language model, I am not a doctor and...","Hi, welcome to Chat Doctor forum. I understan..."
4,"Hello doctor,I just got one side of my wisdom ...",Hello. The lump is mostly a hard swelling whic...,"Hello,\n\nIt is normal to have some swelling a...","Hi, Thanks for your query. It appears that yo..."
...,...,...,...,...
7316,"Hello doctor,If I had sex on the first day of ...","Hi, Welcome to Chat Doctor forum. Usually, ovu...","It is possible, but the chances of getting pre...",1) No you will not be pregnant. 2) You can hav...
7317,"Hi doctor,Over the past three months, I have b...",Hello. I have noted your concern. On and off r...,I’m sorry to hear that you are experiencing th...,"1. If your partner has a yeast infection, then..."
7318,"Hi doctor,I examined my right and left ear wit...","Hi, Welcome to Chat Doctor forum. I have gone ...","Hello! Based on your description, it is possib...","Hi, Welcome to Chat Doctor forum. As you have..."
7319,"Hello doctor,I had my periods on 20th of the l...","Hello, Welcome to Chat Doctor forum. There was...",Taking the emergency contraceptive pill (I-pil...,1. You had intercourse on 19th & 20th day of y...


## MedicationQA 2019

In [None]:
MedicationQA_path = datasets_path + 'MedicationQA/MedInfo2019-QA-Medications.xlsx'
MedicationQA_df = pd.read_excel(MedicationQA_path, sheet_name='DrugQA')

MedicationQA_df

Unnamed: 0,Question,Focus (Drug),Question Type,Answer,Section Title,URL
0,how does rivatigmine and otc sleep medicine in...,rivastigmine,Interaction,tell your doctor and pharmacist what prescript...,What special precautions should I follow?,https://medlineplus.gov/druginfo/meds/a602009....
1,how does valium affect the brain,Valium,Action,Diazepam is a benzodiazepine that exerts anxio...,CLINICAL PHARMACOLOGY,https://dailymed.nlm.nih.gov/dailymed/drugInfo...
2,what is morphine,morphine,Information,Morphine is a pain medication of the opiate fa...,,https://en.wikipedia.org/wiki/Morphine
3,what are the milligrams for oxycodone e,oxycodone ER,Dose,… 10 mg … 20 mg … 40 mg … 80 mg ...,HOW SUPPLIED,https://dailymed.nlm.nih.gov/dailymed/drugInfo...
4,81% aspirin contain resin and shellac in it. ?,aspirin 81 mg,Ingredient,Inactive Ingredients Ingredient Name,INGREDIENTS AND APPEARANCE,https://dailymed.nlm.nih.gov/dailymed/drugInfo...
...,...,...,...,...,...,...
685,how soon does losartan affect blood pressure,losartan,Action/time,The effect of losartan is substantially presen...,CLINICAL PHARMACOLOGY,https://dailymed.nlm.nih.gov/dailymed/drugInfo...
686,how do steroids effect the respiratory system,steroids,Action,Several efforts have been made to show the ben...,Athletes and doping: effects of drugs on the r...,https://thorax.bmj.com/content/54/11/1041
687,why am i so cold taking bystolic b p med,bystolic,Side effects,Feeling cold is found among people who take By...,Bystolic and Feeling cold - from FDA reports,https://www.ehealthme.com/ds/bystolic/feeling-...
688,pneumococcal vaccine how often,pneumococcal vaccine,Usage/time,CDC recommends routine administration of pneum...,Pneumococcal Vaccine Recommendations,https://www.cdc.gov/vaccines/vpd/pneumo/hcp/re...


In [None]:
MedicationQA_df = MedicationQA_df[['Question', 'Answer']]

MedicationQA_df

Unnamed: 0,Question,Answer
0,how does rivatigmine and otc sleep medicine in...,tell your doctor and pharmacist what prescript...
1,how does valium affect the brain,Diazepam is a benzodiazepine that exerts anxio...
2,what is morphine,Morphine is a pain medication of the opiate fa...
3,what are the milligrams for oxycodone e,… 10 mg … 20 mg … 40 mg … 80 mg ...
4,81% aspirin contain resin and shellac in it. ?,Inactive Ingredients Ingredient Name
...,...,...
685,how soon does losartan affect blood pressure,The effect of losartan is substantially presen...
686,how do steroids effect the respiratory system,Several efforts have been made to show the ben...
687,why am i so cold taking bystolic b p med,Feeling cold is found among people who take By...
688,pneumococcal vaccine how often,CDC recommends routine administration of pneum...


## MentalHealthQA

In [None]:
MentalHealthQA_csv_path = datasets_path + 'MentalHealthQA/Mental_Health_FAQ.csv'

MentalHealthQA_csv = pd.read_csv(MentalHealthQA_csv_path)

MentalHealthQA_csv

Unnamed: 0,Question_ID,Questions,Answers
0,1590140,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,2110618,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,6361820,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,9434130,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,7657263,Can people with mental illness recover?,"When healing from mental illness, early identi..."
...,...,...,...
93,4373204,How do I know if I'm drinking too much?,Sorting out if you are drinking too much can b...
94,7807643,"If cannabis is dangerous, why are we legalizin...","Cannabis smoke, for example, contains cancer-c..."
95,4352464,How can I convince my kids not to use drugs?,You can't. But you can influence their capacit...
96,6521784,What is the legal status (and evidence) of CBD...,Cannabidiol or CBD is a naturally occurring co...


In [None]:
MentalHealthQA_df = MentalHealthQA_csv[['Questions', 'Answers']]

MentalHealthQA_df

Unnamed: 0,Questions,Answers
0,What does it mean to have a mental illness?,Mental illnesses are health conditions that di...
1,Who does mental illness affect?,It is estimated that mental illness affects 1 ...
2,What causes mental illness?,It is estimated that mental illness affects 1 ...
3,What are some of the warning signs of mental i...,Symptoms of mental health disorders vary depen...
4,Can people with mental illness recover?,"When healing from mental illness, early identi..."
...,...,...
93,How do I know if I'm drinking too much?,Sorting out if you are drinking too much can b...
94,"If cannabis is dangerous, why are we legalizin...","Cannabis smoke, for example, contains cancer-c..."
95,How can I convince my kids not to use drugs?,You can't. But you can influence their capacit...
96,What is the legal status (and evidence) of CBD...,Cannabidiol or CBD is a naturally occurring co...


# API

## ***PubMed***

In [None]:
!pip install metapub

Collecting metapub
  Downloading metapub-0.5.5.tar.gz (120 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m120.3/120.3 kB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting eutils (from metapub)
  Downloading eutils-0.6.0-py2.py3-none-any.whl (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.9/41.9 kB[0m [31m5.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting habanero (from metapub)
  Downloading habanero-1.2.6-py2.py3-none-any.whl (30 kB)
Collecting cssselect (from metapub)
  Downloading cssselect-1.2.0-py2.py3-none-any.whl (18 kB)
Collecting unidecode (from metapub)
  Downloading Unidecode-1.3.8-py3-none-any.whl (235 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m235.5/235.5 kB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting docopt (from metapub)
  Downloading docopt-0.6.2.tar.gz (25 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Colle

In [None]:
keyword="What is the role of RhoA in bladder cancer?"
num_of_articles=50

In [None]:
from google.colab import userdata
from metapub import PubMedFetcher
fetch = PubMedFetcher()

pmids = fetch.pmids_for_query(keyword, retmax=num_of_articles, api_key=userdata.get('PUBMED_API_KEY'))

abstracts = {}
for pmid in pmids:
    abstracts[pmid] = fetch.article_by_pmid(pmid).abstract
Abstract = pd.DataFrame(list(abstracts.items()))
Abstract

Unnamed: 0,pmid,Abstract
0,36715867,The Rho/ROCK pathway regulates diverse cellula...
1,36160709,Preeclampsia is regarded as an evolution-relat...
2,33480975,BACKGROUND: Accumulating evidence support the ...
3,33440047,"Clinically, patients with urothelial carcinoma..."
4,33407350,BACKGROUND: Emerging evidence has noted the im...
5,32705161,Circular RNAs (circRNAs) are non‑coding RNAs t...
6,28841878,BACKGROUND: Lutheran/basal cell adhesion molec...
7,27524906,MicroRNAs are consistently reported to regulat...
8,26189182,The stiffness sensing ability is required to r...
9,22386240,UNLABELLED: We quantified the urine sediment a...


In [None]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

tokenizer = AutoTokenizer.from_pretrained("amberoad/bert-multilingual-passage-reranking-msmarco")

model = AutoModelForSequenceClassification.from_pretrained("amberoad/bert-multilingual-passage-reranking-msmarco")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/62.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/696 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/872k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/669M [00:00<?, ?B/s]

In [None]:
Abstract['Abstract'].tolist()

['The Rho/ROCK pathway regulates diverse cellular processes and contributes to the development and advancement of several types of human cancers. This study investigated the role of specific Rho GTPase-activating proteins (RhoGAP), ARHGAP6, in bladder cancer (BC). In this study, ARHGAP6 expression in BC and its clinical significance were investigated. In vitro and in vivo assays were used to explore the tumor-related function and the underlying molecular mechanism ARHGAP6 of in BC. The mRNA and protein levels of ARHGAP6 significantly reduced in human BC tissues and cell lines compared with corresponding adjacent non-cancerous tissues and normal urothelial cells. In vitro, ARHGAP6 overexpression markedly decreased the viability, migration, and invasion of BC cells. Interestingly, low ARHGAP6 expression in BC strongly correlated with poor patient survival and was highly associated with metastasis and β-catenin signaling. Furthermore, ARHGAP6 expression strongly influenced the sensitivity

In [None]:
import pandas as pd
import torch

# Assuming 'keyword' is defined and 'Abstract' is your DataFrame

# Example query
query = keyword

# Tokenize and prepare query-passage pairs
tokens = [tokenizer.encode_plus(query, passage, add_special_tokens=True, return_tensors="pt", truncation=True, max_length=512) for passage in Abstract['Abstract'].tolist()]

# Make predictions
with torch.no_grad():
    scores = [model(**token_pair)[0][:, 1].squeeze().item() for token_pair in tokens]  # Adjusted line to ensure it's a single value

# Add scores as a new column to the DataFrame
Abstract['Score'] = scores

Abstract

Unnamed: 0,pmid,Abstract,Score
0,36715867,The Rho/ROCK pathway regulates diverse cellula...,2.94175
1,36160709,Preeclampsia is regarded as an evolution-relat...,-5.156839
2,33480975,BACKGROUND: Accumulating evidence support the ...,2.52868
3,33440047,"Clinically, patients with urothelial carcinoma...",-4.034179
4,33407350,BACKGROUND: Emerging evidence has noted the im...,1.143967
5,32705161,Circular RNAs (circRNAs) are non‑coding RNAs t...,-1.662011
6,28841878,BACKGROUND: Lutheran/basal cell adhesion molec...,-4.283246
7,27524906,MicroRNAs are consistently reported to regulat...,2.6507
8,26189182,The stiffness sensing ability is required to r...,1.66306
9,22386240,UNLABELLED: We quantified the urine sediment a...,-3.869713


In [None]:
# Now 'Abstract' DataFrame contains a new column 'Score' with the computed scores
sorted_Abstract = Abstract.sort_values(by='Score', ascending=False)

# Display the sorted DataFrame
sorted_Abstract

Unnamed: 0,pmid,Abstract,Score
11,21698524,"BACKGROUND: Small GTPase proteins, including R...",3.215412
13,21054792,INTRODUCTION: Rho-kinase (ROCK) is a serine/th...,3.058287
0,36715867,The Rho/ROCK pathway regulates diverse cellula...,2.94175
10,22006759,PURPOSE: To investigate the expression of RhoA...,2.936905
12,21290242,Normal urinary bladder function is based on th...,2.829129
7,27524906,MicroRNAs are consistently reported to regulat...,2.6507
2,33480975,BACKGROUND: Accumulating evidence support the ...,2.52868
8,26189182,The stiffness sensing ability is required to r...,1.66306
4,33407350,BACKGROUND: Emerging evidence has noted the im...,1.143967
5,32705161,Circular RNAs (circRNAs) are non‑coding RNAs t...,-1.662011


In [None]:
sorted_Abstract.head(10)

Unnamed: 0,pmid,Abstract,Score
11,21698524,"BACKGROUND: Small GTPase proteins, including R...",3.215412
13,21054792,INTRODUCTION: Rho-kinase (ROCK) is a serine/th...,3.058287
0,36715867,The Rho/ROCK pathway regulates diverse cellula...,2.94175
10,22006759,PURPOSE: To investigate the expression of RhoA...,2.936905
12,21290242,Normal urinary bladder function is based on th...,2.829129
7,27524906,MicroRNAs are consistently reported to regulat...,2.6507
2,33480975,BACKGROUND: Accumulating evidence support the ...,2.52868
8,26189182,The stiffness sensing ability is required to r...,1.66306
4,33407350,BACKGROUND: Emerging evidence has noted the im...,1.143967
5,32705161,Circular RNAs (circRNAs) are non‑coding RNAs t...,-1.662011


In [None]:
abstracts = sorted_Abstract.head(10)["Abstract"]

# 2. Combine the abstracts with newlines using string concatenation
combined_abstracts = "\n\n".join(abstracts)

# 3. Print the combined string
print(combined_abstracts)

BACKGROUND: Small GTPase proteins, including RhoA, RhoB, RhoC, Rac1, and cdc42, are important molecules for linking cell shape and cell-cycle progression because of their role in both cytoskeletal arrangements and mitogenic signaling. Over-expression of wild-type or constitutively active forms of RhoA has been shown to induce invasive behavior in non-invasive rat hepatoma cells in vitro. In addition, over-expression of RhoC has been found in melanoma cells with increasing metastatic activity as well as inflammatory breast cancer. These results indicate that overexpression of Rho proteins contributes to cancer cell invasion and metastasis. Rho GDP dissociation inhibitor 2 (RhoGDI2) was recently shown to act as a metastasis suppressor gene in bladder cancer. The purpose of this study was to clarify the clinical significance of this gene expression in patients with colorectal carcinoma.
METHODS: Fifty pairs of normal mucosa and cancer specimens obtained at the time of surgery from patient

In [None]:
print(len(combined_abstracts.split()))

2267


In [None]:
!pip install openai

Collecting openai
  Downloading openai-1.11.1-py3-none-any.whl (226 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m226.1/226.1 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
Collecting httpx<1,>=0.23.0 (from openai)
  Downloading httpx-0.26.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.9/75.9 kB[0m [31m8.5 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<1,>=0.23.0->openai)
  Downloading httpcore-1.0.2-py3-none-any.whl (76 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.9/76.9 kB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<1,>=0.23.0->openai)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m7.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, openai
[31mERROR: pip's dependency resolver does not curr

In [None]:
from openai import OpenAI
client = OpenAI(api_key=userdata.get('OPENAI_KEY'))

In [None]:
response = client.chat.completions.create(
  model="gpt-4-turbo-preview",
  messages=[
    {"role": "system", "content": "Summarize the following abstracts into one concise and clear paragraph"},
    {"role": "user", "content": combined_abstracts}
  ]
)

# add the question to the prompt

2024-02-08 03:39:26 6649b6694292 httpx[656] INFO HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


In [None]:
print(response.choices[0].message.content)
print(len(response.choices[0].message.content.split()))

The collective research underscores the pivotal role of the Rho/ROCK signaling pathway and its regulators, including Rho GTPases, RhoGDI2, RhoGAP proteins such as ARHGAP6, and various microRNAs and non-coding RNAs, in the progression, invasion, and metastasis of various cancers, notably colorectal carcinoma, bladder cancer, and urothelial cell carcinoma (UCC) of the urinary bladder. High expression of RhoGDI2 correlates with better relapse-free survival in colorectal carcinoma, indicating its potential as a prognostic marker. Similar investigations into ARHGAP6 have shown its downregulation in bladder cancer tissues associates with advanced tumor progression and poor patient outcomes, suggesting its tumor-suppressing functionality. Furthermore, alterations in Rho-related gene expressions, particularly RhoA, RhoB, RhoC, and Rho-kinase (ROCK), significantly impact the genesis and advancement of UCC, where modulation of these pathways could offer new therapeutic avenues. Moreover, miRNAs 

# Summarization models

## ***Google LLM***

In [None]:
from google.colab import userdata

In [None]:
import google.generativeai as genai
import os

genai.configure(api_key=userdata.get('GOOGLE_KEY'))

gemini_model = genai.GenerativeModel('gemini-pro')

## ***Facebook Bart LLM***

In [None]:
from transformers import pipeline

bart_summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

## ***FalconsAI medical-summarization***

In [None]:
falcons_ai_summarizer = pipeline("summarization", model="Falconsai/medical_summarization")

# Evaluations

In [None]:
!pip install rouge --quiet
!pip install bert_score --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/61.1 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[90m╺[0m[90m━━━━━━[0m [32m51.2/61.1 kB[0m [31m1.3 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.1/61.1 kB[0m [31m1.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [None]:
from rouge import Rouge
from bert_score import BERTScorer

# Experiments

### *Example of model callbacks*

In [None]:
gemini_response = gemini_model.generate_content('Summarize the following abstracts into one concise and clear paragraph ' + combined_abstracts)

2024-02-08 03:39:53 6649b6694292 tornado.access[656] INFO 200 POST /v1beta/models/gemini-pro:generateContent?%24alt=json%3Benum-encoding%3Dint (127.0.0.1) 10552.92ms


In [None]:
gemini_response.text

'Rho GTPase proteins, including RhoA, RhoB, RhoC, Rac1, and cdc42, and its downstream effector, Rho-kinase (ROCK), are vital for linking cellular structure and cell cycle progression. Over-expression of Rho proteins contributes to cancer cell invasion, metastasis, and tumorigenesis. RhoGDI2 has been recognized as a metastasis suppressor gene in bladder cancer, while elevated RhoA/Rho-kinase signaling is associated with many cardiovascular and urogenital disorders, including erectile dysfunction, prostate cancer, and bladder cancer. ARHGAP6, a specific Rho GTPase-activating protein, regulates bladder cancer proliferation, migration, and invasion possibly via modulation of β-catenin signaling. Rho-associated genes, including GTPases, GDIs, GAPs, and GEFs, showed over-expression in urothelial cell carcinoma of the urinary bladder, indicating their potential involvement in the genesis and progression of the disease. The lncRNA KTN1-AS1 promotes bladder cancer tumorigenesis via modulation o

In [None]:
bart_summarizer(BioASQ_df['ideal_answers'][0], do_sample=False)

In [None]:
falcons_ai_summarizer(BioASQ_df['ideal_answers'][0], do_sample=False)

## **BioASQ**

In [None]:
BioASQ_df['Falcon'] = BioASQ_df['ideal_answers'].head().apply(lambda x: falcons_ai_summarizer(x, do_sample=False)[0]['summary_text'])

Your max_length is set to 200, but your input_length is only 172. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=86)
Your max_length is set to 200, but your input_length is only 124. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=62)
Your max_length is set to 200, but your input_length is only 59. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=29)
Your max_length is set to 200, but your input_length is only 30. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=15)


In [None]:
BioASQ_df.head()

Unnamed: 0,question,ideal_answers,Falcon
0,what is the role of FGF-2 in cardiac regenerat...,Exogenous FGF-2 was shown to increase angiogen...,transmyocardial drilling revascularization com...
1,What is the role of necroptosis in cancer ther...,"Necroptosis, a novel form of programmed cell d...","the apoptotic, autophagic and necroptotic path..."
2,What is the role of Hsp90 inhibition in cancer...,Hsp90 inhibition is followed by G1/S cell cycl...,hsp90 inhibition is followed by G1/S cell cycl...
3,What is the role of RhoA in bladder cancer?,"In urinary bladder cancer, RhoA was more commo...","in urinary bladder cancer, rhoA was more commo..."
4,What is the mechanism of action of trichostat...,Trichostatin A (TSA) exerts antitumoral activi...,trichostatin A (TSA) exerts antitumoral activi...


In [None]:
BioASQ_df['Gemini'] = BioASQ_df['ideal_answers'].head().apply(lambda x: gemini_model.generate_content('Provide a summary of the following text: ' + x).text)

In [None]:
BioASQ_df.head()

Unnamed: 0,question,ideal_answers,Falcon,Gemini
0,what is the role of FGF-2 in cardiac regenerat...,Exogenous FGF-2 was shown to increase angiogen...,transmyocardial drilling revascularization com...,- Exogenous Fibroblast Growth Factor-2 (FGF-2)...
1,What is the role of necroptosis in cancer ther...,"Necroptosis, a novel form of programmed cell d...","the apoptotic, autophagic and necroptotic path...",- Necroptosis is a caspase-independent form of...
2,What is the role of Hsp90 inhibition in cancer...,Hsp90 inhibition is followed by G1/S cell cycl...,hsp90 inhibition is followed by G1/S cell cycl...,"Summary:\n\nInhibition of Hsp90, a crucial mol..."
3,What is the role of RhoA in bladder cancer?,"In urinary bladder cancer, RhoA was more commo...","in urinary bladder cancer, rhoA was more commo...","- RhoA, a small GTPase, is frequently activate..."
4,What is the mechanism of action of trichostat...,Trichostatin A (TSA) exerts antitumoral activi...,trichostatin A (TSA) exerts antitumoral activi...,- Trichostatin A (TSA) is a natural product wi...


In [None]:
BioASQ_df['Facebook BART'] = BioASQ_df['ideal_answers'].head().apply(lambda x: bart_summarizer(x, do_sample=False)[0]['summary_text'])

Your max_length is set to 142, but your input_length is only 112. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=56)
Your max_length is set to 142, but your input_length is only 51. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=25)
Your max_length is set to 142, but your input_length is only 26. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)


In [None]:
BioASQ_df.head()

Unnamed: 0,question,ideal_answers,Falcon,Gemini,Facebook BART
0,what is the role of FGF-2 in cardiac regenerat...,Exogenous FGF-2 was shown to increase angiogen...,transmyocardial drilling revascularization com...,- Exogenous Fibroblast Growth Factor-2 (FGF-2)...,Exogenous FGF-2 was shown to increase angiogen...
1,What is the role of necroptosis in cancer ther...,"Necroptosis, a novel form of programmed cell d...","the apoptotic, autophagic and necroptotic path...",- Necroptosis is a caspase-independent form of...,"Necroptosis, a novel form of programmed cell d..."
2,What is the role of Hsp90 inhibition in cancer...,Hsp90 inhibition is followed by G1/S cell cycl...,hsp90 inhibition is followed by G1/S cell cycl...,"Summary:\n\nInhibition of Hsp90, a crucial mol...",Hsp90 inhibition is followed by G1/S cell cycl...
3,What is the role of RhoA in bladder cancer?,"In urinary bladder cancer, RhoA was more commo...","in urinary bladder cancer, rhoA was more commo...","- RhoA, a small GTPase, is frequently activate...","In urinary bladder cancer, RhoA was more commo..."
4,What is the mechanism of action of trichostat...,Trichostatin A (TSA) exerts antitumoral activi...,trichostatin A (TSA) exerts antitumoral activi...,- Trichostatin A (TSA) is a natural product wi...,Trichostatin A (TSA) exerts antitumoral activi...


### ***Rouge***

In [None]:
from rouge import Rouge
import pandas as pd

def get_rouge_scores(text1, text2):
    rouge = Rouge()
    return rouge.get_scores(text1, text2)

In [None]:
eval_1_rouge = get_rouge_scores(response.choices[0].message.content, BioASQ_df['ideal_answers'][3])
eval_2_rouge = get_rouge_scores(gemini_response.text, BioASQ_df['ideal_answers'][3])

In [None]:
eval_1_rouge
eval_2_rouge

[{'rouge-1': {'r': 0.3235294117647059,
   'p': 0.08943089430894309,
   'f': 0.14012738514179082},
  'rouge-2': {'r': 0.13513513513513514,
   'p': 0.029585798816568046,
   'f': 0.04854368637336243},
  'rouge-l': {'r': 0.3235294117647059,
   'p': 0.08943089430894309,
   'f': 0.14012738514179082}}]

In [None]:
from rouge import Rouge
import pandas as pd

def get_rouge_scores(text1, text2):
    rouge = Rouge()
    return rouge.get_scores(text1, text2)

rouge_scores_out = []

for idx, row in BioASQ_df.head().iterrows():
    eval_1_rouge = get_rouge_scores(row['Facebook BART'], row['ideal_answers'])
    eval_2_rouge = get_rouge_scores(row['Gemini'], row['ideal_answers'])

    for metric in ["rouge-1", "rouge-2", "rouge-l"]:
        for label in ["F-Score"]:
            eval_1_score = eval_1_rouge[0][metric][label[0].lower()]
            eval_2_score = eval_2_rouge[0][metric][label[0].lower()]

            row = {
                "Metric": f"{metric} ({label})",
                "ChatGPT": eval_1_score,
                "Gemini": eval_2_score,
                "Index": idx
            }
            rouge_scores_out.append(row)

def highlight_max(s):
    is_max = s == s.max()
    return ["background-color: green" if v else "background-color: black" for v in is_max]

rouge_scores_df = pd.DataFrame(rouge_scores_out).set_index(["Index", "Metric"])

rouge_scores_styled = rouge_scores_df.style.apply(highlight_max, axis=1)

rouge_scores_styled


Unnamed: 0_level_0,Unnamed: 1_level_0,ChatGPT,Gemini
Index,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1
0,rouge-1 (F-Score),0.096154,0.074468
0,rouge-2 (F-Score),0.0,0.0
0,rouge-l (F-Score),0.096154,0.06383
1,rouge-1 (F-Score),0.171946,0.129353
1,rouge-2 (F-Score),0.020408,0.015209
1,rouge-l (F-Score),0.135747,0.109453
2,rouge-1 (F-Score),0.115183,0.152047
2,rouge-2 (F-Score),0.015564,0.00885
2,rouge-l (F-Score),0.094241,0.116959
3,rouge-1 (F-Score),0.135593,0.140127


### ***BERT***

In [None]:
from bert_score import BERTScorer
import pandas as pd
import numpy as np

scorer = BERTScorer(lang="en", rescale_with_baseline=True)

config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.42G [00:00<?, ?B/s]

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.weight', 'roberta.pooler.dense.bias']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
BioASQ_df['ideal_answers'][3]

'In urinary bladder cancer, RhoA was more commonly found to be activated in the later stages of the disease. This activation was related to poor tumor differentiation, muscle invasion, lymph node metastasis, and shortened disease-free and overall survival.'

In [None]:
response.choices[0].message.content

'The collective research underscores the pivotal role of the Rho/ROCK signaling pathway and its regulators, including Rho GTPases, RhoGDI2, RhoGAP proteins such as ARHGAP6, and various microRNAs and non-coding RNAs, in the progression, invasion, and metastasis of various cancers, notably colorectal carcinoma, bladder cancer, and urothelial cell carcinoma (UCC) of the urinary bladder. High expression of RhoGDI2 correlates with better relapse-free survival in colorectal carcinoma, indicating its potential as a prognostic marker. Similar investigations into ARHGAP6 have shown its downregulation in bladder cancer tissues associates with advanced tumor progression and poor patient outcomes, suggesting its tumor-suppressing functionality. Furthermore, alterations in Rho-related gene expressions, particularly RhoA, RhoB, RhoC, and Rho-kinase (ROCK), significantly impact the genesis and advancement of UCC, where modulation of these pathways could offer new therapeutic avenues. Moreover, miRNAs

In [None]:
P1, R1, F1 = scorer.score([response.choices[0].message.content], [BioASQ_df['ideal_answers'][3]])
P2, R2, F2 = scorer.score([gemini_response.text], [BioASQ_df['ideal_answers'][3]])

In [None]:
F1.tolist()[0]

0.044331010431051254

In [None]:
F2.tolist()[0]

0.012415409088134766

In [None]:
bert_scores_list = []

for idx, row in BioASQ_df.head().iterrows():
    P1, R1, F1 = scorer.score([row['Facebook BART']], [row['ideal_answers']])
    P2, R2, F2 = scorer.score([row['Gemini']], [row['ideal_answers']])
    P3, R3, F3 = scorer.score([row['Falcon']], [row['ideal_answers']])
    row = {
            "Metric": "F1 score",
            "Facebook BART": F1.tolist()[0],
            "Gemini": F2.tolist()[0],
            "Falcon": F3.tolist()[0],
            "Index": idx
        }
    bert_scores_list.append(row)


def highlight_max(s):
    is_max = s == s.max()
    return ["background-color: green" if v else "background-color: black" for v in is_max]

bert_scores_df = pd.DataFrame(bert_scores_list).set_index(["Index", "Metric"])
bert_scores_styled = bert_scores_df.style.apply(highlight_max, axis=1)
bert_scores_styled


Unnamed: 0_level_0,Unnamed: 1_level_0,Facebook BART,Gemini,Falcon
Index,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,F1 score,0.54401,0.617114,0.554131
1,F1 score,0.680132,0.564164,0.641514
2,F1 score,0.702949,0.308081,0.86722
3,F1 score,0.882051,0.468418,0.758291
4,F1 score,0.639922,0.224768,0.364073


## **MedQuAD**

In [None]:
MedQuAD_df['Falcon'] = MedQuAD_df['Answer'].head().apply(lambda x: falcons_ai_summarizer(x, do_sample=False)[0]['summary_text'])

Your max_length is set to 200, but your input_length is only 62. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=31)
Your max_length is set to 200, but your input_length is only 78. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=39)
Your max_length is set to 200, but your input_length is only 84. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=42)
Your max_length is set to 200, but your input_length is only 26. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=13)


In [None]:
MedQuAD_df.head()

Unnamed: 0,Question,Answer,Falcon
0,What is (are) Polycystic ovary syndrome ? (Als...,Polycystic ovary syndrome is a condition in wh...,polycystic ovary syndrome is a condition in wh...
1,What causes Polycystic ovary syndrome ? (Also ...,PCOS is linked to changes in hormone levels th...,the hormones affected are: - Estrogen and prog...
2,What causes Noonan syndrome ?\nURL: https://ww...,Noonan syndrome is linked to defects in severa...,noonan syndrome is linked to defects in severa...
3,What are the complications of Noonan syndrome ...,- Buildup of fluid in tissues of body (lymphed...,- Buildup of fluid in tissues of body (lymphed...
4,How to prevent Noonan syndrome ?\nURL: https:/...,Couples with a family history of Noonan syndro...,couples with a family history of Noonan syndro...


In [None]:
MedQuAD_df['Gemini'] = MedQuAD_df['Answer'].head().apply(lambda x: gemini_model.generate_content('Provide a summary of the following text: ' + x).text)

In [None]:
MedQuAD_df.head()

Unnamed: 0,Question,Answer,Falcon,Gemini
0,What is (are) Polycystic ovary syndrome ? (Als...,Polycystic ovary syndrome is a condition in wh...,polycystic ovary syndrome is a condition in wh...,- Polycystic ovary syndrome (PCOS) is a hormon...
1,What causes Polycystic ovary syndrome ? (Also ...,PCOS is linked to changes in hormone levels th...,the hormones affected are: - Estrogen and prog...,PCOS is a condition that affects a woman's ova...
2,What causes Noonan syndrome ?\nURL: https://ww...,Noonan syndrome is linked to defects in severa...,noonan syndrome is linked to defects in severa...,- Noonan syndrome is a genetic condition cause...
3,What are the complications of Noonan syndrome ...,- Buildup of fluid in tissues of body (lymphed...,- Buildup of fluid in tissues of body (lymphed...,- Buildup of fluid in tissues of body (lymphed...
4,How to prevent Noonan syndrome ?\nURL: https:/...,Couples with a family history of Noonan syndro...,couples with a family history of Noonan syndro...,-Couples with a family history of Noonan syndr...


In [None]:
MedQuAD_df['Facebook BART'] = MedQuAD_df['Answer'].head().apply(lambda x: bart_summarizer(x, do_sample=False)[0]['summary_text'])

Your max_length is set to 142, but your input_length is only 49. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=24)
Your max_length is set to 142, but your input_length is only 72. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=36)
Your max_length is set to 142, but your input_length is only 71. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=35)
Your max_length is set to 142, but your input_length is only 22. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=11)


In [None]:
MedQuAD_df.head()

Unnamed: 0,Question,Answer,Falcon,Gemini,Facebook BART
0,What is (are) Polycystic ovary syndrome ? (Als...,Polycystic ovary syndrome is a condition in wh...,polycystic ovary syndrome is a condition in wh...,- Polycystic ovary syndrome (PCOS) is a hormon...,Polycystic ovary syndrome is a condition in wh...
1,What causes Polycystic ovary syndrome ? (Also ...,PCOS is linked to changes in hormone levels th...,the hormones affected are: - Estrogen and prog...,PCOS is a condition that affects a woman's ova...,PCOS is linked to changes in hormone levels th...
2,What causes Noonan syndrome ?\nURL: https://ww...,Noonan syndrome is linked to defects in severa...,noonan syndrome is linked to defects in severa...,- Noonan syndrome is a genetic condition cause...,Noonan syndrome is an autosomal dominant condi...
3,What are the complications of Noonan syndrome ...,- Buildup of fluid in tissues of body (lymphed...,- Buildup of fluid in tissues of body (lymphed...,- Buildup of fluid in tissues of body (lymphed...,Buildup of fluid in tissues of body (lymphedem...
4,How to prevent Noonan syndrome ?\nURL: https:/...,Couples with a family history of Noonan syndro...,couples with a family history of Noonan syndro...,-Couples with a family history of Noonan syndr...,Couples with Noonan syndrome may want to consi...


### ***Rouge***

In [None]:
from rouge import Rouge
import pandas as pd

def get_rouge_scores(text1, text2):
    rouge = Rouge()
    return rouge.get_scores(text1, text2)

rouge_scores_out = []

for idx, row in MedQuAD_df.head().iterrows():
    eval_1_rouge = get_rouge_scores(row['Facebook BART'], row['Answer'])
    eval_2_rouge = get_rouge_scores(row['Gemini'], row['Answer'])
    eval_3_rouge = get_rouge_scores(row['Falcon'], row['Answer'])

    for metric in ["rouge-1", "rouge-2", "rouge-l"]:
        for label in ["F-Score"]:
            eval_1_score = eval_1_rouge[0][metric][label[0].lower()]
            eval_2_score = eval_2_rouge[0][metric][label[0].lower()]
            eval_3_score = eval_3_rouge[0][metric][label[0].lower()]

            row = {
                "Metric": f"{metric} ({label})",
                "Facebook BART": eval_1_score,
                "Gemini": eval_2_score,
                "Falcon": eval_3_score,
                "Index": idx
            }
            rouge_scores_out.append(row)

def highlight_max(s):
    is_max = s == s.max()
    return ["background-color: green" if v else "background-color: black" for v in is_max]

rouge_scores_df = pd.DataFrame(rouge_scores_out).set_index(["Index", "Metric"])

rouge_scores_styled = rouge_scores_df.style.apply(highlight_max, axis=1)

rouge_scores_styled


Unnamed: 0_level_0,Unnamed: 1_level_0,Facebook BART,Gemini,Falcon
Index,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,rouge-1 (F-Score),0.90411,0.324324,0.925373
0,rouge-2 (F-Score),0.897436,0.102564,0.901408
0,rouge-l (F-Score),0.90411,0.27027,0.925373
1,rouge-1 (F-Score),0.506329,0.470588,0.616279
1,rouge-2 (F-Score),0.40678,0.141791,0.48249
1,rouge-l (F-Score),0.506329,0.459893,0.616279
2,rouge-1 (F-Score),0.913043,0.705882,0.860215
2,rouge-2 (F-Score),0.854369,0.495868,0.811321
2,rouge-l (F-Score),0.913043,0.686275,0.860215
3,rouge-1 (F-Score),0.955224,0.491525,0.864865


### ***BERT***

In [None]:
from bert_score import BERTScorer
import pandas as pd
import numpy as np

scorer = BERTScorer(lang="en", rescale_with_baseline=True)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
bert_scores_list = []

for idx, row in MedQuAD_df.head().iterrows():
    P1, R1, F1 = scorer.score([row['Facebook BART']], [row['Answer']])
    P2, R2, F2 = scorer.score([row['Gemini']], [row['Answer']])
    P3, R3, F3 = scorer.score([row['Falcon']], [row['Answer']])
    row = {
            "Metric": "F1 score",
            "Facebook BART": F1.tolist()[0],
            "Gemini": F2.tolist()[0],
            "Falcon": F3.tolist()[0],
            "Index": idx
        }
    bert_scores_list.append(row)


def highlight_max(s):
    is_max = s == s.max()
    return ["background-color: green" if v else "background-color: black" for v in is_max]

bert_scores_df = pd.DataFrame(bert_scores_list).set_index(["Index", "Metric"])
bert_scores_styled = bert_scores_df.style.apply(highlight_max, axis=1)
bert_scores_styled


Unnamed: 0_level_0,Unnamed: 1_level_0,Facebook BART,Gemini,Falcon
Index,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,F1 score,0.815941,0.431543,0.867477
1,F1 score,0.489192,0.348736,0.441333
2,F1 score,0.806596,0.646287,0.785323
3,F1 score,0.759115,0.388358,0.772468
4,F1 score,0.389118,0.349072,0.693841


## **TREC_2017**

In [None]:
trec_2017_df['Falcon'] = trec_2017_df['Answer'].head().apply(lambda x: falcons_ai_summarizer(x, do_sample=False)[0]['summary_text'])

Your max_length is set to 200, but your input_length is only 97. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=48)
Your max_length is set to 200, but your input_length is only 84. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=42)


In [None]:
trec_2017_df.head()

Unnamed: 0,Question,Answer,Falcon
0,What are the references with noonan syndrome a...,Noonan's syndrome is an eponymic designation t...,noonan's syndrome is an eponymic designation t...
1,Re:NDC# 0115-0672-50 Zolmitriptan tabkets 5mg...,Zolmitriptan tablets are available as 2.5 mg (...,the orally disintegrating tablets contain anhy...
2,are they gluten free,Active Ingredients Amphetamine Aspartate Amphe...,active Ingredients Amphetamine Aspartate Amphe...
3,vdrl positive patients please tell me what are...,"Syphilis If the RPR, VDRL, or TRUST tests are...",syphilis can spread very easily in the primary...
4,How much glucose is in my GlucaGen HypoKit ? ...,"GLUCAGEN glucagon hydrochloride injection, po...",glucagon hydrochloride is a highly soluble glu...


In [None]:
trec_2017_df['Gemini'] = trec_2017_df['Answer'].head().apply(lambda x: gemini_model.generate_content('Provide a summary of the following text: ' + x).text)

In [None]:
trec_2017_df.head()

Unnamed: 0,Question,Answer,Falcon,Gemini
0,What are the references with noonan syndrome a...,Noonan's syndrome is an eponymic designation t...,noonan's syndrome is an eponymic designation t...,Noonan's syndrome is a condition involving a v...
1,Re:NDC# 0115-0672-50 Zolmitriptan tabkets 5mg...,Zolmitriptan tablets are available as 2.5 mg (...,the orally disintegrating tablets contain anhy...,Zolmitriptan tablets are available in 2.5 mg (...
2,are they gluten free,Active Ingredients Amphetamine Aspartate Amphe...,active Ingredients Amphetamine Aspartate Amphe...,- **Active Ingredients:**\n\n - Amphetamine A...
3,vdrl positive patients please tell me what are...,"Syphilis If the RPR, VDRL, or TRUST tests are...",syphilis can spread very easily in the primary...,"- Early diagnosis of syphilis is crucial, and ..."
4,How much glucose is in my GlucaGen HypoKit ? ...,"GLUCAGEN glucagon hydrochloride injection, po...",glucagon hydrochloride is a highly soluble glu...,Glucagon hydrochloride injection is a medicati...


In [None]:
trec_2017_df['Facebook BART'] = trec_2017_df['Answer'].head().apply(lambda x: bart_summarizer(x, do_sample=False)[0]['summary_text'])

Your max_length is set to 142, but your input_length is only 74. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=37)
Your max_length is set to 142, but your input_length is only 80. Since this is a summarization task, where outputs shorter than the input are typically wanted, you might consider decreasing max_length manually, e.g. summarizer('...', max_length=40)


In [None]:
trec_2017_df.head()

Unnamed: 0,Question,Answer,Falcon,Gemini,Facebook BART
0,What are the references with noonan syndrome a...,Noonan's syndrome is an eponymic designation t...,noonan's syndrome is an eponymic designation t...,Noonan's syndrome is a condition involving a v...,Noonan's syndrome includes groups of patients ...
1,Re:NDC# 0115-0672-50 Zolmitriptan tabkets 5mg...,Zolmitriptan tablets are available as 2.5 mg (...,the orally disintegrating tablets contain anhy...,Zolmitriptan tablets are available in 2.5 mg (...,Zolmitriptan orally disintegrating tablets are...
2,are they gluten free,Active Ingredients Amphetamine Aspartate Amphe...,active Ingredients Amphetamine Aspartate Amphe...,- **Active Ingredients:**\n\n - Amphetamine A...,"Amphetamine Aspartate, Amphetamine Sulfate, Sa..."
3,vdrl positive patients please tell me what are...,"Syphilis If the RPR, VDRL, or TRUST tests are...",syphilis can spread very easily in the primary...,"- Early diagnosis of syphilis is crucial, and ...","If RPR, VDRL, or TRUST tests are positive, one..."
4,How much glucose is in my GlucaGen HypoKit ? ...,"GLUCAGEN glucagon hydrochloride injection, po...",glucagon hydrochloride is a highly soluble glu...,Glucagon hydrochloride injection is a medicati...,"GLUCAGEN glucagon hydrochloride injection, po..."


### ***Rouge***

In [None]:
from rouge import Rouge
import pandas as pd

def get_rouge_scores(text1, text2):
    rouge = Rouge()
    return rouge.get_scores(text1, text2)

rouge_scores_out = []

for idx, row in trec_2017_df.head().iterrows():
    eval_1_rouge = get_rouge_scores(row['Facebook BART'], row['Answer'])
    eval_2_rouge = get_rouge_scores(row['Gemini'], row['Answer'])
    eval_3_rouge = get_rouge_scores(row['Falcon'], row['Answer'])

    for metric in ["rouge-1", "rouge-2", "rouge-l"]:
        for label in ["F-Score"]:
            eval_1_score = eval_1_rouge[0][metric][label[0].lower()]
            eval_2_score = eval_2_rouge[0][metric][label[0].lower()]
            eval_3_score = eval_3_rouge[0][metric][label[0].lower()]

            row = {
                "Metric": f"{metric} ({label})",
                "Facebook BART": eval_1_score,
                "Gemini": eval_2_score,
                "Falcon": eval_3_score,
                "Index": idx
            }
            rouge_scores_out.append(row)

def highlight_max(s):
    is_max = s == s.max()
    return ["background-color: green" if v else "background-color: black" for v in is_max]

rouge_scores_df = pd.DataFrame(rouge_scores_out).set_index(["Index", "Metric"])

rouge_scores_styled = rouge_scores_df.style.apply(highlight_max, axis=1)

rouge_scores_styled


Unnamed: 0_level_0,Unnamed: 1_level_0,Facebook BART,Gemini,Falcon
Index,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,rouge-1 (F-Score),0.509317,0.72449,0.739583
0,rouge-2 (F-Score),0.41,0.460317,0.669388
0,rouge-l (F-Score),0.509317,0.693878,0.739583
1,rouge-1 (F-Score),0.613861,0.474576,0.717949
1,rouge-2 (F-Score),0.481752,0.243902,0.429448
1,rouge-l (F-Score),0.613861,0.457627,0.717949
2,rouge-1 (F-Score),0.458333,0.862745,0.655738
2,rouge-2 (F-Score),0.188679,0.470588,0.477612
2,rouge-l (F-Score),0.458333,0.862745,0.655738
3,rouge-1 (F-Score),0.478528,0.362694,0.62766


### ***BERT***

In [None]:
from bert_score import BERTScorer
import pandas as pd
import numpy as np

scorer = BERTScorer(lang="en", rescale_with_baseline=True)

Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['roberta.pooler.dense.bias', 'roberta.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [None]:
bert_scores_list = []

for idx, row in trec_2017_df.head().iterrows():
    P1, R1, F1 = scorer.score([row['Facebook BART']], [row['Answer']])
    P2, R2, F2 = scorer.score([row['Gemini']], [row['Answer']])
    P3, R3, F3 = scorer.score([row['Falcon']], [row['Answer']])
    row = {
            "Metric": "F1 score",
            "Facebook BART": F1.tolist()[0],
            "Gemini": F2.tolist()[0],
            "Falcon": F3.tolist()[0],
            "Index": idx
        }
    bert_scores_list.append(row)


def highlight_max(s):
    is_max = s == s.max()
    return ["background-color: green" if v else "background-color: black" for v in is_max]

bert_scores_df = pd.DataFrame(bert_scores_list).set_index(["Index", "Metric"])
bert_scores_styled = bert_scores_df.style.apply(highlight_max, axis=1)
bert_scores_styled


Unnamed: 0_level_0,Unnamed: 1_level_0,Facebook BART,Gemini,Falcon
Index,Metric,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,F1 score,0.495239,0.596947,0.648465
1,F1 score,0.449147,0.460936,0.340762
2,F1 score,0.462839,0.306574,0.428046
3,F1 score,0.390964,0.329839,0.429842
4,F1 score,0.764506,-0.038144,-0.294287
