# Summarization

In [None]:
!pip install openai

In [None]:
!pip install Bio

In [None]:
from Bio import Entrez

def get_biopython_abstracts_by_pmids(pmids):
  """Fetch PubMed records for ``pmids`` and return their abstract text.

  Args:
    pmids: PubMed identifier(s), passed unchanged as the ``id`` argument of
      ``Entrez.efetch``.

  Returns:
    A list with one dict per entry of the response's ``'PubmedArticle'``
    list: ``{"PMID": <str>, "Abstract": <str or None>}``. The abstract is
    the non-empty ``AbstractText`` parts joined with a single space, or
    ``None`` when the record has no abstract text.
  """
  Entrez.email = "zhalayev@gmail.com"
  handle = Entrez.efetch(db="pubmed", id=pmids, rettype="xml")
  try:
    records = Entrez.read(handle)
  finally:
    # Release the response even when parsing fails.
    handle.close()

  biomedexplorer_abstracts_with_pmids = []
  for record in records['PubmedArticle']:
    citation = record['MedlineCitation']
    # A missing Abstract/AbstractText yields '' and therefore no sections.
    abstract_elements = citation['Article'].get('Abstract', {}).get('AbstractText', '')
    # Convert each element to a plain string and drop empty ones.
    sections = [str(element) for element in abstract_elements if str(element)]
    biomedexplorer_abstracts_with_pmids.append({
      "PMID": str(citation['PMID']),
      "Abstract": ' '.join(sections) if sections else None,
    })
  return biomedexplorer_abstracts_with_pmids

In [None]:
from openai import OpenAI
from google.colab import userdata
client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'), organization="org-cWmpmr0PaRcJuukG9B1grwHm")

In [None]:
import pandas as pd

In [None]:
def get_abstracts_df(abstracts):
  """Build a PMID/Abstract DataFrame and drop rows missing either value.

  Args:
    abstracts: records accepted by ``pd.DataFrame`` (for example a list of
      dicts with ``'PMID'`` and ``'Abstract'`` keys).

  Returns:
    A DataFrame with columns ``PMID`` and ``Abstract`` and no nulls in either.
  """
  required_columns = ['PMID', 'Abstract']
  frame = pd.DataFrame(abstracts, columns=required_columns)
  return frame.dropna(subset=required_columns)

In [None]:
def get_abstracts_str(abstracts_df, max_chars=64000):
  """Concatenate all abstracts into one string, capped at ``max_chars``.

  Args:
    abstracts_df: DataFrame with an ``'Abstract'`` column of strings.
    max_chars: maximum length of the returned string. Defaults to 64000,
      the limit that was previously hard-coded; ``None`` disables the cap.

  Returns:
    The abstracts joined by three spaces and truncated to ``max_chars``
    characters (an empty string when there are no rows).
  """
  joined = "   ".join(abstracts_df['Abstract'].tolist())
  # Slicing with None returns the whole string, so None means "no limit".
  return joined[:max_chars]

In [None]:
def openai_summarize(abstracts_str):
  """Ask the chat model for a one-paragraph summary of ``abstracts_str``.

  Uses the module-level ``client`` and returns the message content of the
  first choice in the response.
  """
  system_prompt = "Provide clear and concise summary of the abstracts retrieved from PubMed in one small paragraph"
  conversation = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": abstracts_str},
  ]
  completion = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    messages=conversation,
  )
  first_choice = completion.choices[0]
  return first_choice.message.content

In [None]:
def csv_to_df(csv):
  """Read a CSV and discard the ``'Unnamed: 0'`` column if it is present.

  ``'Unnamed: 0'`` is the name pandas assigns to a header-less first column,
  such as the index written by ``DataFrame.to_csv()``. Files saved with
  ``index=False`` do not have it; those are returned unchanged instead of
  raising ``KeyError``.

  Args:
    csv: path or file-like object accepted by ``pd.read_csv``.

  Returns:
    The loaded DataFrame without the ``'Unnamed: 0'`` column.
  """
  df = pd.read_csv(csv)
  return df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
BioASQ_all_summary_df = csv_to_df('bioasq_summaries.csv')
BioASQ_all_summary_df

In [None]:
!pip install metapub

In [None]:
import os

# A `!export ...` line runs in a throwaway subshell, so the variable never
# reaches this kernel, and `userdata.get(...)` is handed to the shell as
# literal text instead of being evaluated. Set the variable on the Python
# process itself so libraries that read NCBI_API_KEY from the environment
# (metapub is documented to do so) can see it.
os.environ['NCBI_API_KEY'] = userdata.get('PUBMED_API_KEY')

In [None]:
from metapub import PubMedFetcher

num_of_articles=20

In [None]:
def fetch_pmids_by_query(keyword, num_of_articles):
  """Return the PMIDs metapub finds for ``keyword``.

  ``num_of_articles`` is forwarded as ``retmax`` to
  ``PubMedFetcher.pmids_for_query``.
  """
  fetcher = PubMedFetcher()
  matching_pmids = fetcher.pmids_for_query(keyword, retmax=num_of_articles)
  return matching_pmids

In [None]:
def combined_operation(row):
  """Summarize the PubMed abstracts relevant to one question row.

  Searches PubMed with ``row['question']`` (using the module-level
  ``num_of_articles``), falls back to ``row['pmids']`` when the search
  returns nothing, prints the question and the fetched abstracts, and
  returns the OpenAI summary of the concatenated abstracts.
  """
  question_text = row['question']
  found_pmids = fetch_pmids_by_query(question_text, num_of_articles)
  # Use the PMIDs shipped with the row only when the live search is empty.
  pmids_to_fetch = found_pmids if len(found_pmids) != 0 else row['pmids']
  fetched = get_biopython_abstracts_by_pmids(pmids_to_fetch)
  print("Question: \n" + question_text + "\n")
  print(fetched)
  return openai_summarize(get_abstracts_str(get_abstracts_df(fetched)))

In [None]:
def process_chunk(chunk, csv_path, checkpoint_file):
    """Summarize every row of ``chunk`` and append the results to ``csv_path``.

    A row that raises is reported on stdout and skipped. Rows are appended
    without a header, so the target file's header must already exist.
    ``checkpoint_file`` is accepted but not used here.
    """
    separator = "-------------"
    summaries = []
    for row_index, record in chunk.iterrows():
        try:
            print(separator)
            summary = combined_operation(record)
            print(separator)
            summaries.append({
                'question': record['question'],
                'OpenAI PubMed Abstracts Summary': summary,
            })
        except Exception as err:
            print(f"Error processing row at index {row_index}: {err}")

    # Append this chunk's rows to the shared results file.
    pd.DataFrame(summaries).to_csv(csv_path, mode='a', header=False, index=False)

In [None]:
import time

def main(df, csv_path, chunk_size=60, checkpoint_file='checkpoint.txt'):
    """Summarize ``df`` in chunks, appending results to ``csv_path``.

    Progress is recorded in ``checkpoint_file`` as the index of the last row
    of the most recently completed chunk, so an interrupted run resumes at
    the next row.

    Args:
        df: DataFrame of rows to process; sliced positionally with ``iloc``.
        csv_path: results CSV. It is created with a header row when the run
            starts from row 0 or the file does not exist yet; on a resumed
            run the existing file is kept and appended to.
        chunk_size: number of rows per chunk (default 60, the previous
            hard-coded value). Must be positive.
        checkpoint_file: path of the checkpoint file (default
            ``'checkpoint.txt'``, the previous hard-coded value).
    """
    import os

    # Start from the beginning or from the row after the last checkpoint.
    try:
        with open(checkpoint_file, 'r') as f:
            start_index = int(f.read().strip()) + 1
    except FileNotFoundError:
        start_index = 0

    # Writing the header truncates the file, so do it only for a fresh run;
    # doing it on resume would throw away every row already written.
    if start_index == 0 or not os.path.exists(csv_path):
        pd.DataFrame(columns=['question','OpenAI Pubmed Abstracts Summary']).to_csv(csv_path, index=False)

    # Chunk the DataFrame
    for start in range(start_index, len(df), chunk_size):
        end = min(start + chunk_size, len(df))
        chunk = df.iloc[start:end]
        try:
            process_chunk(chunk, csv_path, checkpoint_file)
            # Save the last index of the chunk only after it was written.
            with open(checkpoint_file, 'w') as f:
                f.write(str(end - 1))
        except Exception as e:
            print(f"Error processing chunk starting at index {start}: {e}")
            continue

In [None]:
df = BioASQ_all_summary_df.head(1)
df

In [None]:
main(BioASQ_all_summary_df, 'results.csv')

In [None]:
import pandas as pd

In [None]:
results_df = pd.read_csv("results.csv")
results_df

# Simplification

In [None]:
import json
import pandas as pd

adaptations = []
abstracts = []
pmids = []
question = []
adaptation_version = []
question_type = []

# Load the dataset; the context manager closes the file once it is parsed.
with open('data.json', 'r') as data_file:
    data = json.load(data_file)

# Work through every question number
for question_number, value in data.items():

    # Work through every PMID; the remaining keys hold question metadata.
    for pmid, texts in value.items():
        if pmid in ('question', 'question_type'):
            continue

        # Emit one complete row per available adaptation so that all six
        # lists always stay the same length. The abstract, pmid and question
        # are repeated for each adaptation, and the version label is taken
        # from the key itself (adaptation3 is recorded as 3, not 2).
        for version in (1, 2, 3):
            adaptation_key = f'adaptation{version}'
            if adaptation_key not in texts['adaptations']:
                continue

            pmids.append(pmid)
            question.append(question_number)
            abstracts.append(' '.join(texts['abstract'].values()))
            question_type.append(value['question_type'])
            # Join the adaptation's parts and collapse double spaces.
            adaptations.append(' '.join(texts['adaptations'][adaptation_key].values()).replace('  ', ' '))
            adaptation_version.append(version)


dataset = pd.DataFrame({'question':question, 'pmid':pmids, 'input_text':abstracts, 'target_text':adaptations,
                        'Adaptation_Version': adaptation_version, 'Question_Type': question_type})
dataset

In [None]:
!pip install openai

In [None]:
from openai import OpenAI
from google.colab import userdata
client = OpenAI(api_key=userdata.get('OPENAI_API_KEY'), organization="org-cWmpmr0PaRcJuukG9B1grwHm")

In [None]:
def openai_simplify(row):
  """Ask the chat model for a simplified summary of ``row['input_text']``.

  Prints the input text, sends it through the module-level ``client`` and
  returns the message content of the first choice.
  """
  abstract_text = row['input_text']
  print("Input: " + abstract_text)
  system_prompt = "Provide clear and concise simplified summary of the abstracts retrieved from PubMed in one small paragraph"
  conversation = [
    {"role": "system", "content": system_prompt},
    {"role": "user", "content": abstract_text},
  ]
  completion = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    messages=conversation,
  )
  first_choice = completion.choices[0]
  return first_choice.message.content

In [None]:
def process_simplification_chunk(chunk, csv_path, checkpoint_file):
    """Simplify every row of ``chunk`` and append the results to ``csv_path``.

    Each output row holds the pmid, the model's simplification and the
    row's ``target_text`` as the gold standard. A row that raises is reported
    on stdout and skipped. Rows are appended without a header.
    ``checkpoint_file`` is accepted but not used here.
    """
    divider = "-------------"
    simplified_rows = []
    for row_index, record in chunk.iterrows():
        try:
            print(divider)
            print("Index: " + str(row_index))
            simplified = openai_simplify(record)
            print("Result: " + simplified)
            print(divider)
            simplified_rows.append({
                'pmid': record['pmid'],
                'OpenAI PLABA Abstracts Simplification': simplified,
                "Golden standard": record['target_text'],
            })
        except Exception as err:
            print(f"Error processing row at index {row_index}: {err}")

    # Append this chunk's rows to the shared results file.
    pd.DataFrame(simplified_rows).to_csv(csv_path, mode='a', header=False, index=False)

In [None]:
import time

def main_simplification(df, csv_path, chunk_size=60, checkpoint_file='checkpoint.txt'):
    """Simplify ``df`` in chunks, appending results to ``csv_path``.

    Progress is recorded in ``checkpoint_file`` as the index of the last row
    of the most recently completed chunk, so an interrupted run resumes at
    the next row.

    Args:
        df: DataFrame of rows to process; sliced positionally with ``iloc``.
        csv_path: results CSV. It is created with a header row when the run
            starts from row 0 or the file does not exist yet; on a resumed
            run the existing file is kept and appended to.
        chunk_size: number of rows per chunk (default 60, the previous
            hard-coded value). Must be positive.
        checkpoint_file: path of the checkpoint file (default
            ``'checkpoint.txt'``, the previous hard-coded value).
    """
    import os

    # Start from the beginning or from the row after the last checkpoint.
    try:
        with open(checkpoint_file, 'r') as f:
            start_index = int(f.read().strip()) + 1
    except FileNotFoundError:
        start_index = 0

    # Writing the header truncates the file, so do it only for a fresh run;
    # doing it on resume would throw away every row already written.
    if start_index == 0 or not os.path.exists(csv_path):
        pd.DataFrame(columns=['pmid','OpenAI PLABA Abstracts Simplification', 'Golden standard']).to_csv(csv_path, index=False)

    # Chunk the DataFrame
    for start in range(start_index, len(df), chunk_size):
        end = min(start + chunk_size, len(df))
        chunk = df.iloc[start:end]
        try:
            process_simplification_chunk(chunk, csv_path, checkpoint_file)
            # Save the last index of the chunk only after it was written.
            with open(checkpoint_file, 'w') as f:
                f.write(str(end - 1))
        except Exception as e:
            print(f"Error processing chunk starting at index {start}: {e}")
            continue

In [None]:
main_simplification(dataset.head(5), 'openai_simplification_results.csv')

In [None]:
dsf = pd.read_csv('openai_simplification_results.csv')
dsf

In [None]:
main_simplification(dataset, 'openai_simplification_results.csv')

In [None]:
results_df = pd.read_csv('openai_simplification_results.csv')
results_df

# Evaluation of Summaries

In [None]:
import pandas as pd

In [None]:
def csv_to_df(csv):
  """Read a CSV and discard the ``'Unnamed: 0'`` column if it is present.

  ``'Unnamed: 0'`` is the name pandas assigns to a header-less first column,
  such as the index written by ``DataFrame.to_csv()``. Files saved with
  ``index=False`` do not have it; those are returned unchanged instead of
  raising ``KeyError``.

  Args:
    csv: path or file-like object accepted by ``pd.read_csv``.

  Returns:
    The loaded DataFrame without the ``'Unnamed: 0'`` column.
  """
  df = pd.read_csv(csv)
  return df.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
q_summaries_ideal_df = csv_to_df('q_summaries_ideal.csv')
q_summaries_ideal_df

In [None]:
!pip install rouge --quiet
!pip install bert_score --quiet

In [None]:
from bert_score import BERTScorer

scorer = BERTScorer(lang="en", rescale_with_baseline=True)

In [None]:
def highlight_max(s):
    """Return one CSS background rule per element of ``s``.

    Elements equal to the maximum of ``s`` get a green background; every
    other element gets a black one.
    """
    palette = {True: "background-color: green", False: "background-color: black"}
    flags = s == s.max()
    return [palette[bool(flag)] for flag in flags]

In [None]:
def eval_bert_scores(df):
  """Score the OpenAI and Gemini summaries of every row against the ideal answer.

  For each row the module-level ``scorer`` is called once per system with
  ``row['ideal_answer']`` as the reference. Only the third value it returns
  (the F1 score) is kept.

  Returns:
    A list with one dict per row, holding the keys ``"Metric"``, the two
    summary column names and ``"Index"``.
  """
  openai_column = 'OpenAI Pubmed Abstracts Summary'
  gemini_column = "Gemini PubMed Abstracts Summary"

  collected = []
  for idx, record in df.iterrows():
      print("Index: " + str(idx))
      print("Question: " + record['question'])
      _, _, openai_f1 = scorer.score([record[openai_column]], [record['ideal_answer']])
      _, _, gemini_f1 = scorer.score([record[gemini_column]], [record['ideal_answer']])
      collected.append({
          "Metric": "F1 score",
          "OpenAI Pubmed Abstracts Summary": openai_f1.tolist()[0],
          "Gemini PubMed Abstracts Summary": gemini_f1.tolist()[0],
          "Index": idx
      })
  return collected

In [None]:
bert_scores_list = eval_bert_scores(q_summaries_ideal_df)

In [None]:
bert_scores_df = pd.DataFrame(bert_scores_list).set_index(["Index", "Metric"])
bert_scores_styled = bert_scores_df.style.apply(highlight_max, axis=1)
bert_scores_styled

In [None]:
bert_scores_df.to_csv('bert_scores_df.csv')

# Evaluation of Simplification

In [None]:
openai_simpl_df = pd.read_csv('openai_simplification_results.csv')
openai_simpl_df

In [None]:
gemini_simpl_df = pd.read_csv('simplification_result.csv')
gemini_simpl_df

In [None]:
import json
import pandas as pd

adaptations = []
abstracts = []
pmids = []
question = []
adaptation_version = []
question_type = []

# Load the dataset; the context manager closes the file once it is parsed.
with open('data.json', 'r') as data_file:
    data = json.load(data_file)

# Work through every question number
for question_number, value in data.items():

    # Work through every PMID; the remaining keys hold question metadata.
    for pmid, texts in value.items():
        if pmid in ('question', 'question_type'):
            continue

        # Emit one complete row per available adaptation so that all six
        # lists always stay the same length. The abstract, pmid and question
        # are repeated for each adaptation, and the version label is taken
        # from the key itself (adaptation3 is recorded as 3, not 2).
        for version in (1, 2, 3):
            adaptation_key = f'adaptation{version}'
            if adaptation_key not in texts['adaptations']:
                continue

            pmids.append(pmid)
            question.append(question_number)
            abstracts.append(' '.join(texts['abstract'].values()))
            question_type.append(value['question_type'])
            # Join the adaptation's parts and collapse double spaces.
            adaptations.append(' '.join(texts['adaptations'][adaptation_key].values()).replace('  ', ' '))
            adaptation_version.append(version)


dataset = pd.DataFrame({'question':question, 'pmid':pmids, 'input_text':abstracts, 'target_text':adaptations,
                        'Adaptation_Version': adaptation_version, 'Question_Type': question_type})
dataset

In [None]:
dataset = dataset.loc[:, ['pmid', 'input_text']]
dataset

In [None]:
all_simplification_df = pd.merge(openai_simpl_df, gemini_simpl_df, on='pmid')
all_simplification_df

In [None]:
all_simplification_df = pd.merge(all_simplification_df, dataset, on='pmid')
all_simplification_df

In [None]:
all_simplification_df = all_simplification_df.drop_duplicates(subset=['pmid'])
all_simplification_df

In [None]:
all_simplification_df.to_csv('all_simplification_results.csv')

In [None]:
all_simplification_df = csv_to_df('all_simplification_results.csv')
all_simplification_df

# Simplification/modified_sari.py implementation

In [None]:
from __future__ import division
from collections import Counter
import glob
import numpy as np
from argparse import ArgumentParser

In [None]:
def is_subsequence(str1,str2):
    """Return True when the items of ``str1`` occur in ``str2`` in order.

    The items need not be adjacent in ``str2``. An empty ``str1`` is a
    subsequence of anything.
    """
    needed = len(str1)
    matched = 0
    for candidate in str2:
        if matched == needed:
            break
        # Advance in str1 only when its next pending item is found.
        if str1[matched] == candidate:
            matched += 1
    return matched == needed

def SARIngram(sgrams, cgrams, rgramslist, numref, complex):
    """Score one n-gram order for the keep, deletion and addition operations.

    Args:
        sgrams: n-grams of the source sentence.
        cgrams: n-grams of the candidate (system output) sentence.
        rgramslist: one list of n-grams per reference sentence.
        numref: number of references; source and candidate counts are
            multiplied by it before being compared with the pooled
            reference counts.
        complex: the source sentence as a string; it is split on whitespace
            by the addition filter below. (The name shadows the ``complex``
            builtin inside this function.)

    Returns:
        ``(keepscore, (delscore_precision, delscore_recall, delscore), addscore)``
        where ``keepscore``, ``delscore`` and ``addscore`` are the harmonic
        means of their respective precision and recall.
    """
    # Pool the n-grams of all references into a single count table.
    rgramsall = [rgram for rgrams in rgramslist for rgram in rgrams]
    rgramcounter = Counter(rgramsall)

    # Scale source counts by the number of references.
    sgramcounter = Counter(sgrams)
    sgramcounter_rep = Counter()
    for sgram, scount in sgramcounter.items():
        sgramcounter_rep[sgram] = scount * numref

    # Scale candidate counts the same way.
    cgramcounter = Counter(cgrams)
    cgramcounter_rep = Counter()
    for cgram, ccount in cgramcounter.items():
        cgramcounter_rep[cgram] = ccount * numref

    # KEEP
    # Counter `&` keeps the minimum count per n-gram:
    #   kept by the candidate = source & candidate
    #   of those, also in refs = ... & references
    #   kept by the references = source & references
    keepgramcounter_rep = sgramcounter_rep & cgramcounter_rep
    keepgramcountergood_rep = keepgramcounter_rep & rgramcounter
    keepgramcounterall_rep = sgramcounter_rep & rgramcounter

    keeptmpscore1 = 0
    keeptmpscore2 = 0
    for keepgram in keepgramcountergood_rep:
        keeptmpscore1 += keepgramcountergood_rep[keepgram] / keepgramcounter_rep[keepgram]
        keeptmpscore2 += keepgramcountergood_rep[keepgram] / keepgramcounterall_rep[keepgram]
        # print "KEEP", keepgram, keepscore, cgramcounter[keepgram], sgramcounter[keepgram], rgramcounter[keepgram]
    # Precision and recall are averages over distinct n-grams; both stay 0
    # when their denominator set is empty.
    keepscore_precision = 0
    if len(keepgramcounter_rep) > 0:
        keepscore_precision = keeptmpscore1 / len(keepgramcounter_rep)
    keepscore_recall = 0
    if len(keepgramcounterall_rep) > 0:
        keepscore_recall = keeptmpscore2 / len(keepgramcounterall_rep)
    keepscore = 0
    if keepscore_precision > 0 or keepscore_recall > 0:
        keepscore = 2 * keepscore_precision * keepscore_recall / (keepscore_precision + keepscore_recall)

    # DELETION
    # Counter `-` keeps only positive remainders:
    #   deleted by the candidate = source - candidate
    #   of those, absent in refs = ... - references
    #   deleted by the references = source - references
    delgramcounter_rep = sgramcounter_rep - cgramcounter_rep
    delgramcountergood_rep = delgramcounter_rep - rgramcounter
    delgramcounterall_rep = sgramcounter_rep - rgramcounter
    deltmpscore1 = 0
    deltmpscore2 = 0
    for delgram in delgramcountergood_rep:
        deltmpscore1 += delgramcountergood_rep[delgram] / delgramcounter_rep[delgram]
        deltmpscore2 += delgramcountergood_rep[delgram] / delgramcounterall_rep[delgram]
    delscore_precision = 0
    if len(delgramcounter_rep) > 0:
        delscore_precision = deltmpscore1 / len(delgramcounter_rep)
    delscore_recall = 0
    if len(delgramcounterall_rep) > 0:
        # NOTE(review): deltmpscore2 is accumulated above but never read, and
        # the recall reuses deltmpscore1 (unlike KEEP, which uses
        # keeptmpscore2). This looks like a copy-paste slip; confirm against
        # the reference SARI implementation before changing it, because a
        # change alters delscore_recall and delscore.
        delscore_recall = deltmpscore1 / len(delgramcounterall_rep)
    delscore = 0
    if delscore_precision > 0 or delscore_recall > 0:
        delscore = 2 * delscore_precision * delscore_recall / (delscore_precision + delscore_recall)

    # ADDITION
    # Works on sets of distinct n-grams (counts are ignored):
    #   added by the candidate = candidate - source
    #   of those, also in refs = ... & references
    #   added by the references = references - source
    addgramcounter = set(cgramcounter) - set(sgramcounter)
    addgramcountergood = set(addgramcounter) & set(rgramcounter)
    addgramcounterall = set(rgramcounter) - set(sgramcounter)

    # Vocabulary of individual tokens that appear in the source n-grams.
    sgrams_set = set()
    for gram in sgrams:
        sgrams_set.update(gram.split())

    # Keep a "good" addition only if it contains a token missing from the
    # source, or its tokens do not occur in order in the source sentence.
    # N-grams built purely from source tokens in source order are dropped.
    addgramcountergood_new = set()
    for gram in addgramcountergood:
        if any([tok not in sgrams_set for tok in gram.split()]) or not is_subsequence(gram.split(), complex.split()):
            addgramcountergood_new.add(gram)
    addgramcountergood = addgramcountergood_new

    # Each surviving addition contributes 1.
    addtmpscore = 0
    for _ in addgramcountergood:
        addtmpscore += 1

    addscore_precision = 0
    addscore_recall = 0
    if len(addgramcounter) > 0:
        addscore_precision = addtmpscore / len(addgramcounter)
    if len(addgramcounterall) > 0:
        addscore_recall = addtmpscore / len(addgramcounterall)
    addscore = 0
    if addscore_precision > 0 or addscore_recall > 0:
        addscore = 2 * addscore_precision * addscore_recall / (addscore_precision + addscore_recall)

    return (keepscore, (delscore_precision, delscore_recall, delscore), addscore)


def SARIsent(ssent, csent, rsents):
    """Compute sentence-level SARI components averaged over 1- to 4-grams.

    Sentences are lower-cased and split on single spaces, so consecutive
    spaces produce empty tokens.

    Args:
        ssent: source sentence.
        csent: candidate (system output) sentence.
        rsents: list of reference sentences.

    Returns:
        ``(avgkeepscore, (avgdelpscore, avgdelrscore, avgdelfscore),
        avgaddscore, (finalpscore, finalfscore))``. ``finalpscore`` averages
        keep, deletion precision and add; ``finalfscore`` uses the deletion
        F-score in place of the deletion precision.
    """
    def build_ngrams(tokens, order):
        # Unigrams are the token list itself; higher orders are
        # space-joined windows of consecutive tokens.
        if order == 1:
            return tokens
        return [" ".join(tokens[pos:pos + order]) for pos in range(len(tokens) - order + 1)]

    numref = len(rsents)
    source_tokens = ssent.lower().split(" ")
    candidate_tokens = csent.lower().split(" ")
    reference_tokens = [rsent.lower().split(" ") for rsent in rsents]

    keep_scores = []
    deletion_scores = []
    add_scores = []
    for order in (1, 2, 3, 4):
        keep, deletion, add = SARIngram(
            build_ngrams(source_tokens, order),
            build_ngrams(candidate_tokens, order),
            [build_ngrams(tokens, order) for tokens in reference_tokens],
            numref,
            ssent,
        )
        keep_scores.append(keep)
        deletion_scores.append(deletion)
        add_scores.append(add)

    # Average each component over the four n-gram orders.
    avgkeepscore = sum(keep_scores) / 4
    avgdelpscore = sum([precision for precision, _, _ in deletion_scores]) / 4
    avgdelrscore = sum([recall for _, recall, _ in deletion_scores]) / 4
    avgdelfscore = sum([fscore for _, _, fscore in deletion_scores]) / 4
    avgaddscore = sum(add_scores) / 4

    finalpscore = (avgkeepscore + avgdelpscore + avgaddscore) / 3
    finalfscore = (avgkeepscore + avgdelfscore + avgaddscore) / 3
    return avgkeepscore, (avgdelpscore, avgdelrscore, avgdelfscore), avgaddscore, (finalpscore, finalfscore)


def compute_sari(complex_sentences, reference_sentences, simplified_sentences):
    """Average the sentence-level SARI components over a corpus.

    The three arguments are parallel sequences: entry ``i`` of
    ``reference_sentences`` is the list of references for source
    ``complex_sentences[i]`` and system output ``simplified_sentences[i]``.

    Returns:
        A 7-tuple of means, in this order: SARI (with deletion precision),
        SARI (with deletion F-score), add, keep, deletion precision,
        deletion recall, deletion F-score.
    """
    per_sentence = []
    for position, simplified in enumerate(simplified_sentences):
        per_sentence.append(
            SARIsent(complex_sentences[position], simplified, reference_sentences[position])
        )

    # Each entry is (keep, (del_p, del_r, del_f), add, (sari_p, sari_f)).
    keep_scores = [keep for keep, _, _, _ in per_sentence]
    delp_scores = [dels[0] for _, dels, _, _ in per_sentence]
    delr_scores = [dels[1] for _, dels, _, _ in per_sentence]
    delf_scores = [dels[2] for _, dels, _, _ in per_sentence]
    add_scores = [add for _, _, add, _ in per_sentence]
    sari_scores = [final[0] for _, _, _, final in per_sentence]
    sarif_scores = [final[1] for _, _, _, final in per_sentence]

    return (
        np.mean(sari_scores),
        np.mean(sarif_scores),
        np.mean(add_scores),
        np.mean(keep_scores),
        np.mean(delp_scores),
        np.mean(delr_scores),
        np.mean(delf_scores),
    )

In [None]:
def compute_sari_for_df(df):
    """Add per-row SARI scores for the OpenAI and Gemini simplifications.

    For every row, ``input_text`` is the source and ``Golden standard`` the
    single reference. The first value returned by ``compute_sari`` is stored
    for each system.

    Returns:
        The same ``df`` object, modified in place with the new columns
        ``'OpenAI SARI Score'`` and ``'Gemini SARI Score'``.
    """
    openai_scores = []
    gemini_scores = []

    for _, record in df.iterrows():
        # compute_sari takes parallel lists, hence the one-element wrappers;
        # the references are a list holding one list of references.
        sources = [record['input_text']]
        references = [[record['Golden standard']]]
        openai_output = record['OpenAI PLABA Abstracts Simplification']
        gemini_output = record["Gemini PLABA Abstracts Simplification"]

        # Only the first of the seven returned aggregates is kept.
        openai_scores.append(compute_sari(sources, references, [openai_output])[0])
        gemini_scores.append(compute_sari(sources, references, [gemini_output])[0])

    # Attach the scores to the frame that was passed in.
    df['OpenAI SARI Score'] = openai_scores
    df['Gemini SARI Score'] = gemini_scores
    return df

In [None]:
all_simplification_df = compute_sari_for_df(all_simplification_df)
all_simplification_df

In [None]:
all_simplification_df.to_csv('SARI_scores_results.csv')

In [None]:
# Calculate the mean SARI score for OpenAI
openai_mean_sari = all_simplification_df['OpenAI SARI Score'].mean()

# Calculate the mean SARI score for Gemini
gemini_mean_sari = all_simplification_df['Gemini SARI Score'].mean()

# Determine which is higher and output the result.
# Note: if either mean is NaN, both comparisons below are False and the
# final "scored the same" branch is the one that prints.
if openai_mean_sari > gemini_mean_sari:
    print(f"OpenAI scored higher on average with a SARI score of {openai_mean_sari:.3f}")
elif gemini_mean_sari > openai_mean_sari:
    print(f"Gemini scored higher on average with a SARI score of {gemini_mean_sari:.3f}")
else:
    print(f"Both systems scored the same on average with a SARI score of {openai_mean_sari:.3f}")


In [None]:
openai_mean_sari

In [None]:
gemini_mean_sari