## ROUGE Evaluation

In [None]:
import rouge
import pandas as pd
import os

# Score every summary against its source documents with ROUGE-1 and ROUGE-L.
# `directory` holds the source documents (used as the reference text) and
# `output_directory` holds the summaries (used as the hypothesis text).
directory = 'input_docs/docs'
output_directory = 'output_FairExtract/'

# Keep only CSV topic files: os.listdir also returns stray entries such as
# Jupyter's `.ipynb_checkpoints` folder, which pd.read_csv cannot read.
file_list = sorted(f for f in os.listdir(directory) if f.endswith('.csv'))

# The evaluator's configuration does not depend on the file, so build it once.
evaluator = rouge.Rouge(metrics=['rouge-1', 'rouge-l'])

# Collect one record per topic and build the frame once at the end; growing a
# DataFrame with pd.concat inside the loop re-copies all previous rows each time.
rows = []
for file in file_list:
    hypothesis = ' '.join(pd.read_csv(os.path.join(output_directory, file))['text'])
    reference = ' '.join(pd.read_csv(os.path.join(directory, file))['text'])

    scores = evaluator.get_scores(hypothesis, reference)
    rows.append({
        'Topic': os.path.splitext(file)[0],
        'R1': scores[0]['rouge-1']['f'],
        'RL': scores[0]['rouge-l']['f'],
    })

df = pd.DataFrame(rows, columns=['Topic', 'R1', 'RL'])
df = df.sort_values(by='Topic')
df

In [None]:
# Persist the per-topic ROUGE table built in the previous cell.
df.to_csv('evaluation/rouge_scores.csv', index=False)

# Report the mean ROUGE-1 and ROUGE-L F-scores over all topics.
mean_r1 = df['R1'].mean()
mean_rl = df['RL'].mean()
print(mean_r1, mean_rl)

## SummaQA Evaluation

In [None]:
from summaqa import QG_masked
from summaqa import QA_Metric
import pandas as pd
import os

# SummaQA: generate masked questions from the source documents and measure how
# well they can be answered from the summary.
question_generator = QG_masked()
qa_metric = QA_Metric()

directory = 'input_docs/docs/'
# Keep only CSV topic files: os.listdir also returns stray entries such as
# Jupyter's `.ipynb_checkpoints` folder, which pd.read_csv cannot read.
file_list = sorted(f for f in os.listdir(directory) if f.endswith('.csv'))
output_dir = 'outputs/output_FairGPT'
eval_dir = 'FairGPT'

# Maps the label that is removed from the input to the sub-folder name of the
# two groups that remain.
dir_dict = {'White': 'H-A', 'Hisp': 'A-W', 'AA': 'W-H'}

for remove_label, group in dir_dict.items():
    output_directory = f'{output_dir}/{group}/'

    # Collect one record per topic and build the frame once at the end instead
    # of re-copying the whole frame with pd.concat on every iteration.
    rows = []
    for file in file_list:
        docs = pd.read_csv(directory + file)
        docs = docs[docs['label'] != remove_label]
        article = ' '.join(docs['text'])
        masked_questions, answer_spans = question_generator.get_questions(article)

        summary = ' '.join(pd.read_csv(output_directory + file)['text'])
        score = qa_metric.compute(masked_questions, answer_spans, summary)

        rows.append({'Topic': os.path.splitext(file)[0], 'SQA': score['avg_fscore']})

    df = pd.DataFrame(rows, columns=['Topic', 'SQA']).sort_values(by='Topic')

    # to_csv does not create missing folders, so make sure the target exists.
    score_dir = f'evaluation/{eval_dir}/{group}'
    os.makedirs(score_dir, exist_ok=True)
    df.to_csv(f'{score_dir}/SummaQA-score.csv', index=False)
    print(group, df.SQA.mean())

## BLANC Evaluation

In [None]:
import os
import pandas as pd
from blanc import BlancHelp, BlancTune
import torch

# BLANC: reference-free scores (BLANC-help and BLANC-tune) of each summary
# against its source documents.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

directory = 'input_docs/docs/'
# Keep only CSV topic files: os.listdir also returns stray entries such as
# Jupyter's `.ipynb_checkpoints` folder, which pd.read_csv cannot read.
file_list = sorted(f for f in os.listdir(directory) if f.endswith('.csv'))

output_dir = 'outputs/output_FairGPT'
eval_dir = 'FairGPT'

# Maps the label that is removed from the input to the sub-folder name of the
# two groups that remain.
dir_dict = {'White': 'H-A', 'Hisp': 'A-W', 'AA': 'W-H'}

# Both scorers were already reused across every file of a group; build them
# once for all groups so the underlying models are not reloaded three times.
blanc_help = BlancHelp(device=device)
blanc_tune = BlancTune(device=device, finetune_mask_evenly=False, show_progress_bar=False)

for remove_label, group in dir_dict.items():
    output_directory = f'{output_dir}/{group}/'

    # Collect one record per topic and build the frame once at the end instead
    # of re-copying the whole frame with pd.concat on every iteration.
    rows = []
    for file in file_list:
        docs = pd.read_csv(directory + file)
        docs = docs[docs['label'] != remove_label]
        document = ' '.join(docs['text'])

        summary = ' '.join(pd.read_csv(output_directory + file)['text'])
        score_h = blanc_help.eval_once(document, summary)
        score_t = blanc_tune.eval_once(document, summary)

        rows.append({
            'Topic': os.path.splitext(file)[0],
            'BLANC-help': score_h,
            'BLANC-tune': score_t,
        })

    df = pd.DataFrame(rows, columns=['Topic', 'BLANC-help', 'BLANC-tune'])
    df = df.sort_values(by='Topic')

    # to_csv does not create missing folders, so make sure the target exists.
    score_dir = f'evaluation/{eval_dir}/{group}'
    os.makedirs(score_dir, exist_ok=True)
    df.to_csv(f'{score_dir}/BLANC-score.csv', index=False)
    print(group, df['BLANC-help'].mean())

## BARTScore Evaluation

In [None]:
!git clone https://github.com/neulab/BARTScore

In [None]:
import os
import pandas as pd
from BARTScore.bart_score import BARTScorer

# BARTScore: score each summary (target) given its source documents (source).
import torch  # local import: only needed to pick the device for this cell

directory = 'input_docs/docs/'
# Keep only CSV topic files: os.listdir also returns stray entries such as
# Jupyter's `.ipynb_checkpoints` folder, which pd.read_csv cannot read.
file_list = sorted(f for f in os.listdir(directory) if f.endswith('.csv'))

output_dir = 'outputs/output_FairGPT'
eval_dir = 'FairGPT'

# Maps the label that is removed from the input to the sub-folder name of the
# two groups that remain.
dir_dict = {'White': 'H-A', 'Hisp': 'A-W', 'AA': 'W-H'}

# Fall back to the CPU when no GPU is present instead of failing on a
# hard-coded 'cuda:0'.
device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

# The scorer does not depend on the group, so load the checkpoint only once.
bart_scorer = BARTScorer(device=device, checkpoint='facebook/bart-large-cnn')

for remove_label, group in dir_dict.items():
    output_directory = f'{output_dir}/{group}/'

    # Collect one record per topic and build the frame once at the end instead
    # of re-copying the whole frame with pd.concat on every iteration.
    rows = []
    for file in file_list:
        docs = pd.read_csv(directory + file)
        docs = docs[docs['label'] != remove_label]
        document = ' '.join(docs['text'])

        summary = ' '.join(pd.read_csv(output_directory + file)['text'])
        score = bart_scorer.score([document], [summary], batch_size=4)

        rows.append({'Topic': os.path.splitext(file)[0], 'BARTScore': score[0]})

    df = pd.DataFrame(rows, columns=['Topic', 'BARTScore']).sort_values(by='Topic')

    # to_csv does not create missing folders, so make sure the target exists.
    score_dir = f'evaluation/{eval_dir}/{group}'
    os.makedirs(score_dir, exist_ok=True)
    df.to_csv(f'{score_dir}/BARTScore.csv', index=False)
    print(group, df['BARTScore'].mean())

## SUPERT Evaluation

In [None]:
!git clone https://github.com/yg211/acl20-ref-free-eval.git

In [None]:
import os
import pandas as pd

import sys
sys.path.append("acl20-ref-free-eval")

from ref_free_metrics.supert import Supert

# SUPERT: reference-free score of each summary against its source documents.
directory = 'input_docs/docs/'
# Keep only CSV topic files: os.listdir also returns stray entries such as
# Jupyter's `.ipynb_checkpoints` folder, which pd.read_csv cannot read.
file_list = sorted(f for f in os.listdir(directory) if f.endswith('.csv'))

output_dir = 'outputs/output_FairGPT'
eval_dir = 'FairGPT'

# Maps the label that is removed from the input to the sub-folder name of the
# two groups that remain.
dir_dict = {'White': 'H-A', 'Hisp': 'A-W', 'AA': 'W-H'}

for remove_label, group in dir_dict.items():
    output_directory = f'{output_dir}/{group}/'

    # Collect one record per topic and build the frame once at the end instead
    # of re-copying the whole frame with pd.concat on every iteration.
    rows = []
    for file in file_list:
        docs = pd.read_csv(directory + file)
        docs = docs[docs['label'] != remove_label]
        # NOTE(review): Supert receives a one-element list wrapping the pandas
        # Series of texts; confirm this matches the document format it expects.
        document = [docs['text']]

        summary = [' '.join(pd.read_csv(output_directory + file)['text'])]

        # Supert is built from the source documents, so it is recreated per topic.
        supert = Supert(document, ref_metric='top15')
        score = supert(summary)

        rows.append({'Topic': os.path.splitext(file)[0], 'SUPERT': score[0]})

    df = pd.DataFrame(rows, columns=['Topic', 'SUPERT']).sort_values(by='Topic')

    # to_csv does not create missing folders, so make sure the target exists.
    score_dir = f'evaluation/{eval_dir}/{group}'
    os.makedirs(score_dir, exist_ok=True)
    df.to_csv(f'{score_dir}/SUPERT.csv', index=False)
    print(group, df['SUPERT'].mean())

## Representation Gap Evaluation

In [None]:
import pandas as pd
import os

# Representation gap: difference between the share of the most represented
# label and the remaining share among the rows of each summary file.
output_dir = 'outputs/output_FairGPT'
eval_dir = 'FairGPT'

# Maps the label that is removed from the input to the sub-folder name of the
# two groups that remain.
dir_dict = {'White': 'H-A', 'Hisp': 'A-W', 'AA': 'W-H'}

for remove_label, group in dir_dict.items():
    output_directory = f'{output_dir}/{group}/'
    # Keep only CSV topic files: os.listdir also returns stray entries such as
    # Jupyter's `.ipynb_checkpoints` folder, which pd.read_csv cannot read.
    file_list = sorted(f for f in os.listdir(output_directory) if f.endswith('.csv'))

    # Collect one record per topic and build the frame once at the end instead
    # of re-copying the whole frame with pd.concat on every iteration.
    rows = []
    for file in file_list:
        labels = pd.read_csv(output_directory + file)['label']
        counts = labels.value_counts()
        max_percentage = counts.max() / counts.sum()
        # The remaining share is taken as 1 - max rather than counts.min():
        # a label that never occurs is absent from value_counts, so its share
        # of zero would otherwise be missed.
        min_percentage = 1 - max_percentage
        rows.append({
            'Topic': os.path.splitext(file)[0],
            'Representation Gap': max_percentage - min_percentage,
        })

    df = pd.DataFrame(rows, columns=['Topic', 'Representation Gap'])
    df = df.sort_values(by='Topic')

    # to_csv does not create missing folders, so make sure the target exists.
    score_dir = f'evaluation/{eval_dir}/{group}'
    os.makedirs(score_dir, exist_ok=True)
    df.to_csv(f'{score_dir}/Representation-Gap.csv', index=False)
    print(group, df['Representation Gap'].mean())

## UniEval Evaluation

In [None]:
import sys
sys.path.append("UniEval")
from utils import convert_to_json
from metric.evaluator import get_evaluator
from IPython.display import clear_output

import pandas as pd
import os

# UniEval: coherence, consistency, fluency and overall score of each summary
# with respect to its source documents.
directory = 'input_docs/docs/'
# Keep only CSV topic files: os.listdir also returns stray entries such as
# Jupyter's `.ipynb_checkpoints` folder, which pd.read_csv cannot read.
file_list = sorted(f for f in os.listdir(directory) if f.endswith('.csv'))

output_dir = 'outputs/output_FairGPT'
eval_dir = 'FairGPT'

# Maps the label that is removed from the input to the sub-folder name of the
# two groups that remain.
dir_dict = {'White': 'H-A', 'Hisp': 'A-W', 'AA': 'W-H'}

evaluator = get_evaluator("summarization")
score_columns = ['coherence', 'consistency', 'fluency', 'overall']

for remove_label, group in dir_dict.items():
    output_directory = f'{output_dir}/{group}/'

    # Collect one record per topic and build the frame once at the end instead
    # of re-copying the whole frame with pd.concat on every iteration.
    rows = []
    for file in file_list:
        docs = pd.read_csv(directory + file)
        docs = docs[docs['label'] != remove_label]
        document = ' '.join(docs['text'])

        summary = [' '.join(pd.read_csv(output_directory + file)['text'])]

        # output_list and src_list are parallel lists paired by index, so the
        # source must be wrapped in a list too; a bare string would be indexed
        # character by character and only its first character would be used.
        data = convert_to_json(output_list=summary,
                               src_list=[document])

        # compute the UniEval scores
        eval_scores = evaluator.evaluate(data, dims=['coherence', 'consistency', 'fluency'],
                                         overall=True, print_result=True)
        row = {'Topic': os.path.splitext(file)[0]}
        row.update({name: eval_scores[0][name] for name in score_columns})
        rows.append(row)
        # Drop the per-file result table printed by evaluate() to keep the cell output short.
        clear_output()

    df = pd.DataFrame(rows, columns=['Topic'] + score_columns)
    df = df.sort_values(by='Topic')

    # to_csv does not create missing folders, so make sure the target exists.
    score_dir = f'evaluation/{eval_dir}/{group}'
    os.makedirs(score_dir, exist_ok=True)
    df.to_csv(f'{score_dir}/UniEval.csv', index=False)

## All Metrics in the Same Table

In [None]:
# Combine all metric CSV files into a single Excel workbook, one sheet per evaluation folder
import pandas as pd
import os

def _merge_metric_files(directory, group, iteration=None):
    """Outer-join every metric CSV found in *directory* on the ``Topic`` column.

    Args:
        directory: Folder containing one CSV per metric, each with a ``Topic`` column.
        group: Group name written into the ``Groups`` column of every row.
        iteration: Optional iteration number; when given, an ``Iteration``
            column is added and filled with it.

    Returns:
        A DataFrame with ``Topic``, ``Groups`` (and ``Iteration`` when
        requested) followed by the columns of every merged CSV.
    """
    base_columns = ['Topic', 'Groups']
    if iteration is not None:
        base_columns.append('Iteration')

    merged = pd.DataFrame(columns=base_columns)
    # Sorted so the metric columns appear in the same order on every run;
    # os.listdir gives no ordering guarantee.
    for name in sorted(os.listdir(directory)):
        if not name.endswith('.csv'):
            continue
        metric_df = pd.read_csv(os.path.join(directory, name))
        merged = pd.merge(merged, metric_df, on='Topic', how='outer')

    # Rows introduced by the outer joins carry NaN in the bookkeeping columns.
    merged['Groups'] = merged['Groups'].fillna(group)
    if iteration is not None:
        merged['Iteration'] = merged['Iteration'].fillna(iteration)
    return merged


group_list = ['H-A', 'A-W', 'W-H']

directory_path = 'evaluation/'
items = os.listdir(directory_path)

# Keep only the sub-folders of the evaluation directory; each one becomes a
# sheet. Sorted so the sheet order is the same on every run.
folders = sorted(item for item in items if os.path.isdir(os.path.join(directory_path, item)))

with pd.ExcelWriter('evaluation/eval.xlsx') as writer:
    for folder in folders:
        # These two folders hold five numbered iteration sub-folders per group.
        has_iterations = folder in ['naive', 'naiveFair']

        frames = []
        for group in group_list:
            if has_iterations:
                for i in range(1, 6):
                    frames.append(
                        _merge_metric_files(f'evaluation/{folder}/{group}/{i}/', group, i))
            else:
                frames.append(_merge_metric_files(f'evaluation/{folder}/{group}/', group))

        # Concatenate once per sheet instead of growing the result frame step by step.
        df_result = pd.concat(frames, ignore_index=True)
        df_result.to_excel(writer, sheet_name=folder, index=False)