# Libraries

In [None]:
import utils as u
import pandas as pd
import os

# Lift pandas' display limits so tables are rendered without truncation.
for option_name in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(option_name, None)

# Choose Model

In [None]:
# Name of the model whose results are processed; it is interpolated into the
# dataset path and into every output file name below.
# Other values listed by the original author:
# 'vicuna-7b', 'llama7b', 'flan-t5-large', 'flan-t5-xl', 'flan-t5-xxl'
model = "flan-t5-base"

# Dataset

In [None]:
# Load the model-specific dataset file through the project's reader.
dataset_path = f"Dataset/P-AT-{model}.json"
dataset = u.read_file(dataset_path)

In [None]:
# Directory that receives the CSV result files written for this model.
output_dir = f'P-AT-{model}'
# exist_ok=True replaces the exists()/mkdir() pair, which could fail if the
# directory appeared between the check and the creation.
os.makedirs(output_dir, exist_ok=True)

# Processing

1) P-AT-gender

In [None]:
# One DataFrame per test of the 'P-AT-gender' split, nested under the model name.
gender_df = {model: {}}
for weat, records in dataset['P-AT-gender'].items():
    gender_df[model][weat] = u.pd.DataFrame(records)

2) P-AT-base

In [None]:
# One DataFrame per test of the 'P-AT-base' split, nested under the model name.
base_df = {model: {}}
for weat, records in dataset['P-AT-base'].items():
    base_df[model][weat] = u.pd.DataFrame(records)

3) P-AT-race

In [None]:
# One DataFrame per test of the 'P-AT-race' split, nested under the model name.
race_df = {model: {}}
for weat, records in dataset['P-AT-race'].items():
    race_df[model][weat] = u.pd.DataFrame(records)

4) P-AT-disability

In [None]:
# One DataFrame per test of the 'P-AT-disability' split, nested under the model name.
disability_df = {model: {}}
for weat, records in dataset['P-AT-disability'].items():
    disability_df[model][weat] = u.pd.DataFrame(records)

5) P-AT-age

In [None]:
# One DataFrame per test of the 'P-AT-age' split, nested under the model name.
age_df = {model: {}}
for weat, records in dataset['P-AT-age'].items():
    age_df[model][weat] = u.pd.DataFrame(records)

# Evaluation

In [None]:
import numpy as np
from scipy.stats import fisher_exact

In [None]:
def compute_chisquare(count_given_test):
    """Compute Fisher exact-test p-values for every test in ``count_given_test``.

    Despite its name, this function calls ``scipy.stats.fisher_exact``; no
    chi-square statistic is computed.

    For each element of ``count_given_test`` the attribute counts are summed
    per (instruction, category) pair and one p-value is produced per
    instruction, visiting instructions in sorted order. The counts are then
    summed per category over all instructions and one overall p-value is
    appended after the per-instruction ones.

    Args:
        count_given_test: Sequence of DataFrames that, after
            ``reset_index()``, expose the columns ``instruction``,
            ``category``, ``attribute1-count``, ``attribute2-count`` and
            ``total``. Presumably each test has exactly two categories so the
            contingency tables are 2x2 -- TODO confirm against the producer
            of these frames.

    Returns:
        A 1-D ``numpy`` array with, for each test in order, the
        per-instruction p-values followed by that test's overall p-value.
    """
    count_columns = ['attribute1-count', 'attribute2-count']
    summed_columns = count_columns + ['total']

    # `display` is only defined when running under IPython/Jupyter; fall back
    # to `print` so the function also works in a plain interpreter.
    try:
        show = display
    except NameError:
        show = print

    pvalues = []
    for test_counts in count_given_test:
        flat = test_counts.reset_index()

        # Counts aggregated per (instruction, category). The current test is
        # used here; the previous code always read element 0 of the sequence.
        # groupby(level=...).sum() replaces sum(level=...), which pandas 2.0
        # removed.
        per_prompt_counts = (
            flat.set_index(['instruction', 'category'])[summed_columns]
            .groupby(level=[0, 1])
            .sum()
        )
        for instruction in np.unique(per_prompt_counts.index.get_level_values(0)):
            f_obs = per_prompt_counts.loc[instruction][count_columns].values
            _, pvalue = fisher_exact(f_obs)
            pvalues.append(pvalue)

        # Counts aggregated per category over all instructions of this test.
        counts = flat.set_index(['category'])[summed_columns].groupby(level=0).sum()
        f_obs = counts[count_columns].values
        show(f_obs)

        _, pvalue = fisher_exact(f_obs)
        pvalues.append(pvalue)
    return np.array(pvalues)

In [None]:
# Tests evaluated for the gender split, in the order they are scored.
gender_tasks = ['weat7', 'weat8', 'weat6']

# Target and attribute sets are looked up by task name, keeping task order.
gender_targets = [u.targets[f'{task}_target'] for task in gender_tasks]
gender_all_attributes = [u.attributes[f'gender_{task}_attr'] for task in gender_tasks]

# Evaluator mode 7; its meaning is defined in the utils module.
u.Mode.set_mode(7)

datasets_gender = u.create_datasets(
    gender_df,
    [model],
    tasks=gender_tasks,
    all_targets=gender_targets,
    all_attributes=gender_all_attributes,
    instruction_column='base_instruction',
    force_order=True,
)

# Re-apply the untruncated display settings before scoring.
for option_name in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(option_name, None)

scores_gender, count_given_test = u.Evaluator.compute(datasets_gender, [model])

In [None]:
# Significance thresholds used for the flags below.
alpha5 = 0.05
alpha10 = 0.10

# Test against the null hypothesis of independence of the categorical variables.
pvalues = compute_chisquare(count_given_test)

print("**************************************")
for label, threshold in (("pvalues < alpha5 ", alpha5), ("pvalues < alpha10", alpha10)):
    print(label, pvalues < threshold)

In [None]:
# Attach the p-values and their significance flags to the gender scores.
scores_gender["pvalue"] = pvalues
for flag_column, threshold in (("pvalue < alpha5 ", alpha5), ("pvalue < alpha10", alpha10)):
    scores_gender[flag_column] = pvalues < threshold

scores_gender

In [None]:
# Persist the gender scores next to the other results for this model.
csv_path = f'{output_dir}/P-AT-gender_{model}.csv'
scores_gender.to_csv(csv_path)

## Base

### Pleasant

In [None]:
# Tests of the base split scored with the 'base_weat_pleasant_attr' attributes.
base_pleasant_tasks = ['weat1', 'weat2', 'weat3', 'weat3b', 'weat4', 'weat10']

# One target set per task, looked up by task name.
base_pleasant_targets = [u.targets[f'{task}_target'] for task in base_pleasant_tasks]

# Every task uses the same attribute set, repeated once per task.
base_pleasant_all_attributes = [
    u.attributes['base_weat_pleasant_attr'] for _ in base_pleasant_tasks
]

# Evaluator mode 1; its meaning is defined in the utils module.
u.Mode.set_mode(1)

datasets_base_pleasant = u.create_datasets(
    base_df,
    [model],
    tasks=base_pleasant_tasks,
    all_targets=base_pleasant_targets,
    all_attributes=base_pleasant_all_attributes,
    instruction_column='instruction',
    force_order=True,
)

# Re-apply the untruncated display settings before scoring.
for option_name in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(option_name, None)

scores_base_pleasant, count_given_test = u.Evaluator.compute(datasets_base_pleasant, [model])

In [None]:
# Bare expression: the notebook renders the scores as the cell output.
scores_base_pleasant

In [None]:
# Significance thresholds used for the flags below.
alpha5 = 0.05
alpha10 = 0.10

# Attach the p-values and their significance flags to the scores.
pvalues = compute_chisquare(count_given_test)
scores_base_pleasant["pvalue"] = pvalues
for flag_column, threshold in (("pvalue < alpha5 ", alpha5), ("pvalue < alpha10", alpha10)):
    scores_base_pleasant[flag_column] = pvalues < threshold

scores_base_pleasant

In [None]:
# Persist the base/pleasant scores for this model.
csv_path = f'{output_dir}/P-AT-base_pleasant_{model}.csv'
scores_base_pleasant.to_csv(csv_path)

### Career

In [None]:
# Single test of the base split scored with the 'base_weat_career' attributes.
base_career_tasks = ['weat6']
base_career_targets = [u.targets[f'{task}_target'] for task in base_career_tasks]
base_career_all_attributes = [u.attributes['base_weat_career']]

# Evaluator mode 6; its meaning is defined in the utils module.
u.Mode.set_mode(6)

datasets_base_career = u.create_datasets(
    base_df,
    [model],
    tasks=base_career_tasks,
    all_targets=base_career_targets,
    all_attributes=base_career_all_attributes,
    instruction_column='instruction',
    force_order=True,
)

# Re-apply the untruncated display settings before scoring.
for option_name in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(option_name, None)

scores_base_career, count_given_test = u.Evaluator.compute(datasets_base_career, [model])

In [None]:
# Bare expression: the notebook renders the scores as the cell output.
scores_base_career

In [None]:
# Significance thresholds used for the flags below.
alpha5 = 0.05
alpha10 = 0.10

# Attach the p-values and their significance flags to the scores.
pvalues = compute_chisquare(count_given_test)
scores_base_career["pvalue"] = pvalues
for flag_column, threshold in (("pvalue < alpha5 ", alpha5), ("pvalue < alpha10", alpha10)):
    scores_base_career[flag_column] = pvalues < threshold

scores_base_career

In [None]:
# Persist the base/career scores for this model.
csv_path = f'{output_dir}/P-AT-base_career_{model}.csv'
scores_base_career.to_csv(csv_path)

### Family

In [None]:
# Tests of the base split scored with the 'base_weat_gender' attributes.
base_gender_tasks = ['weat7', 'weat8']
base_gender_targets = [u.targets[f'{task}_target'] for task in base_gender_tasks]
# Same attribute set for both tasks (weat7 and weat8).
base_gender_all_attributes = [u.attributes['base_weat_gender'] for _ in base_gender_tasks]

# Evaluator mode 7; its meaning is defined in the utils module.
u.Mode.set_mode(7)

datasets_base_gender = u.create_datasets(
    base_df,
    [model],
    tasks=base_gender_tasks,
    all_targets=base_gender_targets,
    all_attributes=base_gender_all_attributes,
    instruction_column='instruction',
    force_order=True,
)

scores_base_gender, count_given_test = u.Evaluator.compute(datasets_base_gender, [model])

In [None]:
# Significance thresholds used for the flags below.
alpha5 = 0.05
alpha10 = 0.10

# Attach the p-values and their significance flags to the scores.
pvalues = compute_chisquare(count_given_test)
scores_base_gender["pvalue"] = pvalues
for flag_column, threshold in (("pvalue < alpha5 ", alpha5), ("pvalue < alpha10", alpha10)):
    scores_base_gender[flag_column] = pvalues < threshold

scores_base_gender

In [None]:
# Persist the base/gender scores for this model.
csv_path = f'{output_dir}/P-AT-base_gender_{model}.csv'
scores_base_gender.to_csv(csv_path)

### Disease

In [None]:
# Single test (weat9) of the base split scored with the 'base_weat_disease' attributes.
base_disease_tasks = ['weat9']
base_disease_targets = [u.targets[f'{task}_target'] for task in base_disease_tasks]
base_disease_all_attributes = [u.attributes['base_weat_disease']]

# Evaluator mode 9; its meaning is defined in the utils module.
u.Mode.set_mode(9)

datasets_base_disease = u.create_datasets(
    base_df,
    [model],
    tasks=base_disease_tasks,
    all_targets=base_disease_targets,
    all_attributes=base_disease_all_attributes,
    instruction_column='instruction',
    force_order=True,
)

# Re-apply the untruncated display settings before scoring.
for option_name in ("display.max_rows", "display.max_columns", "display.max_colwidth"):
    pd.set_option(option_name, None)

scores_base_disease, count_given_test = u.Evaluator.compute(datasets_base_disease, [model])

In [None]:
# Significance thresholds used for the flags below.
alpha5 = 0.05
alpha10 = 0.10

# Test against the null hypothesis of independence.
pvalues = compute_chisquare(count_given_test)
scores_base_disease["pvalue"] = pvalues
for flag_column, threshold in (("pvalue < alpha5 ", alpha5), ("pvalue < alpha10", alpha10)):
    scores_base_disease[flag_column] = pvalues < threshold

scores_base_disease

In [None]:
# Persist the base/disease scores for this model.
csv_path = f'{output_dir}/P-AT-base_disease_{model}.csv'
scores_base_disease.to_csv(csv_path)

## Gender

## Race

In [None]:
# Tests evaluated for the race split, in the order they are scored.
race_tasks = ['weat3', 'weat4']
race_targets = [u.targets[f'{task}_target'] for task in race_tasks]
# Both tasks share the 'race_weat_3_4_attr' attribute set.
race_all_attributes = [u.attributes['race_weat_3_4_attr'] for _ in race_tasks]

# Evaluator mode 1; its meaning is defined in the utils module.
u.Mode.set_mode(1)

datasets_race = u.create_datasets(
    race_df,
    [model],
    tasks=race_tasks,
    all_targets=race_targets,
    all_attributes=race_all_attributes,
    instruction_column='base_instruction',
    force_order=True,
)

In [None]:
# Show all columns at full width, then score the race datasets.
for option_name in ("display.max_columns", "display.max_colwidth"):
    pd.set_option(option_name, None)

scores_race, count_given_test = u.Evaluator.compute(datasets_race, [model])

In [None]:
# Significance thresholds used for the flags below.
alpha5 = 0.05
alpha10 = 0.10

# Attach the p-values and their significance flags to the scores.
pvalues = compute_chisquare(count_given_test)
scores_race["pvalue"] = pvalues
for flag_column, threshold in (("pvalue < alpha5 ", alpha5), ("pvalue < alpha10", alpha10)):
    scores_race[flag_column] = pvalues < threshold

scores_race

In [None]:
# Persist the race scores for this model.
csv_path = f'{output_dir}/P-AT-race_{model}.csv'
scores_race.to_csv(csv_path)

## Age

In [None]:
# Single test evaluated for the age split.
age_tasks = ['weat10']
age_targets = [u.targets[f'{task}_target'] for task in age_tasks]
age_all_attributes = [u.attributes['age_weat_10_attr']]

# Evaluator mode 10; its meaning is defined in the utils module.
u.Mode.set_mode(10)

datasets_age = u.create_datasets(
    age_df,
    [model],
    tasks=age_tasks,
    all_targets=age_targets,
    all_attributes=age_all_attributes,
    instruction_column='base_instruction',
    force_order=True,
)

In [None]:
# Show all columns at full width, then score the age datasets.
for option_name in ("display.max_columns", "display.max_colwidth"):
    pd.set_option(option_name, None)

scores_age, count_given_test = u.Evaluator.compute(datasets_age, [model])

In [None]:
# Bare expression: the notebook renders the scores as the cell output.
scores_age

In [None]:
# Significance thresholds used for the flags below.
alpha5 = 0.05
alpha10 = 0.10

# Attach the p-values and their significance flags to the scores.
pvalues = compute_chisquare(count_given_test)
scores_age["pvalue"] = pvalues
for flag_column, threshold in (("pvalue < alpha5 ", alpha5), ("pvalue < alpha10", alpha10)):
    scores_age[flag_column] = pvalues < threshold

scores_age

In [None]:
# Persist the age scores for this model.
csv_path = f'{output_dir}/P-AT-age_{model}.csv'
scores_age.to_csv(csv_path)