In [None]:
import os
import glob
import pandas as pd
import re
import warnings
from saac.evaluation.eval_utils import load_tda_data, load_occupation_data, rgb_intensity

# Report each distinct warning only the first time it occurs so repeated
# warnings do not flood the cell output.
warnings.filterwarnings('once')

In [None]:
'''
Midjourney has a file name limit of 100 characters, which cuts off a number of the submitted prompts in the file name.
In order to merge the results with the generated prompt data,
we parse and extract the base prompt from the image_file column.
'''
def load_image_analysis_results(eval_data_path=None):
    """Load the raw image-analysis CSVs and derive gender / skin-intensity columns.

    Every ``*.csv`` file in ``eval_data_path`` is read with its header row
    replaced by a fixed set of column names, tagged with a ``model`` column
    taken from the file name (the text before the first underscore) and
    concatenated into a single frame.

    Parameters
    ----------
    eval_data_path : str, optional
        Directory holding the raw result CSVs. Defaults to ``data/raw``
        relative to the current working directory.

    Returns
    -------
    pandas.DataFrame
        The concatenated results (original per-file index labels are kept)
        with these extra columns: ``model``, ``gender_detected_cat``
        (integer code, 0 when no rule matched), ``gender_detected_val``
        (label for codes 1-4, NaN for code 0) and ``rgb_intensity``.

    Raises
    ------
    ValueError
        If ``eval_data_path`` contains no CSV files.
    """
    if eval_data_path is None:
        eval_data_path = os.path.join('data','raw')

    # Sorted so the row order does not depend on filesystem listing order.
    files = sorted(glob.glob(os.path.join(eval_data_path, '*.csv')))
    if not files:
        raise ValueError('No CSV files found in %r' % (eval_data_path,))

    colnames = ['prompt','image','quadrant','bbox','skin color','gender.Woman','gender.Man']
    frames = [
        pd.read_csv(fp, header=0, names=colnames)
          .assign(model=os.path.basename(fp).split('_')[0])
        for fp in files
    ]
    results = pd.concat(frames, sort=False)

    # Prompts that neither start with 'a' nor mention 'photorealistic' get the
    # leading article and the trailing keyword added back.
    results['prompt'] = results['prompt'].apply(
        lambda x: 'a '+x+" photorealistic" if x[0]!='a' and 'photorealistic' not in x else x)

    # Normalizing gender categories: divide the raw scores by 100.
    results['gender.Woman'] = results['gender.Woman'] / 100.
    results['gender.Man'] = results['gender.Man'] / 100.

    # Mapping gender detection values to single column.
    # Comparisons against NaN are False, so rows without scores only match `noface`.
    noface = results['skin color'].isnull().values
    unknown = ((results['gender.Woman']<=.50) & (results['gender.Man']<=.50)).values
    woman = ((results['gender.Woman']>=.50) & (results['gender.Man']<.50)).values
    man = ((results['gender.Man']>=.50) & (results['gender.Woman']<.50)).values

    # Use .loc with positional boolean masks instead of chained indexing
    # (frame[col][mask] = v), which warns and is a no-op under Copy-on-Write.
    # Codes are applied in this order, so a later mask wins where masks overlap.
    results['gender_detected_cat'] = 0
    for code, mask in ((1, noface), (2, unknown), (3, woman), (4, man)):
        results.loc[mask, 'gender_detected_cat'] = code

    gender_dict = {
        1: 'no face',
        2: 'unknown',
        3: 'woman',
        4: 'man',
    }
    results['gender_detected_val'] = results['gender_detected_cat'].map(gender_dict)

    # Extracting RGB intensity from skin color.
    # NOTE(review): eval() executes whatever text is stored in the CSV. If the
    # column only ever holds plain literals, ast.literal_eval would be a safer
    # drop-in - confirm the stored format before switching.
    results['rgb_intensity'] = results['skin color'].apply(
        lambda x: rgb_intensity(eval(x)) if not pd.isna(x) else None)
    return results
# Run the loader; as the last expression of the cell, the returned DataFrame is displayed.
load_image_analysis_results()

In [None]:
def _normalize_prompt(line):
    """Reduce one submitted prompt string to the base prompt used as merge key."""
    if '/imagine prompt:' in line: # "/imagine prompt:a pitiless person, photorealistic --s 625"
        line = line.replace('/imagine prompt:','')
        line = line.replace(', photorealistic --s 625',' photorealistic')
    # "a passionless person, photorealistic" -> "a passionless person photorealistic":
    # commas become spaces, runs of spaces collapse, outer whitespace is removed.
    line = line.replace(',', ' ')
    return re.sub(' +', ' ', line).strip()


def load_prompts(prompt_data_path=None):
    """Load the generated prompt CSVs and normalize their prompt text.

    Columns 0, 1 and 5 of every ``*.csv`` file in ``prompt_data_path`` are read
    as ``prompt``, ``tag`` and ``compound`` (the files' own header row is
    discarded), the files are concatenated, and each prompt is normalized:
    the ``/imagine prompt:`` prefix and the ``--s 625`` suffix are removed when
    the prefix is present, commas are replaced by spaces and whitespace is
    collapsed.

    Parameters
    ----------
    prompt_data_path : str, optional
        Directory holding the prompt CSVs. Defaults to
        ``../prompt_generation/data/processed``.

    Returns
    -------
    pandas.DataFrame
        Frame with columns ``prompt`` (normalized), ``tag`` and ``compound``.

    Raises
    ------
    ValueError
        If ``prompt_data_path`` contains no CSV files.
    """
    if prompt_data_path is None:
        prompt_data_path = os.path.join('..', 'prompt_generation','data','processed')

    # Sorted so the row order does not depend on filesystem listing order.
    files = sorted(glob.glob(os.path.join(prompt_data_path, '*.csv')))
    if not files:
        raise ValueError('No CSV files found in %r' % (prompt_data_path,))

    cols = [0, 1, 5]
    colnames = ['prompt','tag','compound']
    prompts = pd.concat(
        [pd.read_csv(fp, header=0, usecols=cols, names=colnames) for fp in files],
        sort=False)

    # Assign a plain list so the values are written positionally even though
    # the concatenated frame repeats index labels across files.
    prompts['prompt'] = [_normalize_prompt(line) for line in prompts['prompt']]
    return prompts
# Run the loader; as the last expression of the cell, the returned DataFrame is displayed.
load_prompts()

In [None]:
def get_occupation_results(res_prompts):
    """Inner-join the merged prompt results with the occupation data.

    Rows of ``res_prompts`` are matched on their ``tag`` column against the
    ``norm_title`` column of the frame returned by ``load_occupation_data()``;
    rows without a counterpart on either side are dropped.
    """
    return res_prompts.merge(
        load_occupation_data(),
        how='inner',
        left_on=['tag'],
        right_on=['norm_title'],
    )

def get_tda_results(res_prompts):
    """Inner-join the merged prompt results with the TDA data.

    Rows of ``res_prompts`` are matched on their ``tag`` column against the
    ``tda`` column of the frame returned by ``load_tda_data()``; rows without
    a counterpart on either side are dropped.
    """
    return res_prompts.merge(
        load_tda_data(),
        how='inner',
        left_on=['tag'],
        right_on=['tda'],
    )

# Load both inputs, then attach the prompt metadata to every image-analysis row.
image_analysis = load_image_analysis_results()
prompts = load_prompts()
# 'prompt' is the shared key; the default inner join keeps only prompts present in both frames.
res_prompts = pd.merge(image_analysis, prompts, on='prompt')


In [None]:
# Join the merged results with the TDA data and print the number of resulting rows.
tda = get_tda_results(res_prompts)
print(len(tda))

In [None]:
# Preview the first rows of the TDA results.
tda.head()

In [None]:
# Join the merged results with the occupation data and print the number of resulting rows.
occ = get_occupation_results(res_prompts)
print(len(occ))

In [None]:
# Preview the first rows of the occupation results.
occ.head()

In [None]:
# Directory that receives the two merged result tables.
savepath='./data/processed/'
# to_csv does not create missing parent directories, so make sure the target exists.
os.makedirs(savepath, exist_ok=True)

# Write both tables without the DataFrame index column.
tda.to_csv(os.path.join(savepath, 'TDA_Results.csv'),index=False)
occ.to_csv(os.path.join(savepath, 'Occupation_Results.csv'),index=False)