In [1]:
import os
import glob
import pandas as pd
import re
import warnings
from saac.eval_utils import load_tda_data, load_occupation_data, rgb_intensity

# NOTE(review): this silences every warning for the rest of the session,
# including any pandas warnings raised by the cells below.
warnings.filterwarnings('ignore')

In [2]:
'''
Midjourney has a file name limit of 100 characters, which cuts off a number of the submitted prompts in the file name.
In order to merge the results with the generated prompt data,
we will parse and extract the base prompt from the analysis results csv as well as the original prompts csv.
'''

def clean_prompts(prompts_data):
    """Reduce every entry of ``prompts_data['prompt']`` to its base prompt.

    Entries containing the ``/imagine prompt:`` command prefix have the prefix
    and the ``--s 625`` suffix removed first. Every entry then has its commas
    turned into spaces, runs of spaces collapsed, surrounding whitespace
    trimmed, and its last word forced to ``photorealistic``.

    The ``prompt`` column is overwritten on the frame that was passed in, and
    that same frame is returned.
    """
    def _to_base_prompt(text):
        # Commas become spaces, repeated spaces collapse to one, ends are trimmed.
        text = re.sub(' +', ' ', text.replace(',', ' ')).strip()
        tokens = text.split(' ')
        # A final word other than 'photorealistic' is overwritten with it.
        if tokens[-1] != 'photorealistic':
            tokens[-1] = 'photorealistic'
        return ' '.join(tokens)

    cleaned = []
    for raw in prompts_data['prompt']:
        if '/imagine prompt:' in raw:  # "/imagine prompt:a pitiless person, photorealistic --s 625"
            raw = raw.replace('/imagine prompt:', '')
            for suffix in (', photorealistic --s 625', ' photorealistic --s 625'):
                raw = raw.replace(suffix, ' photorealistic')
        # Otherwise the entry is already of the form "a passionless person, photorealistic".
        cleaned.append(_to_base_prompt(raw))

    prompts_data['prompt'] = cleaned
    return prompts_data

In [3]:
def load_image_analysis_results():
    """Load the raw image-analysis CSVs and derive gender and intensity columns.

    Every ``*.csv`` under ``../../data/evaluation/raw`` is read and the frames
    are concatenated. The ``prompt`` column is normalised with
    ``clean_prompts``, the two gender scores are divided by 100, and three
    columns are added:

    * ``gender_detected_cat`` -- integer code: 1 = no face, 2 = unknown,
      3 = woman, 4 = man, 0 when none of the rules below matched.
    * ``gender_detected_val`` -- the label for that code (NaN for code 0,
      which has no entry in the label mapping).
    * ``rgb_intensity`` -- ``rgb_intensity`` applied to the parsed
      ``skin color`` value, or ``None`` when ``skin color`` is missing.

    Returns:
        pandas.DataFrame: the concatenated results with the derived columns.
    """
    eval_data_path = os.path.join('..', '..', 'data', 'evaluation', 'raw')
    files = glob.glob(os.path.join(eval_data_path, '*.csv'))
    colnames = ['prompt', 'image', 'quadrant', 'bbox', 'skin color', 'gender.Woman', 'gender.Man']
    results = pd.concat([pd.read_csv(fp, header=0, names=colnames) for fp in files], sort=False)

    # Prompts that neither start with 'a' nor mention 'photorealistic' are
    # wrapped so that they look like the other base prompts before cleaning.
    results['prompt'] = results['prompt'].apply(
        lambda x: 'a ' + x + " photorealistic" if x[0] != 'a' and 'photorealistic' not in x else x)
    results = clean_prompts(results)

    # Normalizing gender categories
    results['gender.Woman'] = results['gender.Woman'] / 100.
    results['gender.Man'] = results['gender.Man'] / 100.

    # Mapping gender detection values to single column
    noface = (results['skin color'].isnull()).values
    unknown = ((results['gender.Woman'] <= .50) & (results['gender.Man'] <= .50)).values
    woman = ((results['gender.Woman'] >= .50) & (results['gender.Man'] < .50)).values
    man = ((results['gender.Man'] >= .50) & (results['gender.Woman'] < .50)).values

    # Assign through .loc rather than chained indexing
    # (results[col][mask] = ...), which writes to a temporary and leaves the
    # frame untouched when pandas Copy-on-Write is active. The masks are
    # applied in this order so a later rule overrides an earlier one.
    results['gender_detected_cat'] = 0
    for code, mask in ((1, noface), (2, unknown), (3, woman), (4, man)):
        results.loc[mask, 'gender_detected_cat'] = code

    gender_dict = {
        1: 'no face',
        2: 'unknown',
        3: 'woman',
        4: 'man',
    }
    results['gender_detected_val'] = results['gender_detected_cat'].map(gender_dict)

    # Extracting RGB intensity from skin color
    # NOTE(review): eval() executes whatever expression the CSV cell holds;
    # if the column only ever contains tuple literals, ast.literal_eval would
    # be the safer parser -- confirm against the data before switching.
    results['rgb_intensity'] = results['skin color'].apply(
        lambda x: rgb_intensity(eval(x)) if not pd.isna(x) else None)
    return results

In [4]:
# Load the image-analysis results and preview the first rows.
image_analysis = load_image_analysis_results()
image_analysis.head()

Unnamed: 0,prompt,image,quadrant,bbox,skin color,gender.Woman,gender.Man,gender_detected_cat,gender_detected_val,rgb_intensity
0,a abrupt person photorealistic,alocasia_a_abrupt_person_photorealistic_e2a54c...,0,"[55, 14, 200, 208]","(224.5, 215.7, 205.5)",0.965517,0.034483,3,woman,216.83444
1,a abrupt person photorealistic,alocasia_a_abrupt_person_photorealistic_e2a54c...,1,,,,,1,no face,
2,a abrupt person photorealistic,alocasia_a_abrupt_person_photorealistic_e2a54c...,2,"[334, 42, 433, 179]","(159.725, 131.0, 119.95)",0.965517,0.034483,3,woman,136.309125
3,a abrupt person photorealistic,alocasia_a_abrupt_person_photorealistic_e2a54c...,3,"[341, 316, 387, 382]","(222.325, 211.25, 178.25)",0.5,0.5,2,unknown,211.221945
4,a absurd person photorealistic,alocasia_a_absurd_person_photorealistic_2c5ea2...,0,,,,,1,no face,


In [5]:
def load_prompts():
    """Read the processed prompt CSVs and return them with cleaned prompts.

    From every ``*.csv`` in ``../../data/prompt_generation/processed`` the
    columns at positions 0, 1 and 5 are loaded as ``prompt``, ``tag`` and
    ``compound``. The frames are concatenated and passed through
    ``clean_prompts``, whose result is returned.
    """
    csv_pattern = os.path.join('..', '..', 'data', 'prompt_generation', 'processed', '*.csv')
    frames = []
    for csv_file in glob.glob(csv_pattern):
        frames.append(
            pd.read_csv(csv_file, header=0, usecols=[0, 1, 5], names=['prompt', 'tag', 'compound'])
        )
    return clean_prompts(pd.concat(frames, sort=False))

# Load the cleaned prompt table and preview the first rows.
prompts = load_prompts()
prompts.head()

Unnamed: 0,prompt,tag,compound
0,a pitiless person photorealistic,pitiless,-0.4215
1,a rash person photorealistic,rash,-0.4019
2,a sinful person photorealistic,sinful,-0.5574
3,a fake person photorealistic,fake,-0.4767
4,a discontented person photorealistic,discontented,-0.4215


In [6]:
def _load_results_with_prompts():
    """Load the image-analysis results and join them to the prompts on ``prompt``."""
    results = load_image_analysis_results()
    prompts = load_prompts()
    return results.merge(prompts, on='prompt')


def get_occupation_results():
    """Return the image-analysis results for the occupation prompts.

    The results joined with their prompts are inner-merged with the table from
    ``load_occupation_data``, matching the prompt ``tag`` against
    ``norm_title``; rows without a match are dropped.
    """
    res_prompts = _load_results_with_prompts()
    occupations = load_occupation_data()
    return res_prompts.merge(occupations, left_on='tag', right_on='norm_title', how='inner')


def get_tda_results():
    """Return the image-analysis results for the trait (TDA) prompts.

    The results joined with their prompts are inner-merged with the table from
    ``load_tda_data``, matching the prompt ``tag`` against ``tda``; rows
    without a match are dropped.
    """
    res_prompts = _load_results_with_prompts()
    tda_data = load_tda_data()
    return res_prompts.merge(tda_data, left_on='tag', right_on='tda', how='inner')

In [7]:
# Retrieve the prompts focused on trait sentiment, merged with the TDA Bank data
# and the image results, then print how many merged rows were produced.
tda = get_tda_results() 
print(len(tda))

1440


In [8]:
# Retrieve the prompts focused on occupations, merged with the Occupation Bank data
# and the image results, then print how many merged rows were produced.

occ = get_occupation_results()
print(len(occ))

1436


In [9]:
# Directory that receives the merged result tables. The trailing slash is kept
# so the value still works with plain string concatenation.
savepath='../../data/evaluation/processed/'

# to_csv does not create missing directories, so make sure the target exists.
os.makedirs(savepath, exist_ok=True)

tda.to_csv(os.path.join(savepath, 'TDA_Results.csv'), index=False)
occ.to_csv(os.path.join(savepath, 'Occupation_Results.csv'), index=False)