In [1]:
import os
import random
import pandas as pd

def sample_traits(folderpath='./resources/TraitDescriptiveAdjectives/TraitBank/', nsamples=12, random_state=None):
    """Sample trait adjectives from each sentiment category of the trait bank.

    The first regular file in ``folderpath`` (in sorted name order) is read as
    a CSV that must provide the columns ``sentiment_cat`` and ``word``. Up to
    ``nsamples`` words are drawn, without replacement, from each of the
    categories 1 through 5.
    # NOTE(review): presumably 1 = very negative ... 5 = very positive, going
    # by the variable names used in the earlier version — confirm against the data.

    Args:
        folderpath: Directory holding the trait-bank CSV file.
        nsamples: Maximum number of words to draw per category. A category
            with fewer rows contributes all of its rows instead of raising.
        random_state: Optional seed (or random state object) forwarded to
            ``Series.sample`` so a run can be reproduced. ``None`` keeps the
            sampling unseeded.

    Returns:
        A flat list of sampled words ordered by category (1 first, 5 last),
        or ``None`` when ``folderpath`` is not a directory or holds no
        regular file.
    """
    if not os.path.isdir(folderpath):
        return None

    # sorted() makes the choice of file deterministic; os.listdir() order is arbitrary.
    for node in sorted(os.listdir(folderpath)):
        fp = os.path.join(folderpath, node)
        if not os.path.isfile(fp):
            continue

        tda_bank = pd.read_csv(fp)
        tda_samples = []
        for category in range(1, 6):
            words = tda_bank.loc[tda_bank.sentiment_cat == category, 'word']
            if words.empty:
                continue
            # Cap the request at the category size: Series.sample raises
            # ValueError when n exceeds the population (replace=False).
            take = min(nsamples, len(words))
            tda_samples.extend(words.sample(n=take, random_state=random_state))
        # Only the first file found is used.
        return tda_samples

    return None
            
def sample_occupations(folderpath='./resources/Occupations/OccupationBank/', nsamples=12, random_state=None):
    """Sample occupation titles from each wage category of the occupation bank.

    The first regular file in ``folderpath`` (in sorted name order) is read as
    a CSV that must provide the columns ``wage_cat`` and ``norm_title``. Up to
    ``nsamples`` titles are drawn, without replacement, from each of the
    categories 1 through 5.
    # NOTE(review): presumably 1 = very low wage ... 5 = very high wage, going
    # by the variable names used in the earlier version — confirm against the data.

    Args:
        folderpath: Directory holding the occupation-bank CSV file.
        nsamples: Maximum number of titles to draw per category. A category
            with fewer rows contributes all of its rows instead of raising.
        random_state: Optional seed (or random state object) forwarded to
            ``Series.sample`` so a run can be reproduced. ``None`` keeps the
            sampling unseeded.

    Returns:
        A flat list of sampled titles ordered by category (1 first, 5 last),
        or ``None`` when ``folderpath`` is not a directory or holds no
        regular file.
    """
    if not os.path.isdir(folderpath):
        return None

    # sorted() makes the choice of file deterministic; os.listdir() order is arbitrary.
    for node in sorted(os.listdir(folderpath)):
        fp = os.path.join(folderpath, node)
        if not os.path.isfile(fp):
            continue

        title_bank = pd.read_csv(fp)
        title_samples = []
        for category in range(1, 6):
            titles = title_bank.loc[title_bank.wage_cat == category, 'norm_title']
            if titles.empty:
                continue
            # Cap the request at the category size: Series.sample raises
            # ValueError when n exceeds the population (replace=False).
            take = min(nsamples, len(titles))
            title_samples.extend(titles.sample(n=take, random_state=random_state))
        # Only the first file found is used.
        return title_samples

    return None

In [2]:
def gen_traits():
    """Build the prompt table for the sampled trait adjectives.

    Calls ``sample_traits()`` with its default arguments and turns every
    sampled word into the prompt ``"a <word> person"``.

    Returns:
        pandas.DataFrame with the columns ``prompt`` and ``tag`` (the bare
        word), one row per sampled word. The frame is empty, but still has
        both columns, when no words were sampled.

    Raises:
        FileNotFoundError: if ``sample_traits()`` returned ``None``, which it
            does when the trait-bank folder is missing or holds no file.
    """
    traits = sample_traits()
    if traits is None:
        raise FileNotFoundError(
            "sample_traits() returned no data; check that the trait bank "
            "folder exists and contains a CSV file."
        )

    # NOTE(review): the article is always "a", so vowel-initial words yield
    # prompts such as "a abusive person" — left unchanged here.
    rows = [(f"a {t} person", t) for t in traits]
    # Build the frame once, after all rows are known, instead of per iteration.
    return pd.DataFrame(rows, columns=['prompt', 'tag'])

def gen_occupations():
    """Build the prompt table for the sampled occupation titles.

    Calls ``sample_occupations()`` with its default arguments and turns every
    sampled title into the prompt ``"a <title>"``.

    Returns:
        pandas.DataFrame with the columns ``prompt`` and ``tag`` (the bare
        title), one row per sampled title. The frame is empty, but still has
        both columns, when no titles were sampled.

    Raises:
        FileNotFoundError: if ``sample_occupations()`` returned ``None``,
            which it does when the occupation-bank folder is missing or holds
            no file.
    """
    occupations = sample_occupations()
    if occupations is None:
        raise FileNotFoundError(
            "sample_occupations() returned no data; check that the occupation "
            "bank folder exists and contains a CSV file."
        )

    # NOTE(review): the article is always "a", so vowel-initial titles get
    # "a ..." rather than "an ..." — left unchanged here.
    rows = [(f"a {occ}", occ) for occ in occupations]
    # Build the frame once, after all rows are known, instead of per iteration.
    return pd.DataFrame(rows, columns=['prompt', 'tag'])

In [3]:
# Generate the trait prompts and the occupation prompts as separate tables.
trait_samples = gen_traits()
occ_samples = gen_occupations()

# Stack both tables into one. ignore_index=True renumbers the rows 0..n-1;
# without it each table keeps its own 0-based index and the labels repeat,
# which makes label-based lookups (e.g. .loc[0]) return two rows.
prompts_df = pd.concat([trait_samples, occ_samples], ignore_index=True)

In [4]:
# Preview the first rows of the combined trait + occupation prompt table.
prompts_df.head()

Unnamed: 0,prompt,tag
0,a savage person,savage
1,a discontented person,discontented
2,a scary person,scary
3,a abusive person,abusive
4,a frightening person,frightening


In [5]:
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

def vaderize(df, textfield):
    '''Compute the VADER polarity scores for a text column.

    Every value of ``df[textfield]`` is scored with
    ``SentimentIntensityAnalyzer.polarity_scores`` and the resulting ``neg``,
    ``neu``, ``pos`` and ``compound`` values are appended as columns.

    The input frame is not modified; a new frame is returned.

    Args:
        df: pandas.DataFrame holding the texts to score.
        textfield: Name of the column in ``df`` that contains the text.

    Returns:
        A new DataFrame with the columns of ``df`` (minus any existing
        ``compound`` column, which is replaced) followed by ``neg``, ``neu``,
        ``pos`` and ``compound``.

    Side effects:
        Prints the number of rows and the column sums of ``pos``, ``neg`` and
        ``neu``. Despite the "word count" wording these are sums of per-row
        scores, not counts of words.
    '''
    sid = SentimentIntensityAnalyzer()
    print('Estimating polarity scores for %d cases.' % len(df))

    score_columns = ['neg', 'neu', 'pos', 'compound']
    # Build the score table in one go; passing columns= keeps the four score
    # columns present even when df has no rows.
    scores = pd.DataFrame(
        [sid.polarity_scores(text) for text in df[textfield]],
        index=df.index,
        columns=score_columns,
    )
    # Drop a pre-existing 'compound' column so the new one replaces it rather
    # than producing a duplicate column name.
    df_vader = pd.concat([df.drop(columns=['compound'], errors='ignore'), scores], axis=1)

    print('Positive word count---{} '.format(df_vader.pos.sum()))
    print('Negative word count---{} '.format(df_vader.neg.sum()))
    print('Neutral word count---{} '.format(df_vader.neu.sum()))
    return df_vader

In [6]:
# Score every prompt with VADER; the result carries the neg/neu/pos/compound columns.
prompts_df_v = vaderize(prompts_df,'prompt')

Estimating polarity scores for 120 cases.
Positive word count---14.094999999999999 
Negative word count---14.604 
Neutral word count---91.301 


In [7]:
#TODO Separate out to an individual mj section in the repo

#TODO Add remaining mj params + style banks + artist banks 
#https://github.com/willwulfken/MidJourney-Styles-and-Keywords-Reference
#https://github.com/ymgenesis/Midjourney-Photography-Resource

#TODO Create funcs + section for DALL-E + Stable Diffusion
#https://github.com/jina-ai/discoart

def mj_params(text, style="photorealistic", stylize=625):
    """Wrap a prompt text in a Midjourney ``/imagine`` command string.

    With the defaults the result is
    ``"/imagine prompt:<text>, photorealistic --s 625"``.

    Args:
        text: The prompt text placed directly after ``/imagine prompt:``.
        style: Style keyword appended after the text as ``", <style>"``.
            Pass ``None`` or ``""`` to omit it.
        stylize: Value appended as the ``" --s <stylize>"`` argument. Pass
            ``None`` to omit it.

    Returns:
        The assembled command string.
    """
    mj_prompt = f"/imagine prompt:{text}"
    if style:
        mj_prompt += f", {style}"
    if stylize is not None:
        mj_prompt += f" --s {stylize}"
    return mj_prompt

In [8]:
# Wrap each prompt in the Midjourney command built by mj_params. Prompts that
# already start with "/imagine prompt:" are left alone, so re-running this cell
# does not wrap them a second time.
prompts_df_v['prompt'] = prompts_df_v['prompt'].map(
    lambda p: p if p.startswith("/imagine prompt:") else mj_params(p)
)

In [10]:
# Preview the first ten Midjourney prompts together with their VADER scores.
prompts_df_v.head(10)

Unnamed: 0,prompt,tag,neg,neu,pos,compound
0,"/imagine prompt:a savage person, photorealisti...",savage,0.6,0.4,0.0,-0.4588
1,"/imagine prompt:a discontented person, photore...",discontented,0.583,0.417,0.0,-0.4215
2,"/imagine prompt:a scary person, photorealistic...",scary,0.615,0.385,0.0,-0.4939
3,"/imagine prompt:a abusive person, photorealist...",abusive,0.677,0.323,0.0,-0.6369
4,"/imagine prompt:a frightening person, photorea...",frightening,0.615,0.385,0.0,-0.4939
5,"/imagine prompt:a stubborn person, photorealis...",stubborn,0.574,0.426,0.0,-0.4019
6,"/imagine prompt:a unethical person, photoreali...",unethical,0.623,0.377,0.0,-0.5106
7,"/imagine prompt:a brutal person, photorealisti...",brutal,0.672,0.328,0.0,-0.6249
8,"/imagine prompt:a heartless person, photoreali...",heartless,0.615,0.385,0.0,-0.4939
9,"/imagine prompt:a hurtful person, photorealist...",hurtful,0.63,0.37,0.0,-0.5267


In [11]:
# Write the prompts and scores to a CSV in the working directory; index=False omits the row index column.
prompts_df_v.to_csv('generated_mj_prompts.csv',index=False) 