# In [12]:
import openai
import pandas as pd
import parameters as params
import re

def load_API_KEY(api_path):
    """Read the OpenAI API key from a text file and register it with the client.

    Only the first line of the file is used. Surrounding whitespace is
    removed so the line terminator does not become part of the key.

    Args:
        api_path: Path of the text file whose first line holds the API key.
    """
    with open(api_path, 'r') as f:
        # readline() keeps the trailing newline; strip it so the key is clean.
        openai.api_key = f.readline().strip()
    
def load_data(file_path):
    """Parse the CSV at ``file_path`` and return its contents as a DataFrame."""
    return pd.read_csv(file_path)

def save_data(save_path, df):
    """Write ``df`` to ``save_path`` as CSV, leaving out the index column."""
    df.to_csv(path_or_buf=save_path, index=False)
    
def has_numbers(inputString):
    """Report whether ``inputString`` contains at least one digit character."""
    for character in inputString:
        if character.isdigit():
            return True
    return False

def generate_synonyms(prompt, engine=params.engine, temp=params.temp, top_p=params.top_p, tokens=params.tokens,
                      freq_pen=params.freq_pen, pres_pen=params.pres_pen, stop=['ASSISTANT:', 'USER:']):
    """Send ``prompt`` to the OpenAI completion endpoint and return the reply text.

    Characters that cannot be encoded as ASCII are dropped from the prompt
    before the request is made.

    Args:
        prompt: Full prompt text to complete.
        engine: Forwarded as ``engine``.
        temp: Forwarded as ``temperature``.
        top_p: Forwarded as ``top_p``.
        tokens: Forwarded as ``max_tokens``.
        freq_pen: Forwarded as ``frequency_penalty``.
        pres_pen: Forwarded as ``presence_penalty``.
        stop: Forwarded as ``stop``.

    Returns:
        The ``text`` of the first choice in the response, with surrounding
        whitespace stripped.
    """
    ascii_prompt = prompt.encode(encoding='ASCII', errors='ignore').decode()

    request = {
        'engine': engine,
        'prompt': ascii_prompt,
        'temperature': temp,
        'max_tokens': tokens,
        'top_p': top_p,
        'frequency_penalty': freq_pen,
        'presence_penalty': pres_pen,
        'stop': stop,
    }
    completion = openai.Completion.create(**request)

    first_choice = completion['choices'][0]
    return first_choice['text'].strip()

def clean_output(synonyms):
    """Split raw model responses into individual synonyms.

    Each response is parsed with the first rule that applies:

    1. it contains a comma: split on commas;
    2. it contains a digit: treated as a numbered list, split on periods
       with newlines and digits removed;
    3. it contains the standalone word "and": split on that word;
    4. otherwise: split on newlines.

    Every resulting entry is stripped of surrounding whitespace, and entries
    that end up empty are discarded.

    Args:
        synonyms: Sequence of response strings, one per input row.

    Returns:
        A pandas DataFrame with one row per response and one column per
        synonym position.
    """
    digit_pattern = re.compile(r'\d')
    # Word boundaries keep words such as "band" or "handy" from being split.
    and_pattern = re.compile(r'\band\b')

    rows = []
    for text in synonyms:
        if ',' in text:
            parts = text.split(',')
        elif has_numbers(text):
            # NOTE(review): chunks of three characters or fewer are skipped,
            # which drops the bare leading list number but can also drop a
            # very short final entry -- confirm this heuristic is intended.
            parts = [
                digit_pattern.sub('', chunk.replace('\n', ''))
                for chunk in text.split('.')
                if len(chunk) > 3
            ]
        elif and_pattern.search(text):
            parts = and_pattern.split(text)
        else:
            parts = text.split('\n')

        # Trim stray spaces/newlines and drop blanks so cells hold clean words.
        trimmed = (part.strip() for part in parts)
        rows.append([part for part in trimmed if part])

    return pd.DataFrame(rows)

# Run configuration: the key file, plus paths and prompt taken from parameters.
api_path = 'API_KEY.txt'
file_path = params.input_file
save_path = params.output_file
prompt = params.prompt

load_API_KEY(api_path)
data = load_data(file_path)

# Request synonyms for every description, in row order.
synonyms = [
    generate_synonyms(prompt + description)
    for description in data['Description']
]

# Place the parsed synonym columns next to the original columns and save.
df = pd.concat([data, clean_output(synonyms)], axis=1)

save_data(save_path, df)