# Acronym experiment


In [1]:
import toml
import numpy as np
import torch
import random
import pandas as pd
import json
import time
from tqdm import tqdm
from matplotlib import pyplot as plt
from itertools import permutations
import os
import openai
from utils.gpt_api import multi_turn_chatgpt
from utils.text_logger import text_logger
from utils.acronym_utils import *

# ---- Experiment configuration ----
# M: number of examples requested from the model in every prompt, and the
# size of the filtered set carried to the next generation.
M = 20
# Number of seed examples drawn from the easy set; values swept: [2, 4, 6, 8, 10]
M_easy = 10 # [2, 4, 6, 8, 10]
# Seed for random / numpy / torch; values swept: [10086, 42, 14843, 1314]
SEED = 1314 # [10086, 42, 14843, 1314]
# Selection rule applied to the pool between generations.
BIAS_TYPE = "easy"  # [easy, easylong, easyshort, hard, random, imitation]
# The remaining seed examples are drawn from the hard set.
M_hard = M - M_easy
# Sampling temperature of the first agent (the second agent uses half of it).
GLOBAL_TMP = 1
# Number of generations (iterations of the generate/filter loop).
GEN = 6
# Forwarded to the chat wrapper as `lookback_round`.
LOOK_BACK = 0
# Whether "Input" is shown before "Output" in prompts.
INPUT_FIRST = False
# Whether acronyms are shown in upper case in prompts.
UPPER = True
# MODEL_NAME answers even-numbered generations, MODEL_NAME2 odd-numbered ones.
MODEL_NAME =  "claude-3-haiku-20240307"  
MODEL_NAME2 = "claude-3-haiku-20240307"
# Model identifiers that have been used here:
# ["gpt-3.5-turbo-0125", "gpt-4-0125-preview"]
# ["claude-3-haiku-20240307", "claude-3-sonnet-20240229", "claude-instant-1.2"]
# ["mistral-tiny-2312", "mistral-small-2312"]

# The experiment directory encodes the main settings so runs do not overwrite
# each other. Note that only MODEL_NAME (not MODEL_NAME2) is part of the path.
EXP_NAME = "inputfirst%s_upper%s_M%s_easy%d_G%s_seed%s"%(str(INPUT_FIRST)[0],str(UPPER)[0],M,M_easy, GEN, SEED)
EXP_PATH = './exp_logs_' + MODEL_NAME + '/acronym/'+BIAS_TYPE
exp_path = os.path.join(EXP_PATH, EXP_NAME)

# Chat transcript logger shared by both agents.
LOG = text_logger(file_name='chat_log', exp_path=exp_path, silence = True)
LOG.write_to_file('This is an experiment trying to see how generated data evolves')
LOG.write_to_file('In this experiment, M is %d, G is %d, input first is %s, upper is %s. Global temperature is %f\n'%(M, GEN, str(INPUT_FIRST), str(UPPER), GLOBAL_TMP))

def rnd_seed(seed):
    """Seed the `random`, NumPy and PyTorch generators with the same value."""
    # Same order as always: stdlib first, then NumPy, then torch.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(seed)
rnd_seed(SEED)

## 0.Prepare the data $d^0$

- Where $d^0$ comes from:
  We sample `M_easy` examples from the easy setting ("acronym1_11.txt") and `M_hard = M - M_easy` examples from the hard setting ("acronym1_51.txt"), i.e. half from each when `M_easy = M / 2`. Make sure that the output words in the easy setting are in the COCA top-60k list, while those in the hard setting are not.

In [2]:
# SUP_PATH = 'acronym_supports'
# COCA60K = pd.read_csv(os.path.join(SUP_PATH,'COCA 60000.csv'), encoding='ISO-8859-1')
# easy_d0set = pd.read_csv(os.path.join(SUP_PATH,'easy.csv'))
# hard_d0set = pd.read_csv(os.path.join(SUP_PATH,'hard.csv'))

In [3]:
class data_manager():
    """Manage the pool of acronym examples and their word-frequency ranks.

    On construction three CSV files are read from ``sup_path``: the COCA word
    list (columns ``WORD``, ``RANK`` and ``TOTAL`` are used) and the easy/hard
    seed sets. Every example seen so far is kept in ``self.data_pool``, tagged
    with the generation that produced it.
    """

    # Column layout of ``data_pool``.
    POOL_COLUMNS = ['Output_upper','Output_lower','Input','RANK','TOTAL','generation']
    # Rank recorded for words that are absent from the COCA list.
    UNRANKED = 60001

    def __init__(self, sup_path='acronym_supports', input_first=False, upper=False):
        """Load the support CSVs and start with an empty pool.

        Args:
            sup_path: directory containing ``COCA 60000.csv``, ``easy.csv``
                and ``hard.csv``.
            input_first: render "Input" before "Output" in prompt text.
            upper: render acronyms in upper case in prompt text.
        """
        self.sup_path = sup_path
        self.COCA60K = pd.read_csv(os.path.join(self.sup_path,'COCA 60000.csv'), encoding='ISO-8859-1')
        self.easy_d0set = pd.read_csv(os.path.join(self.sup_path,'easy.csv'))
        self.hard_d0set = pd.read_csv(os.path.join(self.sup_path,'hard.csv'))
        self.data_pool = pd.DataFrame(columns=list(self.POOL_COLUMNS))
        self.input_first = input_first
        self.upper = upper

    def word_ranking_total(self, word):
        """Return ``(RANK, TOTAL)`` of *word* in the COCA list.

        The lookup is case-insensitive. Words that are not listed get
        ``(60001, 0)``.
        """
        tgt_row = self.COCA60K[self.COCA60K['WORD']==word.lower()]
        if len(tgt_row)>0:
            return tgt_row['RANK'].iloc[0], tgt_row['TOTAL'].iloc[0]
        return self.UNRANKED, 0

    def generate_d0(self, M_easy, M_hard, shuffle=True):
        """Reset the pool and fill it with a fresh generation-0 sample.

        Args:
            M_easy: number of rows sampled from the easy seed set.
            M_hard: number of rows sampled from the hard seed set.
            shuffle: when true (default) the easy and hard rows are mixed in
                random order; when false the easy rows precede the hard rows.

        Returns:
            The sampled rows with a ``generation`` column set to 0.
        """
        self.data_pool = pd.DataFrame(columns=list(self.POOL_COLUMNS))
        d0_easy = self.easy_d0set.sample(n=M_easy)
        d0_hard = self.hard_d0set.sample(n=M_hard)
        d0 = pd.concat([d0_easy,d0_hard])
        if shuffle:
            d0 = d0.sample(frac=1)
        # The seed CSVs carry a saved index column; tolerate files without it.
        d0 = d0.drop(labels='Unnamed: 0',axis=1, errors='ignore')
        d0.insert(d0.shape[1],"generation",0)
        self.data_pool = pd.concat([self.data_pool,d0])
        return d0

    def _shown_output(self, row):
        """Pick the upper- or lower-case acronym according to ``self.upper``."""
        return row['Output_upper'] if self.upper else row['Output_lower']

    def get_data_json(self, data_pd):
        """Render each row of *data_pd* as one JSON-looking line.

        Key order follows ``self.input_first``. Values are interpolated
        verbatim (no JSON escaping is applied).
        """
        lines = []
        for _, row in data_pd.iterrows():
            out = self._shown_output(row)
            inp = row['Input']
            if self.input_first:
                lines.append('{"Input": "%s", "Output": "%s"}\n'%(inp, out))
            else:
                lines.append('{"Output": "%s", "Input": "%s"}\n'%(out, inp))
        return "".join(lines)

    def get_data_str(self, data_pd):
        """Render each row of *data_pd* as an ``Input:``/``Output:`` text block.

        Blocks are separated by a blank line; field order follows
        ``self.input_first``.
        """
        blocks = []
        for _, row in data_pd.iterrows():
            out = self._shown_output(row)
            inp = row['Input']
            if self.input_first:
                blocks.append("Input: %s\nOutput: %s\n\n"%(inp, out))
            else:
                blocks.append("Output: %s\nInput: %s\n\n"%(out, inp))
        return "".join(blocks)

    def json_to_df(self, json_words, gen):
        """Parse one JSON example into a single-row DataFrame.

        Args:
            json_words: JSON object text with keys ``Output``/``Input`` or
                their lower-case variants ``output``/``input``.
            gen: generation number stored with the row.

        Returns:
            A one-row DataFrame using the pool's column layout.

        Raises:
            json.JSONDecodeError: if *json_words* is not valid JSON.
            KeyError: if neither key spelling is present.
        """
        fb_json = json.loads(json_words)
        if 'Output' in fb_json:
            out_key, inp_key = "Output", "Input"
        else:
            out_key, inp_key = "output", "input"
        output_word = fb_json[out_key]
        # Use this instance (not a module-level object) and the detected key,
        # so lower-case replies are ranked too.
        rank, total = self.word_ranking_total(output_word)
        return pd.DataFrame([{
            'Output_upper': output_word.upper(),
            'Output_lower': output_word.lower(),
            'Input': fb_json[inp_key],
            'RANK': rank,
            'TOTAL': total,
            'generation': gen,
        }])

    def add_to_data_pool(self, json_words, gen=0):
        """Parse *json_words* and append the resulting row to the pool."""
        df = self.json_to_df(json_words, gen)
        self.data_pool = pd.concat([self.data_pool,df])

    def pool_reset_to_d0(self):
        """Discard every row whose generation is not 0."""
        # Select by mask rather than dropping by index label: the pool is
        # built with concat and may contain repeated labels, so a label-based
        # drop could also remove generation-0 rows.
        self.data_pool = self.data_pool[self.data_pool['generation']==0]

    def get_biased_df(self, bias_type, OUT_M):
        """Return up to *OUT_M* pool rows chosen according to *bias_type*.

        * ``'easy'``: smallest ``RANK`` first.
        * ``'hard'``: largest ``RANK`` first.
        * ``'easyshort'``: among ranked rows, smallest ``TOTAL`` first.
        * ``'easylong'``: among ranked rows, largest ``TOTAL`` first.
        * ``'random'``: a random sample of exactly *OUT_M* rows.

        Raises:
            ValueError: for any other *bias_type*.
        """
        pool = self.data_pool
        if bias_type=='easy':
            return pool.sort_values(by='RANK', ascending=True)[:OUT_M]
        if bias_type=='hard':
            return pool.sort_values(by='RANK', ascending=False)[:OUT_M]
        if bias_type in ('easyshort', 'easylong'):
            ranked = pool[pool['RANK']!=self.UNRANKED]
            return ranked.sort_values(by='TOTAL', ascending=(bias_type=='easyshort'))[:OUT_M]
        if bias_type=='random':
            return pool.sample(n=OUT_M)
        raise ValueError("unknown bias_type: %r" % (bias_type,))

    def get_generation_df(self, gen=0):
        """Return the pool rows produced in generation *gen*."""
        return self.data_pool[self.data_pool['generation']==gen]

In [4]:
# Build the data manager from the support-file directory and draw the initial
# pool d0 (M_easy easy + M_hard hard examples, shuffled).
# NOTE(review): sup_path is a machine-specific absolute Windows path; adjust it
# before running on another machine.
DATA = data_manager(sup_path='E:\\P5_iICL\\iterated_learning_exp\\acronym_supports', input_first=INPUT_FIRST, upper=UPPER)
d0 = DATA.generate_d0(M_easy=M_easy, M_hard=M_hard, shuffle=True)

## 1. Iteratively generate new examples

In [5]:
# System persona given to both agents; it depends on the selection bias.
# Previously tried alternatives: "You are a pattern following assistant.",
# "You are a helpful assistant."
_PERSONA_BY_BIAS = {
    'easylong': "You favor long words.",
    'easyshort': "You favor short words a lot.",
    'hard': "You favor rare words.",
}
# Every other bias type falls back to the common-words persona.
persona = _PERSONA_BY_BIAS.get(BIAS_TYPE, "You favor common words.")
LOG.msg_to_gpt(persona)

# Two agents share the persona and logger; the second samples at half the temperature.
GPT_AGENT = multi_turn_chatgpt(
    model=MODEL_NAME,
    temperature=GLOBAL_TMP,
    top_p=1,
    logger=LOG,
    game_description=persona,
)
GPT_AGENT2 = multi_turn_chatgpt(
    model=MODEL_NAME2,
    temperature=GLOBAL_TMP*0.5,
    top_p=1,
    logger=LOG,
    game_description=persona,
)

In [6]:
# side_instruction = "the input comes first"
# json_format = "- Input: str, the list of words\n- Output: str, the acronym"
# d0_str = DATA.get_data_str(d0,upper=False, out_first=False)

# def get_data_prompt(d_str, M, d_json):
#     if d_json is not None:
#         add = "\n%s"%d_json
#     else:
#         add = ""
#     if INPUT_FIRST:
#         side_instruction = "the input comes first"
#         json_format = '- Input: str, the list of words\n- Output: str, the acronym.'%add        
#     else:
#         side_instruction = "the output comes first"
#         json_format = '- Output: str, the acronym\n- Input: str, the list of words.'%add
    
#     data_prompt = "Here are some input-output pairs. The input is a list of words. The output is the concatenation of the first letter of each word in the input, i.e., its acronym. For example:\n\n%sPlease provide %d more examples following this pattern, where %s.\nPlease ONLY return a JSON string with the following keys:\n%s"%(d_str, M, side_instruction, json_format)
#     return data_prompt

def get_data_prompt(d_str, M, d_json):
    """Build the prompt that asks the model for M new acronym examples.

    Args:
        d_str: example pairs already rendered as text; inserted verbatim
            after "For example:".
        M: number of examples to request.
        d_json: format sample shown at the end of the prompt.

    Returns:
        The complete prompt string. It also embeds the module-level
        ``persona`` and states the field order implied by ``INPUT_FIRST``.
    """
    # Only the ordering hint depends on INPUT_FIRST; the output format itself
    # is conveyed by d_json.
    side_instruction = "the input comes first" if INPUT_FIRST else "the output comes first"

    data_prompt = "Here are some input-output pairs. The input is a list of words. The output is the concatenation of the first letter of each word in the input, i.e., its acronym. For example:\n\n%sPlease provide %d different examples following this pattern, where %s.\n%s\nPlease return examples strictly following this format:\n%s"%(d_str, M, side_instruction, persona, d_json)
    return data_prompt

In [7]:
# ---- Initial prompt built from d0 ----
d0_str = DATA.get_data_str(d0)
# Two placeholder lines showing the model the expected output format.
example_json = (
    '{"Input": "word1, word2, ...", "Output": "acronym"}\n{"Input": "word1, word2, ...", "Output": "acronym"}'
    if INPUT_FIRST
    else '{"Output": "acronym", "Input": "word1, word2, ..."}\n{"Output": "acronym", "Input": "word1, word2, ..."}'
)
# Alternative: use real examples as the format sample, DATA.get_data_json(d0[-2:])
d0_json = example_json
d0_prompt = get_data_prompt(d0_str, M, d_json=d0_json)
dg_prompt = d0_prompt

for g in tqdm(range(GEN)):
    LOG.write_to_file('----------- Gen %d -----------'%g)

    # Even generations are answered by the first agent, odd ones by the second.
    agent = GPT_AGENT if g%2==0 else GPT_AGENT2
    hd_feedback, _, cnt_tokens = agent.call_chatgpt(
        dg_prompt,
        fake_response=None,
        logprobs=False,
        top_logprobs=None,
        lookback_round=LOOK_BACK,
    )

    # ------ Put the generated data into data.pool (tagged as generation g+1)
    fb_list = get_fblist_from_hdfeedback(hd_feedback)
    for raw_item in fb_list:
        cleaned = raw_item.strip()
        if cleaned:
            DATA.add_to_data_pool(cleaned, gen=g+1)

    # ------------ Filter on dt
    # "imitation" passes on exactly what was just generated; every other
    # bias type selects M rows from the whole pool.
    if BIAS_TYPE=="imitation":
        dg = DATA.get_generation_df(gen=g+1)
    else:
        dg = DATA.get_biased_df(bias_type=BIAS_TYPE, OUT_M=M)

    # ------- Build the next prompt from the selected examples
    dg_str = DATA.get_data_str(dg)
    # Alternative format sample: DATA.get_data_json(dg[-2:])
    dg_json = example_json
    dg_prompt = get_data_prompt(dg_str, M, dg_json)

100%|██████████| 6/6 [01:56<00:00, 19.47s/it]


In [8]:
# Persist the full pool (all generations) next to the chat log.
DATA.data_pool.to_csv(os.path.join(exp_path,'data.csv'))

In [9]:
# Shorthand for the in-memory pool used by the quick-look plot below.
data_pool = DATA.data_pool

## 2. Visualize results

In [10]:
# ----------- Calculate the length of acronyms
def cal_acro_length_df(df_slice):
    """Return the mean character length of the ``Output_upper`` entries of *df_slice*."""
    acronym_lengths = [len(acronym) for acronym in df_slice['Output_upper']]
    # NumPy division keeps the result a NumPy float (NaN for an empty slice).
    return np.sum(acronym_lengths)/np.sum(len(acronym_lengths))

# ------ Plot ratio all-data
def _get_rank_mean(rnk_list):
    rank_cnt = 0
    for i in range(len(rnk_list)):
        if rnk_list[i] == -1:
            rank_cnt += 60000
        else:
            rank_cnt += rnk_list[i]
    return rank_cnt/len(rnk_list)

def get_ratio_and_rank(data_pool, old_results=False, num_gen=None):
    """Compute per-generation statistics of a data pool.

    Args:
        data_pool: DataFrame with at least the columns ``generation``,
            ``RANK`` and ``Output_upper``.
        old_results: set for result files that mark unranked words with
            ``RANK == -1``; otherwise the marker is ``60001``.
        num_gen: index of the last generation to evaluate. Defaults to the
            module-level ``GEN``.

    Returns:
        Five lists with one entry per generation ``0..num_gen``:
        ``ratio_list`` (share of ranked rows within the generation),
        ``ratioall_list`` (share of ranked rows in all generations so far),
        ``avgrank_list`` (mean rank of all rows), ``easyrank_list`` (mean
        rank of the ranked rows) and ``avglen_list`` (mean acronym length).
    """
    if num_gen is None:
        num_gen = GEN
    ratio_list, ratioall_list, avgrank_list, easyrank_list = [], [], [], []
    avglen_list = []
    all_easy, all_hard = 0, 0
    for g in range(num_gen+1):
        # Slice the generation once and derive every statistic from it.
        gen_df = data_pool[data_pool['generation']==g]
        ranks = gen_df['RANK']
        if old_results:
            mask_hard = ranks==-1
            mask_easy = ranks>0
        else:
            mask_hard = ranks==60001
            mask_easy = ranks<60001
        cnt_easy, cnt_hard = mask_easy.sum(), mask_hard.sum()
        all_easy += cnt_easy
        all_hard += cnt_hard
        ratio_list.append(cnt_easy / (cnt_easy+cnt_hard))
        ratioall_list.append(all_easy / (all_easy+all_hard))
        avgrank_list.append(_get_rank_mean(list(ranks)))
        easyrank_list.append(ranks[mask_easy].mean())
        avglen_list.append(cal_acro_length_df(gen_df))
    return ratio_list, ratioall_list, avgrank_list, easyrank_list, avglen_list

In [None]:
# Quick look at the run that just finished: one panel per statistic.
fig, ax = plt.subplots(1,4,figsize=(20,4))
ratio_list, ratioall_list, avgrank_list, easyrank_list, avglen_list = get_ratio_and_rank(data_pool)
# Panels, left to right: per-generation ratio, cumulative ratio, mean rank,
# mean acronym length (easyrank_list is computed but not shown).
for panel, series in zip(ax, (ratio_list, ratioall_list, avgrank_list, avglen_list)):
    panel.plot(series)

## 3. Generate figures

In [None]:
import matplotlib.cm as cm
import matplotlib as mpl
from matplotlib.colors import ListedColormap
# Figure: ratio of easy samples and mean rank per generation, averaged over
# seeds, one curve per number of easy seed examples.
Ms = [2,4,6,8,10]#[10, 8, 6, 4, 2]#
Seeds = [10086, 42, 14843, 1314]
COLORS = ["#2878B5", "#8983bf", "#B1CE46","orange" , "#D76364"]#"#F1D77E"
#COLORS = [cm.tab20c.colors[0], cm.tab20c.colors[1], cm.tab20c.colors[3], cm.tab20c.colors[7],cm.tab20c.colors[4]]
#COLORS = ["#F27970", "#BB9727", "#54B345", "#05B9E2", "#C76DA2"]
#COLORS = ["blue", "red", "green", "yellow", "cyan", "black","#63E398"]

# get_ratio_and_rank returns one entry per generation 0..GEN, so the buffers
# and the x axis need GEN+1 columns. The experiment names below hard-code
# "G6"; keep GEN consistent with them.
n_points = GEN + 1

fig, ax = plt.subplots(1,2,figsize=(12,5))
x_axis = np.arange(n_points)
for j, m in enumerate(Ms):
    ratio_np = np.zeros((len(Seeds),n_points))
    ratioall_np = np.zeros((len(Seeds),n_points))
    avgrank_np = np.zeros((len(Seeds),n_points))
    easyrank_np = np.zeros((len(Seeds),n_points))
    for i, seed in enumerate(Seeds):
        exp_name = "inputfirstF_upperT_M20_easy%d_G6_seed%d"%(m, seed)
        #exp_name = "inputfirstF_upperF_M20_easy%d_G6_seed%d"%(m, seed)
        main_path = "./exp_logs_gpt-3.5-turbo-0125/acronym/easy"
        #main_path = "./exp_logs_claude-3-haiku-20240307/acronym/imitation"

        data_pool = pd.read_csv(os.path.join(main_path, exp_name,'data.csv'))
        # The function returns five lists; the acronym-length one is not plotted here.
        ratio_list, ratioall_list, avgrank_list, easyrank_list, avglen_list = get_ratio_and_rank(data_pool)
        ratio_np[i,:], ratioall_np[i,:], avgrank_np[i,:], easyrank_np[i,:] = ratio_list, ratioall_list, avgrank_list, easyrank_list

    # Mean over seeds; the band is +/- variance for the ratio and +/- half a
    # standard deviation for the rank.
    ax[0].plot(x_axis, ratio_np.mean(0), color=COLORS[j],linewidth=2, marker='+', markersize=15, label='$N_e$ %d'%m,)
    ax[0].fill_between(x_axis, ratio_np.mean(0)-ratio_np.var(0), ratio_np.mean(0)+ratio_np.var(0), alpha=0.1, color=COLORS[j])
    ax[1].plot(x_axis, avgrank_np.mean(0), color=COLORS[j],linewidth=2, marker='+', markersize=15)
    ax[1].fill_between(x_axis, avgrank_np.mean(0)-0.5*avgrank_np.std(0), avgrank_np.mean(0)+0.5*avgrank_np.std(0), alpha=0.1, color=COLORS[j])
ax[0].legend(fontsize=14)
ax[0].set_xlabel("Generation",fontsize=16)
ax[0].set_ylabel("Ratio of easy samples",fontsize=16)
ax[0].grid()
ax[1].set_xlabel("Generation",fontsize=16)
ax[1].set_ylabel("Average rank of $d^t$",fontsize=16)
# NOTE(review): the labels are assigned to whatever ticks matplotlib picked
# automatically; verify that they still match the tick positions.
ax[1].set_yticklabels(["0","0k", "10k", "20k", "30k", "40k", "50k"])
ax[1].grid()

In [None]:
import matplotlib.cm as cm
import matplotlib as mpl
from matplotlib.colors import ListedColormap
# Figure: four panels (cumulative ratio, per-generation ratio, mean rank,
# mean rank of ranked words), averaged over seeds, one curve per number of
# easy seed examples.
Ms = [2,4,6,8,10]#[10, 8, 6, 4, 2]#
Seeds = [10086, 42, 14843, 1314]
COLORS = ["#2878B5", "#8983bf", "#B1CE46", "#F1D77E", "#D76364"]
#COLORS = [cm.tab20c.colors[0], cm.tab20c.colors[1], cm.tab20c.colors[3], cm.tab20c.colors[7],cm.tab20c.colors[4]]
#COLORS = ["#F27970", "#BB9727", "#54B345", "#05B9E2", "#C76DA2"]
#COLORS = ["blue", "red", "green", "yellow", "cyan", "black","#63E398"]

# get_ratio_and_rank returns one entry per generation 0..GEN, so the buffers
# and the x axis need GEN+1 columns. The experiment names below hard-code
# "G6"; keep GEN consistent with them.
n_points = GEN + 1

fig, ax = plt.subplots(1,4,figsize=(20,4))
x_axis = np.arange(n_points)
for j, m in enumerate(Ms):
    ratio_np = np.zeros((len(Seeds),n_points))
    ratioall_np = np.zeros((len(Seeds),n_points))
    avgrank_np = np.zeros((len(Seeds),n_points))
    easyrank_np = np.zeros((len(Seeds),n_points))
    for i, seed in enumerate(Seeds):
        exp_name = "inputfirstF_upperT_M20_easy%d_G6_seed%d"%(m, seed)
        main_path = "./exp_logs_acronym"
        data_pool = pd.read_csv(os.path.join(main_path, exp_name,'data.csv'))
        # The function returns five lists; the acronym-length one is not plotted here.
        ratio_list, ratioall_list, avgrank_list, easyrank_list, avglen_list = get_ratio_and_rank(data_pool)
        ratio_np[i,:], ratioall_np[i,:], avgrank_np[i,:], easyrank_np[i,:] = ratio_list, ratioall_list, avgrank_list, easyrank_list

    # Mean over seeds; bands are +/- variance for the ratios and +/- half a
    # standard deviation for the ranks.
    ax[0].plot(x_axis, ratioall_np.mean(0), label='%d easy in d0'%m, color=COLORS[j])
    ax[0].fill_between(x_axis, ratioall_np.mean(0)-ratioall_np.var(0), ratioall_np.mean(0)+ratioall_np.var(0), alpha=0.15, color=COLORS[j])
    ax[1].plot(x_axis, ratio_np.mean(0), color=COLORS[j])
    ax[1].fill_between(x_axis, ratio_np.mean(0)-ratio_np.var(0), ratio_np.mean(0)+ratio_np.var(0), alpha=0.15, color=COLORS[j])
    ax[2].plot(x_axis, avgrank_np.mean(0), color=COLORS[j])
    ax[2].fill_between(x_axis, avgrank_np.mean(0)-0.5*avgrank_np.std(0), avgrank_np.mean(0)+0.5*avgrank_np.std(0), alpha=0.15, color=COLORS[j])
    ax[3].plot(x_axis, easyrank_np.mean(0), color=COLORS[j])
    ax[3].fill_between(x_axis, easyrank_np.mean(0)-0.5*easyrank_np.std(0), easyrank_np.mean(0)+0.5*easyrank_np.std(0), alpha=0.15, color=COLORS[j])
ax[0].legend(fontsize=16)

### Backup code