In [69]:
import pandas as pd
from pandas import DataFrame

# Input locations (relative paths).
path_data_old: str = '../../data/processed/all_games_merged_old_processed.jsonl'
path_referencegame_new: str = '../../data/training_data/D70005.csv'

# The old episodes are read as JSON Lines (lines=True); the reference game data is a CSV.
# The reader must use `path_data_old`: no variable called `path_data` exists in this
# notebook, so referencing it fails with a NameError on a fresh kernel.
all_old_data: pd.DataFrame = pd.read_json(path_data_old, lines=True)
reference_game_new: pd.DataFrame = pd.read_csv(path_referencegame_new)

In [70]:
# Games that are taken over from the old data set; rows of any other game are dropped.
games_to_consider: list = [
    'imagegame', 'privateshared',
    'wordle', 'wordle_withclue', 'wordle_withcritic',
    'taboo',
]

# Boolean-mask selection on the `game` column.
data_old_no_reference_game: pd.DataFrame = all_old_data.loc[
    all_old_data['game'].isin(games_to_consider)
]

In [71]:
# Sanity check: which games survived the filter?
data_old_no_reference_game['game'].unique()

array(['wordle', 'wordle_withclue', 'privateshared', 'imagegame', 'taboo',
       'wordle_withcritic'], dtype=object)

In [72]:
# Row counts: filtered old data vs. new reference game data.
tuple(len(frame) for frame in (data_old_no_reference_game, reference_game_new))

(18090, 2758)

In [73]:
# Keep only the episodes flagged as successful (Success == 1).
# The mask has to be built from `data_old_no_reference_game`; the misspelled
# `data_old_no_referencegame` is not defined anywhere in this notebook.
data_old_no_reference_game_success = data_old_no_reference_game[data_old_no_reference_game.Success == 1]

In [74]:
# Row counts: successful old episodes vs. new reference game data.
tuple(len(frame) for frame in (data_old_no_reference_game_success, reference_game_new))

(2644, 2758)

In [75]:
# Inspect the column layout of the successful old episodes.
# Uses the name actually assigned above (`data_old_no_reference_game_success`).
data_old_no_reference_game_success.columns

Index(['game', 'game_id', 'model', 'benchmark_version', 'experiment',
       'episode', 'Aborted', 'Lose', 'Success', 'chat', 'target', 'player'],
      dtype='object')

In [76]:
# Remove the 'Unnamed: 0' and 'text' columns before merging
# ('Unnamed: 0' is presumably a stored row index from an earlier CSV export - TODO confirm).
reference_game_clean = reference_game_new.drop(columns=['Unnamed: 0', 'text'])

In [77]:
# The chat column is read from CSV as text; turn each cell back into a Python object.
# NOTE(review): `eval` executes arbitrary code found in the CSV. This is acceptable only
# for trusted files; consider `ast.literal_eval` if the stored values are plain literals.
# NOTE: this cell is not re-runnable - a second run would call eval on already parsed lists.
reference_game_clean['chat'] = reference_game_clean['chat'].map(eval)

In [78]:
# Look at the first parsed chat to verify the structure.
reference_game_clean['chat'].iloc[0]

[{'role': 'user',
  'content': 'You are given three grids, where each of them is 5 by 5 in size.\nGrids have empty cells marked with "▢" and filled cells marked with "X".\nYour task is to generate a referring expression that best describes the target grid while distinguishing it from the two other distractor grids.\nThe first grid is the target grid, and the following two grids are the distractors.\n\nTarget grid:\n\nX X X X X\n▢ ▢ ▢ ▢ ▢\n▢ ▢ ▢ ▢ ▢\n▢ ▢ ▢ ▢ ▢\n▢ ▢ ▢ ▢ ▢\n\nDistractor grid 1:\n\n▢ ▢ ▢ ▢ ▢\nX X X X X\n▢ ▢ ▢ ▢ ▢\n▢ ▢ ▢ ▢ ▢\n▢ ▢ ▢ ▢ ▢\n\nDistractor grid 2:\n\n▢ ▢ ▢ ▢ ▢\n▢ ▢ ▢ ▢ ▢\nX X X X X\n▢ ▢ ▢ ▢ ▢\n▢ ▢ ▢ ▢ ▢\n\nInstruction: Describe the target grid.\nGenerate the referring expression starting with the tag "Expression: " for the given target grid. Omit any other text.'},
 {'role': 'assistant',
  'content': 'Expression: The grid with the filled cells in the top row.'}]

In [79]:
# Stack the successful old episodes on top of the cleaned reference game rows.
# The original row labels of both frames are kept as they are.
merged_data = pd.concat(
    objs=[
        data_old_no_reference_game_success,
        reference_game_clean,
    ]
)

In [80]:
# Number of rows after merging.
merged_data.shape[0]

5402

In [84]:
import copy


def ensure_alternating_roles(messages, cull_system_message: bool = True):
    """
    Return a copy of ``messages`` in which no two neighbouring entries share a role.

    The messages format assumes alternating roles of user and assistant. Whenever two
    consecutive messages carry the same role, the content of the later one is appended
    to the earlier one (separated by a blank line) and the later message is removed.
    The input is never modified; all work happens on a deep copy.

    :param messages: list of message dicts, each providing a "role" and a "content" key
    :param cull_system_message: if True, a leading system message whose content is
        empty is removed before the merge
    :return: a new messages object with the alternating roles ensured
    """
    delimiter = "\n\n"
    _messages = copy.deepcopy(messages)

    # Nothing to do for an empty conversation; indexing _messages[0] below would raise.
    if not _messages:
        return _messages

    if cull_system_message and _messages[0]['role'] == "system" and not _messages[0]["content"]:
        del _messages[0]

    msg_idx = 1
    while msg_idx < len(_messages):
        prev_message = _messages[msg_idx - 1]
        message = _messages[msg_idx]
        if prev_message["role"] == message["role"]:
            # Fold the current message into its predecessor. The index is NOT advanced,
            # so the element that slides into this position is compared against the
            # (now longer) predecessor in the next iteration.
            prev_message['content'] = f"{prev_message['content']}{delimiter}{message['content']}"
            del _messages[msg_idx]
        else:
            msg_idx += 1
    return _messages

In [93]:
def prepare_qa_data(data: DataFrame) -> dict:
    """
    Expand every episode into one sample per training target.

    For each row of ``data`` several output rows are produced. All columns except
    ``chat`` are copied unchanged; ``chat`` holds a prefix of the conversation:

    * ``privateshared``: chat items are sorted by their ``type`` field. Items whose type
      contains "message" are collected into the running conversation. Items whose type
      contains "probe" are collected until the probe run ends (next item is not a probe,
      or the chat is over); then one sample ``messages + probes`` is emitted and the
      probe buffer is cleared. Items matching neither are ignored.
    * every other game: one sample per assistant turn, containing the chat up to and
      including that turn.

    :param data: frame that provides at least the columns ``chat`` and ``game``
    :return: dict mapping each column name of ``data`` to the list of values of the
        generated samples (suitable for ``pd.DataFrame.from_dict``)
    """
    result_data: dict = {key: [] for key in data.columns}

    def _emit(row, chat_prefix) -> None:
        # Append one sample: the given chat prefix plus the row's remaining columns.
        for column in data.columns:
            result_data[column].append(chat_prefix if column == 'chat' else row[column])

    for _, row in data.iterrows():
        chat = row['chat']

        if row['game'] == 'privateshared':
            last_index = len(chat) - 1
            messages = []
            probes = []
            for turn_idx, item in enumerate(chat):
                if "message" in item["type"]:
                    messages.append(item)
                elif "probe" in item["type"]:
                    probes.append(item)
                    # A probe run ends at the end of the chat or before a non-probe item.
                    if turn_idx == last_index or "probe" not in chat[turn_idx + 1]["type"]:
                        _emit(row, messages + probes)
                        probes = []
        else:
            for turn_idx, item in enumerate(chat):
                if item['role'] == 'assistant':
                    # For the final turn this prefix equals the complete chat.
                    _emit(row, chat[:turn_idx + 1])
    return result_data

In [94]:
# Merge consecutive same-role messages so that every chat alternates between roles.
merged_data.chat = merged_data.chat.apply(ensure_alternating_roles)
# Shuffle the episodes. The fixed random_state makes the row order (and therefore the
# saved data set) reproducible across kernel restarts.
merged_data_shuffled = merged_data.sample(frac=1, random_state=42).reset_index(drop=True)
# Expand every episode into per-turn samples and turn the column dict into a frame.
result_data = prepare_qa_data(merged_data_shuffled)
result_df = pd.DataFrame.from_dict(result_data)

In [95]:
# Row counts: shuffled episodes, merged episodes, expanded per-turn samples.
tuple(map(len, (merged_data_shuffled, merged_data, result_df)))

(5402, 5402, 7923)

In [103]:
# Preview the first 100 expanded samples.
result_df.head(n=100)

Unnamed: 0,game,game_id,model,benchmark_version,experiment,episode,Aborted,Lose,Success,chat,target,player
0,referencegame,18,gpt-4-0125-preview-t0.0--gpt-4-0125-preview-t0.0,v1.5,4_shape_grids,episode_18,0,0,1,"[{'role': 'user', 'content': 'You are given th...",first,player 1
1,taboo,16,gpt-4-t0.0--gpt-3.5-turbo-t0.0,v0.9,0_high_en,episode_16,0,0,1,"[{'role': 'user', 'content': 'You are playing ...",minnesota,player 1
2,imagegame,16,gpt-4-t0.0--gpt-4-t0.0,v0.9,0_compact_grids,episode_16,0,0,1,"[{'role': 'user', 'content': 'Let us play a ga...",D ▢ ▢ ▢ D\nD D ▢ D D\nD ▢ D ▢ D\nD ▢ ▢ ▢ D\nD ...,player 1
3,imagegame,16,gpt-4-t0.0--gpt-4-t0.0,v0.9,0_compact_grids,episode_16,0,0,1,"[{'role': 'user', 'content': 'Let us play a ga...",D ▢ ▢ ▢ D\nD D ▢ D D\nD ▢ D ▢ D\nD ▢ ▢ ▢ D\nD ...,player 1
4,imagegame,16,gpt-4-t0.0--gpt-4-t0.0,v0.9,0_compact_grids,episode_16,0,0,1,"[{'role': 'user', 'content': 'Let us play a ga...",D ▢ ▢ ▢ D\nD D ▢ D D\nD ▢ D ▢ D\nD ▢ ▢ ▢ D\nD ...,player 1
...,...,...,...,...,...,...,...,...,...,...,...,...
95,wordle_withcritic,4,gpt-3.5-turbo-1106-t0.0--gpt-3.5-turbo-1106-t0.0,v1.0,2_low_frequency_words_clue_with_critic,episode_3,0,0,1,"[{'role': 'user', 'content': 'I need your assi...",there,player 2
96,taboo,7,openchat_3.5-t0.0--openchat_3.5-t0.0,v1.0,2_low_en,episode_7,0,0,1,"[{'role': 'user', 'content': 'You are playing ...",enclose,player 1
97,referencegame,7,gpt-4-1106-preview-t0.0--gpt-4-1106-preview-t0.0,v1.5,3_letter_grids,episode_7,0,0,1,"[{'role': 'user', 'content': 'You are given th...",second,player 1
98,referencegame,23,gemini-1.5-pro-latest-t0.0--gemini-1.5-pro-lat...,v1.6,4_shape_grids,episode_23,0,0,1,"[{'role': 'user', 'content': 'You are given th...","['third', '3rd', '3']",player 1


In [104]:
# Number of samples per game in the final data set.
result_df['game'].value_counts()

game
referencegame        2758
taboo                1930
imagegame            1157
wordle_withcritic    1113
wordle_withclue       506
privateshared         336
wordle                123
Name: count, dtype: int64

In [106]:
# Write the expanded samples to disk; index=False keeps the row index out of the CSV.
save_path: str = '../../data/training_data/D90000.csv'
result_df.to_csv(path_or_buf=save_path, index=False)