In [47]:
import pandas as pd

In [48]:
# Episodes reserved per experiment, kept in two groups ("positive" / "negative").
# NOTE(review): the meaning of positive vs. negative is not visible here — confirm with the author.
positive_episodes: dict = {
    '0_line_grids_rows': ['episode_25', 'episode_18'],
    '1_line_grids_columns': ['episode_10', 'episode_0'],
    '2_diagonal_grids': ['episode_27', 'episode_24'],
    '3_letter_grids': ['episode_8', 'episode_11'],
    '4_shape_grids': ['episode_15', 'episode_24'],
    '5_random_grids': ['episode_28', 'episode_0'],
}

negative_episodes: dict = {
    '0_line_grids_rows': ['episode_29', 'episode_7', 'episode_10', 'episode_15'],
    '1_line_grids_columns': ['episode_15', 'episode_5', 'episode_3', 'episode_22'],
    '2_diagonal_grids': ['episode_5', 'episode_29', 'episode_17', 'episode_8'],
    '3_letter_grids': ['episode_29', 'episode_22', 'episode_24', 'episode_13'],
    '4_shape_grids': ['episode_13', 'episode_9', 'episode_17', 'episode_1'],
    '5_random_grids': ['episode_23', 'episode_17', 'episode_1', 'episode_20'],
}

# Per experiment: positive episodes first, then the negative ones
test_episodes: dict = {
    experiment: positive_episodes[experiment] + negative_episodes[experiment]
    for experiment in positive_episodes
}

In [49]:
# Show the merged per-experiment episode lists (positive entries followed by negative ones)
test_episodes

{'0_line_grids_rows': ['episode_25',
  'episode_18',
  'episode_29',
  'episode_7',
  'episode_10',
  'episode_15'],
 '1_line_grids_columns': ['episode_10',
  'episode_0',
  'episode_15',
  'episode_5',
  'episode_3',
  'episode_22'],
 '2_diagonal_grids': ['episode_27',
  'episode_24',
  'episode_5',
  'episode_29',
  'episode_17',
  'episode_8'],
 '3_letter_grids': ['episode_8',
  'episode_11',
  'episode_29',
  'episode_22',
  'episode_24',
  'episode_13'],
 '4_shape_grids': ['episode_15',
  'episode_24',
  'episode_13',
  'episode_9',
  'episode_17',
  'episode_1'],
 '5_random_grids': ['episode_28',
  'episode_0',
  'episode_23',
  'episode_17',
  'episode_1',
  'episode_20']}

In [50]:
# Input is read as JSON Lines (one record per line)
path_data: str = '../../data/processed/referencegame_new_processed.jsonl'

df = pd.read_json(path_data, lines=True)

# Retain only the rows whose 'Success' column equals 1
df_success = df.loc[df['Success'] == 1]

In [51]:
# Number of rows with Success == 1
len(df_success)

18962

In [52]:
# Plain model names to look for in the data.
# The order matters: a later cell slices the first five matches into a smaller subset.
models_to_consider: list = [
    'o1-preview-2024-09-12',
    'gpt-4-turbo-2024-04-09',
    'claude-3-5-sonnet-20240620',
    'gpt-4-0125-preview',
    'Meta-Llama-3.1-405B-Instruct-Turbo',
    'gpt-4-1106-preview',
    'gpt-4-0613',
    'gpt-4o-2024-05-13',
    'gpt-4o-2024-08-06',
    'Mistral-Large-Instruct-2407',
    'claude-3-opus-20240229',
    'gemini-1.5-pro-latest',
]

# Distinct model identifiers that occur among the successful rows
all_available_models = list(df_success['model'].unique())

In [54]:
# Resolve every requested model name to the identifier(s) present in the data.
# Matching is by substring: an available identifier is selected when it contains
# the requested name.
# NOTE: the variable names are historical; the lists hold however many identifiers
# actually matched, which need not be exactly ten.
top_10_models = []
missing_models = []

for m in models_to_consider:
    found = False
    for s in all_available_models:
        if m in s:
            found = True
            # Keep only the first occurrence so the [:5] slice below is not
            # skewed by an identifier that matches more than one requested name
            if s not in top_10_models:
                top_10_models.append(s)
    if not found:
        missing_models.append(m)

# Report requested models that have no match instead of dropping them silently
if missing_models:
    print(f'No match in all_available_models for: {missing_models}')

# First five matched identifiers, in models_to_consider order
top_5_models = top_10_models[:5]

In [55]:
# Show the identifiers from the data that matched an entry of models_to_consider
top_10_models

['gpt-4-turbo-2024-04-09-t0.0--gpt-4-turbo-2024-04-09-t0.0',
 'claude-3-5-sonnet-20240620-t0.0--claude-3-5-sonnet-20240620-t0.0',
 'gpt-4-0125-preview-t0.0--gpt-4-0125-preview-t0.0',
 'Meta-Llama-3.1-405B-Instruct-Turbo-t0.0--Meta-Llama-3.1-405B-Instruct-Turbo-t0.0',
 'gpt-4-1106-preview-t0.0--gpt-4-1106-preview-t0.0',
 'gpt-4-0613-t0.0--gpt-4-0613-t0.0',
 'gpt-4o-2024-05-13-t0.0--gpt-4o-2024-05-13-t0.0',
 'Mistral-Large-Instruct-2407-t0.0--Mistral-Large-Instruct-2407-t0.0',
 'claude-3-opus-20240229-t0.0--claude-3-opus-20240229-t0.0',
 'gemini-1.5-pro-latest-t0.0--gemini-1.5-pro-latest-t0.0']

In [56]:
# Restrict the successful rows to the selected model identifiers
df_success_top_10 = df_success.loc[df_success['model'].isin(top_10_models)]
df_success_top_5 = df_success.loc[df_success['model'].isin(top_5_models)]


In [57]:
# filter episodes that should not be inside
# Function to filter the DataFrame
def filter_dataframe(df, episode_dict):
    """Return the rows of ``df`` whose episode is not listed for their experiment.

    Args:
        df: DataFrame with an 'experiment' and an 'episode' column.
        episode_dict: Mapping from experiment name to a collection of episode
            names that must be removed for that experiment.

    Returns:
        The rows of ``df`` (original index and columns preserved) for which
        ``episode`` is not contained in ``episode_dict[experiment]``.

    Raises:
        KeyError: If a row's experiment has no entry in ``episode_dict``.
    """
    # Walk the two columns directly rather than using DataFrame.apply(axis=1),
    # which constructs a Series for every row.
    keep = [
        episode not in episode_dict[experiment]
        for experiment, episode in zip(df['experiment'], df['episode'])
    ]
    # Explicit bool dtype and index keep this a valid row mask even when df has no rows
    mask = pd.Series(keep, index=df.index, dtype=bool)
    return df[mask]

# Remove the test episodes from both model subsets
filtered_df = filter_dataframe(df=df_success_top_10, episode_dict=test_episodes)
filtered_df_light = filter_dataframe(df=df_success_top_5, episode_dict=test_episodes)

# Unique episodes left for each experiment after filtering
stats = filtered_df.groupby('experiment')['episode'].unique()

In [58]:
# Preview the unique episodes remaining per experiment
stats.head()

experiment
0_line_grids_rows       [episode_0, episode_1, episode_11, episode_12,...
1_line_grids_columns    [episode_1, episode_11, episode_12, episode_13...
2_diagonal_grids        [episode_11, episode_12, episode_13, episode_1...
3_letter_grids          [episode_0, episode_1, episode_12, episode_14,...
4_shape_grids           [episode_0, episode_10, episode_11, episode_12...
Name: episode, dtype: object

In [59]:
# Per-experiment summary of the episode column after the test episodes were removed
filtered_df.groupby('experiment').episode.describe()

Unnamed: 0_level_0,count,unique,top,freq
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_line_grids_rows,484,24,episode_0,28
1_line_grids_columns,518,24,episode_24,28
2_diagonal_grids,454,24,episode_21,28
3_letter_grids,438,24,episode_0,28
4_shape_grids,454,24,episode_0,28
5_random_grids,410,24,episode_15,28


In [60]:
# Per-experiment summary of the episode column before filtering (compare with filtered_df)
df_success_top_10.groupby('experiment').episode.describe()

Unnamed: 0_level_0,count,unique,top,freq
experiment,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0_line_grids_rows,602,30,episode_0,28
1_line_grids_columns,656,30,episode_0,28
2_diagonal_grids,574,30,episode_24,28
3_letter_grids,542,30,episode_0,28
4_shape_grids,576,30,episode_0,28
5_random_grids,510,30,episode_15,28


In [61]:
# Write both filtered frames to CSV (the DataFrame index is written as well)
filtered_df.to_csv(path_or_buf='../../data/training_data/D70005.csv')
filtered_df_light.to_csv(path_or_buf='../../data/training_data/D70005_LIGHT.csv')

In [62]:
def ensure_data_coherence(dfs, episode_dictionary):
    """Verify that no row of ``dfs`` belongs to an excluded episode.

    Args:
        dfs: DataFrame with an 'experiment' and an 'episode' column.
        episode_dictionary: Mapping from experiment name to the collection of
            episode names that must not appear for that experiment.

    Returns:
        None when every row passes the check.

    Raises:
        AssertionError: At the first row whose episode is listed for its
            experiment; the message names the offending pair.
        KeyError: If a row's experiment has no entry in ``episode_dictionary``.
    """
    # Iterate the two columns directly; iterrows() builds a Series per row and
    # its index value was never used.
    for experiment, episode in zip(dfs['experiment'], dfs['episode']):
        if episode in episode_dictionary[experiment]:
            # Raised explicitly (not via `assert`) so the check still runs under
            # `python -O` and reports which row violated it.
            raise AssertionError(
                f'Excluded episode found in data: experiment={experiment!r}, episode={episode!r}'
            )


# Both exported frames must be free of test episodes, not only the larger one
ensure_data_coherence(filtered_df, test_episodes)
ensure_data_coherence(filtered_df_light, test_episodes)

In [63]:
# Distinct values of the 'game' column in the filtered data
filtered_df.game.unique()

array(['referencegame'], dtype=object)

In [64]:
# Row count of filtered_df (the frame written to D70005.csv)
len(filtered_df)

2758

In [65]:
# Row count of filtered_df_light (the frame written to D70005_LIGHT.csv)
len(filtered_df_light)

1410