# Zhifeng's AI Player Cards Data Cleaning Notebook

date: y2025m01d17

---

import pandas as pd

In [17]:
import pandas as pd


In [18]:
# Column names of the v0 AI player cards table (see the columns of "ai_cards_v0.csv").
# The trailing examples are values taken from the first displayed row of that table.
V0_CASE_COL_NAME = "case_v0_str"  # e.g. "C1D0a"
V0_SCENARIO_COL_NAME = "scenario_v0_id_str"  # e.g. "S1T1"
V0_CC_COL_NAME = "computer_card_id"  # e.g. "CC1_1"
V0_TURN_IDX_COL_NAME = "turn_idx"  # e.g. 1
V0_PLAYER_0_CARD_COL_NAME = "ai_player_0_card"  # e.g. "S1T1C25"
V0_PLAYER_1_CARD_COL_NAME = "ai_player_1_card"  # e.g. "S1T1C9"

In [19]:
# Load the raw v0 card table; the bare `df_v0` on the last line displays it in the notebook.
# NOTE(review): the displayed frame contains an "Unnamed: 0" column, presumably an index
# that was saved into the CSV - consider `index_col=0` if it is not needed; confirm first.
df_v0 = pd.read_csv("ai_cards_v0.csv")
df_v0

Unnamed: 0,case_v0_str,scenario_v0_id_str,computer_card_id,turn_idx,ai_player_0_card,ai_player_1_card,id
0,C1D0a,S1T1,CC1_1,1,S1T1C25,S1T1C9,1
1,C1D0a,S1T1,CC1_1,2,S1T1C11,S1T1C5,2
2,C1D0a,S1T1,CC1_1,3,S1T1C22,S1T1C33,3
3,C1D0a,S1T1,CC1_1,4,S1T1C27,S1T1C14,4
4,C1D0a,S1T1,CC1_1,5,S1T1C23,S1T1C36,5
...,...,...,...,...,...,...,...
643,C1D0c,S1T1,CC36_3,2,S1T1C4,S1T1C29,644
644,C1D0c,S1T1,CC36_3,3,S1T1C18,S1T1C20,645
645,C1D0c,S1T1,CC36_3,4,S1T1C10,S1T1C14,646
646,C1D0c,S1T1,CC36_3,5,S1T1C39,S1T1C23,647


In [20]:
def transform_case_v0_id_str_fn(case_v0_id_str: str):
  """Parse a v0 case id such as ``"C1D0a"`` into a tuple of three ints.

  The id is read as ``<letter><number><letter><number><lowercase letter>``.
  Both numbers may span several digits; a fixed-position lookup would only
  work for single-digit numbers. Text after the suffix letter is ignored.

  Returns:
    ``(first_number, second_number, suffix_index)`` where ``suffix_index``
    is 0 for ``'a'``, 1 for ``'b'``, and so on.

  Raises:
    ValueError: if ``case_v0_id_str`` does not start with that pattern.
  """
  import re  # local import so the cell does not depend on another cell's imports

  match = re.match(r"[A-Za-z](\d+)[A-Za-z](\d+)([a-z])", case_v0_id_str)
  if match is None:
    raise ValueError(f"unrecognized case id: {case_v0_id_str!r}")
  first_num_str, second_num_str, suffix_char = match.groups()
  return (int(first_num_str), int(second_num_str), ord(suffix_char) - ord("a"))

assert transform_case_v0_id_str_fn("C1D1a") == (1, 1, 0)
assert transform_case_v0_id_str_fn("C2D2b") == (2, 2, 1)
assert transform_case_v0_id_str_fn("C12D3b") == (12, 3, 1)

In [21]:
def transform_scenario_v0_id_str_fn(name_str:str):
  """Extract up to three numbers from an id such as ``"S1T2C20"``.

  Every maximal run of digits in ``name_str`` is one number, taken left to
  right; all other characters only act as separators. Missing trailing
  numbers are reported as 0, so ``"S1T2"`` gives ``(1, 2, 0)``.

  Returns:
    A tuple of exactly three ints.

  Raises:
    ValueError: if ``name_str`` contains more than three digit runs.
  """
  import re  # local import so the cell does not depend on another cell's imports

  MAX_NUM_COUNT = 3
  num_list = [int(digits_str) for digits_str in re.findall(r"\d+", name_str)]
  if len(num_list) > MAX_NUM_COUNT:
    # Fail with the offending id in the message instead of an index error.
    raise ValueError(
      f"expected at most {MAX_NUM_COUNT} numbers in {name_str!r}, found {len(num_list)}"
    )
  # Pad so callers can always index positions 0..2.
  num_list += [0] * (MAX_NUM_COUNT - len(num_list))
  return tuple(num_list)

assert transform_scenario_v0_id_str_fn("S1T1C1") == (1, 1, 1)
assert transform_scenario_v0_id_str_fn("S1T2C2") == (1, 2, 2)
assert transform_scenario_v0_id_str_fn("S1T2C20") == (1, 2, 20)
assert transform_scenario_v0_id_str_fn("S1T2") == (1, 2, 0)

In [22]:
def transform_in_game_num_idx_str_to_idx_fn(game_num_idx_str: str):
  """Return the zero-based index given by the number that ends the id.

  In other words, whether a scene is the first scene, the second scene, or
  the third scene: ``"C36_S1"`` gives 0 and ``"C36_S2"`` gives 1.

  The whole trailing run of digits is used, so ``"C36_S12"`` gives 11;
  reading only the last character would give 1 for that input.

  Raises:
    ValueError: if ``game_num_idx_str`` does not end with a digit (0-9).
  """
  id_without_trailing_digits = game_num_idx_str.rstrip("0123456789")
  trailing_digits_str = game_num_idx_str[len(id_without_trailing_digits):]
  if not trailing_digits_str:
    raise ValueError(f"no trailing number in {game_num_idx_str!r}")
  return int(trailing_digits_str) - 1

assert transform_in_game_num_idx_str_to_idx_fn("C36_S1") == 0
assert transform_in_game_num_idx_str_to_idx_fn("C36_S2") == 1
assert transform_in_game_num_idx_str_to_idx_fn("C36_S12") == 11

In [23]:
def transform_condition_id_str_to_idx_fn(condition_id_str: str):
  """Return the first run of digits in ``condition_id_str`` as an int.

  Characters before the first digit are skipped and parsing stops at the
  first non-digit that follows it, so ``"IC35_S2"`` gives 35. A string
  without any digit gives 0.
  """
  char_iter = iter(condition_id_str)

  # Phase 1: advance to the first digit; give up with 0 if there is none.
  for char in char_iter:
    if char.isdigit():
      condition_idx = int(char)
      break
  else:
    return 0

  # Phase 2: keep consuming from the same iterator until the digit run ends.
  for char in char_iter:
    if not char.isdigit():
      break
    condition_idx = condition_idx * 10 + int(char)
  return condition_idx

assert transform_condition_id_str_to_idx_fn("C36_S1") == 36
assert transform_condition_id_str_to_idx_fn("C35_S2") == 35
assert transform_condition_id_str_to_idx_fn("IC35_S2") == 35

In [25]:
# Walk df_v0 in consecutive groups of ROW_GROUP_LEN rows and print one bracketed
# record per group: the ids parsed from the group's first row, followed by the
# zero-based card indices of both AI players for every row of the group.
#
# A duplicate check keyed on player_card_id_info is kept below, commented out.

# info_map = {}
# duplicate_count = 0

ROW_GROUP_LEN = 6
row_i = 0


def _card_idx_fn(card_id_str):
  # Third number of a card id (e.g. the 25 in "S1T1C25"), shifted to start at 0.
  return transform_scenario_v0_id_str_fn(card_id_str)[2] - 1


for row_group_begin_i in range(0, len(df_v0), ROW_GROUP_LEN):
  # The case, scenario and computer-card ids are read from the first row only.
  first_row = df_v0.iloc[row_group_begin_i]
  computer_card_id_str = first_row[V0_CC_COL_NAME]

  case_id_tuple = transform_case_v0_id_str_fn(first_row[V0_CASE_COL_NAME])
  scenario_id_tuple = transform_scenario_v0_id_str_fn(first_row[V0_SCENARIO_COL_NAME])
  condition_idx = transform_condition_id_str_to_idx_fn(computer_card_id_str)
  game_num_idx = transform_in_game_num_idx_str_to_idx_fn(computer_card_id_str)

  # One pass over every row of the group, the first row included.
  ai_player_0_card_idx_list = []
  ai_player_1_card_idx_list = []
  for row_i in range(row_group_begin_i, row_group_begin_i + ROW_GROUP_LEN):
    row = df_v0.iloc[row_i]
    ai_player_0_card_idx_list.append(_card_idx_fn(row[V0_PLAYER_0_CARD_COL_NAME]))
    ai_player_1_card_idx_list.append(_card_idx_fn(row[V0_PLAYER_1_CARD_COL_NAME]))

  # Key identifying the group; only the commented-out duplicate check reads it.
  player_card_id_info = (
    condition_idx,
    game_num_idx,
    case_id_tuple[1],
    case_id_tuple[2],
    scenario_id_tuple[0],
    scenario_id_tuple[1],
  )
  # if not player_card_id_info in info_map.keys():
  #   info_map[player_card_id_info] = (ai_player_0_card_idx_list,ai_player_1_card_idx_list)
  # else:
  #   if info_map[player_card_id_info] != (ai_player_0_card_idx_list, ai_player_1_card_idx_list):
  #     print(player_card_id_info)
  #     print(info_map[player_card_id_info])
  #     print((ai_player_0_card_idx_list, ai_player_1_card_idx_list))
  #   duplicate_count += 1
  print(f"[{condition_idx},{game_num_idx},{case_id_tuple[1]},{case_id_tuple[2]},{scenario_id_tuple[0]},{scenario_id_tuple[1]},{ai_player_0_card_idx_list},{ai_player_1_card_idx_list}],")
#print(f"Duplicate: {duplicate_count} / {len(df_v0) / 6}")

[1,0,0,0,1,1,[24, 10, 21, 26, 22, 39],[8, 4, 32, 13, 35, 12]],
[1,1,0,0,2,1,[22, 35, 21, 0, 19, 17],[4, 7, 6, 5, 32, 24]],
[1,2,0,0,3,1,[27, 11, 14, 39, 30, 18],[5, 1, 31, 22, 7, 34]],
[2,0,0,0,2,2,[15, 23, 36, 6, 12, 38],[8, 24, 26, 4, 7, 22]],
[2,1,0,0,3,2,[31, 20, 36, 12, 26, 14],[28, 5, 32, 7, 43, 17]],
[2,2,0,0,1,2,[23, 10, 29, 25, 8, 40],[20, 19, 5, 33, 26, 38]],
[3,0,0,0,3,3,[38, 8, 13, 37, 29, 33],[7, 14, 1, 15, 21, 34]],
[3,1,0,0,1,3,[20, 6, 21, 29, 7, 26],[28, 0, 10, 35, 5, 2]],
[3,2,0,0,2,3,[22, 16, 24, 11, 34, 12],[15, 4, 23, 28, 39, 29]],
[4,0,1,0,1,3,[20, 39, 11, 28, 35, 0],[23, 21, 13, 12, 2, 33]],
[4,1,1,0,3,2,[6, 9, 19, 28, 7, 5],[10, 36, 2, 27, 17, 25]],
[4,2,1,0,2,1,[22, 40, 23, 4, 5, 7],[20, 21, 18, 39, 24, 36]],
[5,0,1,0,2,2,[20, 0, 13, 8, 4, 24],[35, 36, 11, 9, 22, 2]],
[5,1,1,0,1,1,[24, 10, 6, 8, 13, 4],[9, 21, 11, 5, 12, 2]],
[5,2,1,0,3,3,[36, 19, 17, 7, 15, 14],[25, 13, 26, 6, 34, 23]],
[6,0,1,0,3,1,[37, 2, 23, 5, 22, 1],[0, 14, 35, 3, 34, 32]],
[6,1,1,0,2,3,[5