# Pipeline de Geração de Observações - Tic Tac Toe
**Authors**: Gabriela Dellamora Paim, Bruno Carlan

**Version**: 12/04/2025

**Python Ver**: 3.12.9

## Checklist para geração das 64 observações de tabuleiros *in_progress*


In [None]:
import pandas as pd
import random
import itertools
from collections import defaultdict
from math import ceil

# CSV locations used by this notebook.
PATH_OLD = './data_old/data_processed.csv'
PATH_NEW = './data.csv'
PATH_ONGOING = './data_old/data_ongoing.csv'

# Numeric encoding of a single board cell.
X, O, BLANK = 1, -1, 0

# Integer outcome labels (0..3); ONGOING is the one attached to generated rows.
O_WIN, DRAW, ONGOING, X_WIN = range(4)

# Seed the global RNG so the shuffles performed later are reproducible.
random.seed(42)

In [None]:
# Load the CSV at PATH_OLD and rename its ten columns: nine board cells
# ('0'..'8') followed by the class label.
df_processed = pd.read_csv(PATH_OLD, index_col=False)
df_processed.columns = [str(cell) for cell in range(9)] + ['category']
df_processed.describe()

# Gerar dados Ongoing. Utilizar um critério de observação adequado
1. Gerar todas as possibilidades
2. Aplicar critério de observação
3. Salvar em um CSV

In [None]:
import itertools, random
import pandas as pd

# Same cell encoding as declared at the top of the notebook.
BLANK, X, O = 0, 1, -1

# Every triple of cell indices (cells numbered 0..8) that forms a winning line.
WIN_LINES = [
    *[(start, start + 1, start + 2) for start in (0, 3, 6)],  # rows
    *[(start, start + 3, start + 6) for start in (0, 1, 2)],  # columns
    (0, 4, 8), (2, 4, 6),                                     # diagonals
]

def is_nonterminal(board):
    """Return True when no line in WIN_LINES is fully held by one player.

    ``board`` is indexed by cell position. A line counts as won when its three
    cells hold the same non-BLANK value. Whether the board is full is not
    checked here.
    """
    return not any(
        board[a] != BLANK and board[a] == board[b] == board[c]
        for a, b, c in WIN_LINES
    )

def generate_all_inprogress(starting_players=(X, O)):
    """Enumerate every board with 1..8 marks on which nobody has won yet.

    For each starting player and each move count ``k``, the starter owns
    ``ceil(k / 2)`` cells and the opponent ``floor(k / 2)``, whichever symbol
    the starter plays. Boards that contain a completed line are skipped.

    Args:
        starting_players: iterable of X and/or O; each entry is treated as the
            player who made the first move.

    Returns:
        A list, shuffled with the global ``random`` state, of flat rows:
        the nine cell values followed by ``ONGOING``, ``k`` and the player
        whose turn comes next.
    """
    cells = range(9)
    all_states = []
    for sp in starting_players:
        op = X if sp == O else O
        for k in range(1, 9):
            # The starter plays moves 1, 3, 5, ... so it always holds the
            # extra mark on odd k. Making this depend on the symbol gave
            # O-first boards X's counts and a wrong next player.
            n_sp = (k + 1) // 2
            n_op = k - n_sp
            # After an even number of moves the starter is to move again.
            next_player = sp if k % 2 == 0 else op
            for pos_sp in itertools.combinations(cells, n_sp):
                taken = set(pos_sp)
                # Ordered list keeps the enumeration order well defined.
                free = [i for i in cells if i not in taken]
                for pos_op in itertools.combinations(free, n_op):
                    board = [BLANK] * 9
                    for i in pos_sp:
                        board[i] = sp
                    for i in pos_op:
                        board[i] = op
                    if not is_nonterminal(board):
                        continue
                    # Flatten board + metadata into a single row.
                    all_states.append(board + [ONGOING, k, next_player])
    random.shuffle(all_states)
    return all_states


In [None]:
# Generate the states for X starting and for O starting (in that order),
# join them into one list and shuffle the combined result.
all_states = [
    state
    for first in (X, O)
    for state in generate_all_inprogress(starting_players=(first,))
]
random.shuffle(all_states)
all_states[0]  # show one generated row

In [None]:
# Row layout: nine board cells, then the label and the two metadata fields.
cols = [*map(str, range(9)), 'category', 'n_jogadas', 'jogador_vez']

# Build the frame and drop rows whose nine board cells repeat an earlier row
# (the metadata columns are not part of the comparison).
df = (
    pd.DataFrame(all_states, columns=cols)
    .drop_duplicates(subset=cols[:9], keep='first', ignore_index=True)
)
df.sample(10)

In [None]:
# Show how many distinct boards exist for each number of moves played.
print(df['n_jogadas'].value_counts())
# Number of boards to sample for each move count (keys 1..8); every key uses
# the same value.
# NOTE(review): the original literal annotated the keys with 18, 36, 72, 72,
# 144, 144, 288, 444 - presumably earlier targets or availability counts;
# confirm before relying on them.
targets = dict.fromkeys(range(1, 9), 50)

In [None]:
# Sample up to `n` boards per move count. The pieces are collected in a list
# and concatenated once: growing a frame by repeated concat onto an empty
# object-dtype frame copies the data on every iteration, turns the integer
# columns into object dtype and relies on deprecated empty-frame behaviour.
sampled_parts = []
for label, n in targets.items():
    df_label = df[df['n_jogadas'] == label]
    # Cap at the rows available so sample() never asks for more than exist.
    sampled_parts.append(
        df_label.sample(n=min(len(df_label), n), random_state=42)
    )

if sampled_parts:
    df_ongoing = pd.concat(sampled_parts, ignore_index=True)
else:
    # No targets configured: fall back to an empty frame with the same columns.
    df_ongoing = pd.DataFrame(columns=df.columns)

print(df_ongoing['n_jogadas'].value_counts())
print(df_ongoing.sample(10))


In [None]:
# Concatenate the datasets to produce the final, cleaned dataset.
# The helper columns are dropped by reassignment with errors='ignore', so
# re-running this cell after they are already gone no longer raises KeyError.
df_ongoing = df_ongoing.drop(columns=['n_jogadas', 'jogador_vez'], errors='ignore')
pd.concat([df_processed, df_ongoing]).to_csv(PATH_NEW, index=False)