# Pipeline de Geração de Observações - Tic Tac Toe
**Author**: Gabriela Dellamora Paim, Bruno Carlan

**Version**: 12/04/2025

**Python Ver**: 3.12.9

## Checklist para geração das 64 observações de tabuleiros *in_progress*


In [1]:
import pandas as pd
import random
import itertools
from collections import defaultdict
from math import ceil

PATH_OLD = './data_old/data_processed.csv'
PATH_NEW = './data.csv'
PATH_ONGOING = './data_old/data_ongoing.csv'

X = 1
O = -1
BLANK = 0

O_WIN   = 0
DRAW    = 1
ONGOING = 2
X_WIN   = 3

random.seed(42)

In [2]:
df_processed = pd.read_csv(PATH_OLD, index_col=False)
df_processed.columns = ['0', '1', '2', '3', '4', '5', '6', '7', '8', 'category']
df_processed.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,category
count,1914.0,1914.0,1914.0,1914.0,1914.0,1914.0,1914.0,1914.0,1914.0,1914.0
mean,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.491641
std,0.886679,0.85974,0.886679,0.85974,0.912823,0.85974,0.886679,0.85974,0.886679,1.489178
min,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
25%,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,-1.0,0.0
50%,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0
75%,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0
max,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,3.0


# Gerar dados Ongoing. Utilizar um critério de observação adequado
1. Gerar todas possibilidades
2. Aplicar critério de observação
3. Salvar em um csv

In [3]:
import itertools, random
import pandas as pd

BLANK = 0
X     = 1
O     = -1

WIN_LINES = [
    (0,1,2),(3,4,5),(6,7,8),
    (0,3,6),(1,4,7),(2,5,8),
    (0,4,8),(2,4,6),
]

def is_nonterminal(board):
    for a,b,c in WIN_LINES:
        v = board[a]
        if v != BLANK and v == board[b] == board[c]:
            return False
    return True

def generate_all_inprogress(starting_players=(X, O)):
    all_states = []
    for sp in starting_players:
        op = X if sp == O else O
        for k in range(1, 9):
            n_sp = (k + (1 if sp == X else 0)) // 2
            n_op = k - n_sp
            for pos_sp in itertools.combinations(range(9), n_sp):
                rem = set(range(9)) - set(pos_sp)
                for pos_op in itertools.combinations(rem, n_op):
                    board = [BLANK]*9
                    for i in pos_sp: board[i] = sp
                    for i in pos_op: board[i] = op
                    if abs(n_sp - n_op) > 1: continue
                    if not is_nonterminal(board): continue
                    next_player = sp if (k % 2 == 0) else op
                    # **flatten** board + metadata numa única lista
                    all_states.append(board.copy() + [ONGOING, k, next_player])
    random.shuffle(all_states)
    return all_states


In [4]:
all_states = generate_all_inprogress(starting_players=(X,)) + generate_all_inprogress(starting_players=(O,))
random.shuffle(all_states)
all_states[0]

[-1, 0, -1, 0, 1, 0, -1, 1, 1, 2, 6, -1]

In [5]:
cols = [str(i) for i in range(9)] + ['category', 'n_jogadas','jogador_vez']
df = pd.DataFrame(all_states, columns=cols)

# remove linhas que tiverem as mesmas 9 células
df = df.drop_duplicates(subset=[str(i) for i in range(9)], keep='first', ignore_index=True)
df.sample(10)

Unnamed: 0,0,1,2,3,4,5,6,7,8,category,n_jogadas,jogador_vez
2680,0,1,-1,0,1,1,-1,0,-1,2,6,-1
4120,0,1,1,1,1,-1,-1,-1,0,2,7,-1
1128,1,0,-1,0,0,-1,1,0,1,2,5,-1
4416,1,1,-1,0,1,1,-1,-1,0,2,7,1
785,-1,1,-1,0,1,0,1,0,0,2,5,-1
1921,-1,1,1,0,-1,0,0,-1,1,2,6,1
3904,-1,-1,1,0,0,1,0,1,-1,2,6,1
2662,0,1,0,1,0,0,-1,1,-1,2,5,-1
2154,-1,0,0,-1,1,1,0,1,0,2,5,1
3802,1,0,0,1,-1,1,-1,0,-1,2,6,1


In [6]:
print(df.n_jogadas.value_counts())
targets = {
    1: 100, # 18,
    2: 100, # 36,
    3: 100, # 72,
    4: 100, # 72,
    5: 100, # 144,
    6: 100, # 144,
    7: 100, # 288,
    8: 100  # 444
}

n_jogadas
6    1372
5    1140
4     756
7     696
3     252
8     222
2      72
1       9
Name: count, dtype: int64


In [7]:
df_ongoing = pd.DataFrame(columns=df.columns)

for label, n in targets.items():
    df_label = df[df['n_jogadas'] == label]
    df_label = df_label.sample(
        n=min(len(df_label), n),
        random_state=42
    )
    df_ongoing = pd.concat([df_ongoing, df_label], ignore_index=True)

print(df_ongoing['n_jogadas'].value_counts())
print(df_ongoing.sample(10))


n_jogadas
3    100
4    100
5    100
6    100
7    100
8    100
2     72
1      9
Name: count, dtype: int64
      0   1   2   3   4   5   6   7   8 category n_jogadas jogador_vez
105   0   0   0   0   0   0   1  -1   1        2         3          -1
520  -1  -1   1   1   0   0  -1   1   1        2         7           1
493  -1   0  -1   1   0   1   1  -1   1        2         7           1
12    0   0   0  -1   0   0   0   1   0        2         2          -1
139   0   0   1   0   0   1   0  -1   0        2         3          -1
370  -1   1   1   0   0  -1   1   0   0        2         5          -1
413   1   1   0  -1  -1   1   0   0  -1        2         6          -1
563   1  -1   1  -1   1  -1   0   1   0        2         7           1
644   1  -1  -1  -1  -1   1   1   1   0        2         8           1
586  -1   1  -1  -1   1   0   1  -1   1        2         8           1


In [8]:
# concatena os datasets para termos o nosso dataset tratado
df_ongoing.drop(columns=['n_jogadas', 'jogador_vez'], inplace=True)
pd.concat([df_processed, df_ongoing]).to_csv(PATH_NEW, index=False)