# Pipeline de Pré-processamento - Tic Tac Toe
**Author**: Gabriela Dellamora Paim

**Version**: 04/04/2025

**Python Ver**: 3.10.11

In [41]:
import pandas as pd
import random

PATH_OLD = './data_old/tic-tac-toe.data'
PATH_NEW = './data.csv'
X_WIN   = X = '1'
O_WIN   = O ='-1'
DRAW    = '0'
ONGOING = '~'

# Tratar Dados Existentes

In [42]:
df = pd.read_csv(PATH_OLD)
df.columns = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'category']
df.sample(2)

Unnamed: 0,1,2,3,4,5,6,7,8,9,category
464,o,b,o,x,o,b,x,x,x,positive
873,o,b,b,o,x,x,o,x,b,negative


In [43]:
df['category'].replace(to_replace={'positive' : X_WIN, 'negative' : O_WIN, 'draw' : DRAW, 'ongoing' : ONGOING}, inplace=True)
df.replace(to_replace={'o' : O, 'x' : X, 'b' : 0}, inplace=True)
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,category
0,1,1,1,1,-1,-1,-1,1,-1,1
1,1,1,1,1,-1,-1,-1,-1,1,1
2,1,1,1,1,-1,-1,-1,0,0,1
3,1,1,1,1,-1,-1,0,-1,0,1
4,1,1,1,1,-1,-1,0,0,-1,1
...,...,...,...,...,...,...,...,...,...,...
952,-1,1,1,1,-1,-1,-1,1,1,-1
953,-1,1,-1,1,1,-1,1,-1,1,-1
954,-1,1,-1,1,-1,1,1,-1,1,-1
955,-1,1,-1,-1,1,1,1,-1,1,-1


# Gerar Novos Dados

In [63]:
class Board:
    def __init__(self):
        self.board      = [[0,0,0], [0,0,0], [0,0,0]]
        self.moves_left = 9
        self.status     = ONGOING

    def move(self, symbol : int):
        if self.moves_left <= 0:
            self.update_status()
            return
        x, y = self.gen_indexes()
        self.board[x][y] = symbol
        self.moves_left -= 1
        self.update_status()

    def update_status(self):
        if self.moves_left <= 0:
            self.status = DRAW
        else:
            self.status = self.check_wins()

    def check_wins(self):
        # Diagonais
        if self.board[0][0] == self.board[1][1] == self.board[2][2] != 0:
            return self.board[1][1]
        if self.board[0][2] == self.board[1][1] == self.board[2][0] != 0:
            return self.board[1][1]
        # Linhas e colunas
        for i in range(3):
            if self.board[0][i] == self.board[1][i] == self.board[2][i] != 0:
                return self.board[0][i]
            if self.board[i][0] == self.board[i][1] == self.board[i][2] != 0:
                return self.board[i][0]
        return ONGOING

    def gen_indexes(self) -> tuple:
        while True:
            x, y = random.randint(0, 2), random.randint(0, 2)
            if self.board[x][y] == 0:
                return x, y

    def observation(self):
        flat = [cell for row in self.board for cell in row]
        flat.append(self.status)
        return flat

In [None]:
def generate_new_movements(df: pd.DataFrame, repeat=1000):
    ran_var = 5
    new_data = []

    for _ in range(repeat):
        board = Board()

        while True:
            board.move(-1)
            if board.status != ONGOING:
                break
            new_data.append(board.observation()) if random.randrange(ran_var) == 0 else None

            board.move(1)
            if board.status != ONGOING:
                break
            new_data.append(board.observation()) if random.randrange(ran_var) == 0 else None

        new_data.append(board.observation())


    new_df = pd.DataFrame(new_data, columns=df.columns)
    return pd.concat([df, new_df], ignore_index=True)

'''
Tenho medo de que se observarmos todas ações até um resultado, isso poderá gerar um viés.
Por isso, adicionei uma "aleatoriedade" pra amostragem de tabuleiros "ONGOING"
'''

In [71]:
df_expanded = generate_new_movements(df, 5000)
print(f'Antes={df.size} : Depois={df_expanded.size}')

Antes=9570 : Depois=85380


# Balancear os dados
## Garantindo que existe amostra proporcional de cada (25% de cada categoria)

# Exportar Dados

In [72]:
df_expanded.to_csv(PATH_NEW, index=False)