# Pipeline de Pré-processamento - Tic Tac Toe
**Author**: Gabriela Dellamora Paim

**Version**: 04/04/2025

**Python Ver**: 3.10.11

# Tratar Dados Existentes

In [22]:
import pandas as pd
import random

PATH_OLD = './data_old/tic-tac-toe.data'

In [20]:
df = pd.read_csv(PATH_OLD)
df.columns = ['1', '2', '3', '4', '5', '6', '7', '8', '9', 'category']
df.sample(2)

Unnamed: 0,1,2,3,4,5,6,7,8,9,category
330,o,x,o,o,x,o,x,x,x,positive
772,o,x,x,b,o,b,b,x,o,negative


In [None]:
df['category'].replace(to_replace={'positive' : 1, 'negative' : -1 'draw' : 0, 'ongoing' : '~'}, inplace=True)
df.replace(to_replace={'o' : -1, 'x' : '1', 'b' : 0}, inplace=True)
df

Unnamed: 0,1,2,3,4,5,6,7,8,9,category
0,1,1,1,1,-1,-1,-1,1,-1,x_win
1,1,1,1,1,-1,-1,-1,-1,1,x_win
2,1,1,1,1,-1,-1,-1,0,0,x_win
3,1,1,1,1,-1,-1,0,-1,0,x_win
4,1,1,1,1,-1,-1,0,0,-1,x_win
...,...,...,...,...,...,...,...,...,...,...
952,-1,1,1,1,-1,-1,-1,1,1,y_win
953,-1,1,-1,1,1,-1,1,-1,1,y_win
954,-1,1,-1,1,-1,1,1,-1,1,y_win
955,-1,1,-1,-1,1,1,1,-1,1,y_win


# Gerar Novos Dados

In [None]:
class Board:
    def __init__(self):
        self.board      = [[0,0,0], [0,0,0], [0,0,0]]
        self.moves_left = 9
        self.status     = 'ongoing'

    def move(self, symbol : int):
        if self.moves_left <= 0:
            return
        x, y = self.gen_indexes()
        self.board[x][y] = symbol
        self.moves_left -= 1

    def game_over(self) -> bool:
        someone_wins, symbol = self.check_wins()
        if someone_wins:
            self.status = f'{symbol}_wins'
        elif self.moves_left <= 0:
            self.status = 'draw'
        else:
            self.status = 'ongoing'
        return self.status != 'ongoing'

    def check_wins(self):
        # Diagonais
        if self.board[0][0] == self.board[1][1] == self.board[2][2] != 0:
            return True, self.board[1][1]
        if self.board[0][2] == self.board[1][1] == self.board[2][0] != 0:
            return True, self.board[1][1]
        # Linhas e colunas
        for i in range(3):
            if self.board[0][i] == self.board[1][i] == self.board[2][i] != 0:
                return True, self.board[0][i]
            if self.board[i][0] == self.board[i][1] == self.board[i][2] != 0:
                return True, self.board[i][0]
        return False, ''

    def gen_indexes(self) -> tuple:
        while True:
            x, y = random.randint(0, 2), random.randint(0, 2)
            if self.board[x][y] == 0:
                return x, y

    def observation(self):
        flat = [cell for row in self.board for cell in row]
        flat.append(self.status)
        return flat

In [27]:
def generate_new_movements(df: pd.DataFrame, repeat=1000):
    new_data = []

    for _ in range(repeat):
        board = Board()

        while not board.game_over():
            new_data.append(board.observation())
            board.move(-1)
            if board.game_over():
                break
            new_data.append(board.observation())
            board.move(1)

        new_data.append(board.observation())

    new_df = pd.DataFrame(new_data, columns=df.columns)
    return pd.concat([df, new_df], ignore_index=True)

In [29]:
print(f'Antes={df.count}')
print(f'Depois={generate_new_movements(df, 5000).count}')

Antes=<bound method DataFrame.count of       1   2   3   4   5   6   7   8   9 category
0     1   1   1   1  -1  -1  -1   1  -1    x_win
1     1   1   1   1  -1  -1  -1  -1   1    x_win
2     1   1   1   1  -1  -1  -1   0   0    x_win
3     1   1   1   1  -1  -1   0  -1   0    x_win
4     1   1   1   1  -1  -1   0   0  -1    x_win
..   ..  ..  ..  ..  ..  ..  ..  ..  ..      ...
952  -1   1   1   1  -1  -1  -1   1   1    y_win
953  -1   1  -1   1   1  -1   1  -1   1    y_win
954  -1   1  -1   1  -1   1   1  -1   1    y_win
955  -1   1  -1  -1   1   1   1  -1   1    y_win
956  -1  -1   1   1   1  -1  -1   1   1    y_win

[957 rows x 10 columns]>
Depois=<bound method DataFrame.count of         1  2   3  4   5   6   7   8   9 category
0       1  1   1  1  -1  -1  -1   1  -1    x_win
1       1  1   1  1  -1  -1  -1  -1   1    x_win
2       1  1   1  1  -1  -1  -1   0   0    x_win
3       1  1   1  1  -1  -1   0  -1   0    x_win
4       1  1   1  1  -1  -1   0   0  -1    x_win
...    .. .. 

# Balancear os dados
## Garantindo que existe amostra proporcional de cada (25% de cada categoria)