# Pipeline de Pré-processamento - Tic Tac Toe
**Author**: Gabriela Dellamora Paim, Bruno Duarte Carlan

**Version**: 12/04/2025

**Python Ver**: 3.12.9

In [1]:
import pandas as pd

PATH_OLD = './data_old/tic-tac-toe.data'
PATH_NEW = './data_processed.csv'
PATH_ON_GOING = './data_ongoing.csv'
X_WIN   = X = '1'
O_WIN   = O ='-1'
BLANK   = '0'
DRAW    = '0'
ONGOING = '0.5'

In [2]:
# Dataframe onde X sempre é quem começa
df_x_begins = pd.read_csv(PATH_OLD, index_col=False)
df_x_begins.columns = ['0', '1', '2', '3', '4', '5', '6', '7', '8', 'category']
df_x_begins.describe()

Unnamed: 0,0,1,2,3,4,5,6,7,8,category
count,957,957,957,957,957,957,957,957,957,957
unique,3,3,3,3,3,3,3,3,3,2
top,x,x,x,x,x,x,x,x,x,positive
freq,417,377,417,377,458,378,417,378,418,625


# Tratar Dados Existentes

In [3]:
# Para facilitar a diferenciacao entre X venceu e O venceu
df_x_begins.replace(to_replace={'x' : X, 'o' : O, 'b' : BLANK, 'positive' : X_WIN}, inplace=True)

In [4]:
# Observações onde O começa
df_o_begins = df_x_begins.replace(to_replace={X:O, O:X, X_WIN:O_WIN})

df = pd.concat([df_x_begins, df_o_begins], ignore_index=True).copy()
print(f'BEFORE={df_x_begins.shape} : AFTER={df.shape}')

print(df.category.unique())
df.describe()

BEFORE=(957, 10) : AFTER=(1914, 10)
['1' 'negative' '-1']


Unnamed: 0,0,1,2,3,4,5,6,7,8,category
count,1914,1914,1914,1914,1914,1914,1914,1914,1914,1914
unique,3,3,3,3,3,3,3,3,3,3
top,1,1,1,1,-1,-1,-1,1,-1,negative
freq,752,707,752,707,797,707,752,707,752,664


In [5]:
def get_state(symbol):
    ''' Retorna simbolo utilizado pela pipeline.'''
    if symbol == O: return O_WIN
    if symbol == X: return X_WIN
    return DRAW

def check_wins(obs):
    '''Valida estado final do jogo. Pode ser O_WIN, X_WIN e DRAW. Trata casos que não precisam de alteração'''
    # lazy evaluation
    if obs.category != 'negative':
        return obs.category

    # Diagonais
    if (obs.iloc[0] == obs.iloc[4] == obs.iloc[8] != BLANK) or (obs.iloc[2] == obs.iloc[4] == obs.iloc[6] != BLANK):
        return get_state(obs.iloc[4])

    for i in range(3):
        # Linhas
        if obs.iloc[0 + i] == obs.iloc[3 + i] == obs.iloc[6 + i] != BLANK:
            return get_state(obs.iloc[0 + i])
        # Colunas
        if obs.iloc[0 + (i * 3)] == obs.iloc[1 + (i * 3)] == obs.iloc[2 + (i * 3)] != BLANK:
            return get_state(obs.iloc[0 + (i * 3)])
    return DRAW

In [6]:
df['category'] = df.apply(check_wins, axis=1)

In [7]:
print(df.category.unique())
print(f'Total={df.shape}')
print(f'Draw={df[df.category == DRAW].shape}')
df.describe()
df[df.category == DRAW].sample(10)

['1' '-1' '0']
Total=(1914, 10)
Draw=(32, 10)


Unnamed: 0,0,1,2,3,4,5,6,7,8,category
949,1,-1,-1,-1,1,1,1,1,-1,0
1907,1,-1,-1,-1,-1,1,1,1,-1,0
1912,1,-1,1,1,-1,-1,-1,1,-1,0
1905,-1,1,-1,1,1,-1,-1,-1,1,0
1904,-1,1,-1,1,-1,-1,1,-1,1,0
953,-1,1,-1,1,1,-1,1,-1,1,0
1908,1,-1,-1,-1,1,1,-1,1,-1,0
951,-1,1,1,1,-1,-1,1,-1,1,0
1898,-1,-1,1,1,-1,-1,-1,1,1,0
952,-1,1,1,1,-1,-1,-1,1,1,0


In [8]:
# Exportar
df.to_csv(PATH_NEW, index=False)