## ELO or Year init

Note: you should change the naming of the files, as they don't always correspond to the exact dataset you'd have in mind.

In [None]:
# Threshold shared by the cells below: it is compared against the player
# ratings in the filter step and embedded in the names of the generated files.
elo_or_year = 2010

## Filter elo

In [None]:
# Filtering
import pandas as pd

# Read the source games.
games = pd.read_csv('./data/chess_data_2010_titled.csv')

# A game is kept only when the side that won is rated above the threshold;
# rows whose winner is neither 'white' nor 'black' match neither mask.
white_winner_qualifies = games['winner'].eq('white') & games['white_rating'].gt(elo_or_year)
black_winner_qualifies = games['winner'].eq('black') & games['black_rating'].gt(elo_or_year)
kept_columns = ['winner', 'white_rating', 'black_rating', 'moves']

# Select the matching rows and the needed columns, then renumber from 0.
games = games.loc[white_winner_qualifies | black_winner_qualifies, kept_columns]
games = games.reset_index(drop=True)
print('length:', len(games))
print(games)

# Persist the filtered games under a name derived from the threshold.
filtered_path = "./data/games_"+str(elo_or_year)+".csv"
games.to_csv(filtered_path, index=False)

## Load Filtered data

In [None]:
import pandas as pd

# NOTE(review): this reads the same CSV the filter step takes as input, not
# the "./data/games_<elo_or_year>.csv" file that step writes -- confirm which
# dataset is intended here.
source_csv = './data/chess_data_2010_titled.csv'
games = pd.read_csv(source_csv)
print(games)

## Set up dataframe

In [None]:
# Column titles: 64 position columns named p0..p63, followed by the
# 'player' column and the 'uci' move column.
positions = ['p'+str(i) for i in range(64)]
# Build a new list instead of aliasing `positions`; with `columns = positions`
# the in-place `+=` below would also append the extra names to `positions`.
columns = positions + ['player']
# columns += ['alg'] # no need for alg
columns += ['uci']
print(columns)

# Empty frame with the final column layout; rows are added by a later step.
games_transformed = pd.DataFrame(columns=columns)

# save headers
# games_transformed.to_csv('./data/games_cleaned_'+str(elo_or_year)+'.csv', index=False, mode='a')

## Final filtering

In [None]:
import os

from chess_transformation import *

# CSV that accumulates the transformed rows across batches (and across runs).
cleaned_path = './data/games_cleaned_'+str(elo_or_year)+'.csv'
# Number of rows buffered in memory before they are appended to the CSV.
BATCH_SIZE = 1000


def _append_batch(rows, path, column_names):
    """Append `rows` to the CSV at `path` and return them as a DataFrame.

    The header line is written only when the file is missing or empty, so
    the file ends up with exactly one header whether this is a fresh run or
    a resumed one appending to existing data.
    """
    batch = pd.DataFrame(rows, columns=column_names)
    needs_header = not os.path.exists(path) or os.path.getsize(path) == 0
    batch.to_csv(path, index=False, mode='a', header=needs_header)
    return batch


index = 0    # total number of rows produced during this run
curr = 8130  # first game (by position in `games`) to process; earlier ones are skipped
pending_rows = []
for i in range(curr, len(games)):
    game = games.iloc[i]

    moves = game['moves']
    winner = game['winner']

    data = algebraic_game_to_training_dataset(moves, winner)
    boards = data['boards']
    moves_uci = data['moves_uci']

    for j, board in enumerate(boards):
        # One row per board: the board values, then the game's winner in the
        # 'player' column, then the move in UCI notation.
        pending_rows.append(board+[winner]+[moves_uci[j]])
        index += 1
        if index % BATCH_SIZE == 0:  # flush every BATCH_SIZE rows (not games)
            _append_batch(pending_rows, cleaned_path, columns)
            pending_rows = []
            print(str(i)+'/'+str(len(games)), end='\r')

# Flush the rows left over after the last full batch; previously these were
# only printed and never written to the CSV.
if pending_rows:
    games_transformed = _append_batch(pending_rows, cleaned_path, columns)
else:
    games_transformed = pd.DataFrame(columns=columns)
print(games_transformed)

## Scrambling

In [None]:
import pandas as pd

# Input: the transformed rows; output: the same rows in random order.
cleaned_csv = './data/games_cleaned_'+str(elo_or_year)+'.csv'
scrambled_csv = './data/games_cleaned_'+str(elo_or_year)+'_scrambled.csv'

games_cleaned = pd.read_csv(cleaned_csv)
# Sampling the full fraction without replacement yields a random permutation
# of the rows; no seed is set, so the order differs between runs.
shuffled_rows = games_cleaned.sample(frac=1)
games_cleaned = shuffled_rows.reset_index(drop=True)
print(games_cleaned)
games_cleaned.to_csv(scrambled_csv, index=False)

## Turning the data to tensors

In [None]:
import torch
from model import one_hot_encode_labels, encode_player_col
from chess_transformation import encode_uci
import pandas as pd

# Load the shuffled rows that the tensor-building steps below consume.
scrambled_csv = './data/games_cleaned_'+str(elo_or_year)+'_scrambled.csv'
data_pandas = pd.read_csv(scrambled_csv)
print(data_pandas)

In [None]:
# Build the input tensor X from the 64 position columns and the player column.
position_columns = ['p'+str(i) for i in range(64)]

X_pos_pandas = data_pandas[position_columns]
X_col_pandas = data_pandas['player']

X_pos_values = X_pos_pandas.values
X_col_values = X_col_pandas.values

# Encode each row: the position values and the player value are encoded
# separately, then joined along dim=1.
# NOTE(review): the concatenation assumes one_hot_encode_labels returns a 2-D
# tensor and encode_player_col a 1-D tensor of matching length -- confirm
# against model.py.
X_list = [
    torch.cat(
        (one_hot_encode_labels(x_pos), encode_player_col(x_col).unsqueeze(1)),
        dim=1,
    )
    for x_pos, x_col in zip(X_pos_values, X_col_values)
]

# Stack the per-row tensors along dim=0 and save the result.
X = torch.cat(X_list, dim=0)
print(X.shape)
torch.save(X, './large_data/X_tensor_'+str(elo_or_year)+'.pt')

In [None]:
# Build the label tensor Y: encode every value of the 'uci' column, then
# concatenate the encoded tensors along dim=0 and save the result.
uci_moves = data_pandas['uci'].values
Y_list = [encode_uci(uci_move) for uci_move in uci_moves]

Y = torch.cat(Y_list, dim=0)
print(Y.shape)

y_tensor_path = './large_data/Y_tensor_'+str(elo_or_year)+'.pt'
torch.save(Y, y_tensor_path)