In [137]:
from utils.handle_data import get_integer_columns_and_dtype
import pandas as pd
import numpy as np
import random

In [69]:
# The helper's result is handed to read_csv as its `dtype` argument
# (presumably a column -> integer dtype mapping; confirm in utils.handle_data).
draft_csv_path = 'Handled_data/draft_data_best_players.csv'
d_type = get_integer_columns_and_dtype(draft_csv_path)
raw_data = pd.read_csv(draft_csv_path, dtype=d_type)

In [70]:
# Read the card metadata for the configured set.
# Note: this frame is bound to `meta_data_cards`; the later cells visible in this
# notebook work with a separately loaded `meta_data_card`.
SET = "ltr_cards_data"
meta_data_cards = pd.read_csv(f'Handled_data/{SET}.csv')

In [71]:
# Bucket the raw rows by the draft they belong to.
draft_id_group = raw_data.groupby(by='draft_id')

In [72]:
# Keep only the drafts that have exactly 42 rows: drafts with fewer rows AND
# drafts with more rows are both removed.
# The frame is rebuilt from `raw_data` instead of from the previous value of
# `draft_id_group`, so this cell can be re-run safely: after its first run
# `draft_id_group` holds a DataFrame, and DataFrame.filter selects labels rather
# than applying a per-group predicate.
# NOTE(review): 42 is presumably the number of picks in a complete draft - confirm.
draft_id_group = raw_data.groupby('draft_id').filter(lambda picks: len(picks) == 42)

In [73]:
# Seed NumPy's global RNG so the two random draws below are reproducible.
np.random.seed(42)

# Unique draft identifiers. The split is made on identifiers, not on rows, so
# every row of a given draft lands in the same set.
unique_ids = draft_id_group['draft_id'].unique()

# Sample 20% of the unique identifiers (without replacement) for the test set
test_ids = np.random.choice(unique_ids, size=int(len(unique_ids) * 0.20), replace=False)

# Identifiers not drawn for the test set, in their original order.
# Membership is tested against a set: `x not in <ndarray>` rescans the whole
# array for every identifier, which is quadratic over the full id list.
test_id_lookup = set(test_ids)
remaining_ids = [draft_id for draft_id in unique_ids if draft_id not in test_id_lookup]

# Sample 25% of the remaining identifiers for the validation set (0.25 * 0.8 = 0.2)
val_ids = np.random.choice(remaining_ids, size=int(len(remaining_ids) * 0.25), replace=False)

# The rest (60% of total) goes to the train set
val_id_lookup = set(val_ids)
train_ids = [draft_id for draft_id in remaining_ids if draft_id not in val_id_lookup]

In [118]:
# Materialise each split by keeping the rows whose draft_id was assigned to it.
draft_id_column = draft_id_group['draft_id']
test_set = draft_id_group.loc[draft_id_column.isin(test_ids)]
val_set = draft_id_group.loc[draft_id_column.isin(val_ids)]
train_set = draft_id_group.loc[draft_id_column.isin(train_ids)]

In [119]:
# Write the three splits to CSV (train, then test, then val), without the index column.
for split_frame, csv_path in (
    (train_set, 'dataset/train_set.csv'),
    (test_set, 'dataset/test_set.csv'),
    (val_set, 'dataset/val_set.csv'),
):
    split_frame.to_csv(csv_path, index=False)

### Add metadata

In [138]:
# Read the complete card table for the set from disk.
meta_data_card = pd.read_csv(filepath_or_buffer='Handled_data/ltr_cards_data.csv')

In [139]:
# Copy the DataFrame index into an explicit `id` column.
meta_data_card = meta_data_card.assign(id=meta_data_card.index)

In [140]:
# Show the card table as the cell output (pandas truncates the middle rows).
meta_data_card

Unnamed: 0,name,rarity,color_identity,cmc,colors,id
0,"Andúril, Flame of the West",mythic,T,3.0,T,0
1,"Aragorn and Arwen, Wed",mythic,M,6.0,M,1
2,"Aragorn, Company Leader",rare,M,3.0,M,2
3,"Aragorn, the Uniter",mythic,M,4.0,M,3
4,"Arwen, Mortal Queen",mythic,M,3.0,M,4
...,...,...,...,...,...,...
279,"Witch-king, Bringer of Ruin",rare,B,6.0,B,279
280,Witch-king of Angmar,mythic,B,5.0,B,280
281,Wizard's Rockets,common,T,1.0,T,281
282,Wose Pathfinder,common,G,2.0,G,282


In [141]:
# One-hot encode 'rarity' and 'colors' (integer 0/1 columns appended after the
# remaining columns, rarity_* first, then colors_*; the raw columns are removed),
# then standardise 'cmc' to zero mean and unit (sample) standard deviation.
meta_data_card = (
    pd.get_dummies(
        meta_data_card,
        columns=['rarity', 'colors'],
        prefix=['rarity', 'colors'],
        dtype=int,
    )
    .assign(cmc=lambda cards: (cards['cmc'] - cards['cmc'].mean()) / cards['cmc'].std())
)

In [193]:
# Preview the first five encoded rows.
meta_data_card.head(n=5)

Unnamed: 0,name,color_identity,cmc,id,rarity_common,rarity_mythic,rarity_rare,rarity_uncommon,colors_B,colors_G,colors_M,colors_R,colors_T,colors_U,colors_W
0,"Andúril, Flame of the West",T,0.002318,0,0,1,0,0,0,0,0,0,1,0,0
1,"Aragorn and Arwen, Wed",M,1.977281,1,0,1,0,0,0,0,1,0,0,0,0
2,"Aragorn, Company Leader",M,0.002318,2,0,0,1,0,0,0,1,0,0,0,0
3,"Aragorn, the Uniter",M,0.660639,3,0,1,0,0,0,0,1,0,0,0,0
4,"Arwen, Mortal Queen",M,0.002318,4,0,1,0,0,0,0,1,0,0,0,0


In [147]:
# Group the test split by draft.
grouped = test_set.groupby(by='draft_id')

# X collects the input sequences, y collects the output sequences.
X, y = [], []

# Function to get the features of a card
def get_card_features(card_name):
    """Return the feature list of the card named ``card_name``.

    The card is looked up by name in the module-level ``meta_data_card`` frame
    and the result is ``[id, cmc, <colors>, <rarity>]``. ``colors`` and
    ``rarity`` are each emitted as the raw column value while that column still
    exists, or as the values of the one-hot ``colors_*`` / ``rarity_*`` columns
    (in column order) once the raw column has been replaced by its encoding.
    Without this fallback the lookup raises ``KeyError`` as soon as the raw
    columns have been dropped.

    Args:
        card_name: value to match against the ``name`` column.

    Returns:
        list: ``id``, ``cmc``, then the colour feature(s), then the rarity feature(s).

    Raises:
        KeyError: if no row of ``meta_data_card`` has that name.
    """
    matches = meta_data_card[meta_data_card['name'] == card_name]
    if matches.empty:
        # Fail loudly rather than returning empty selections for an unknown card.
        raise KeyError(card_name)
    # Always reduce to a single row; if a name were duplicated, the first match is used.
    card_row = matches.iloc[0]

    features = [card_row['id'], card_row['cmc']]
    for field in ('colors', 'rarity'):
        if field in matches.columns:
            features.append(card_row[field])
        else:
            # The trailing underscore keeps e.g. 'color_identity' out of the 'colors_' group.
            prefix = field + '_'
            features.extend(
                card_row[col] for col in matches.columns if str(col).startswith(prefix)
            )
    return features



In [177]:
# Build a name -> list-of-column-values mapping once, so that per-card lookups
# become dictionary accesses instead of DataFrame scans.
card_features_dict = meta_data_card.set_index('name').transpose().to_dict(orient='list')

def get_card_features(card_name):
    """Look up ``card_name`` in ``card_features_dict`` and return its feature list.

    The stored list is returned as-is (no copy). An unknown name raises ``KeyError``.
    """
    features = card_features_dict[card_name]
    return features

# NOTE(review): only the first 42 rows of the test split are grouped here, which
# looks like a single-draft trial run - confirm before using the full split.
grouped = test_set[:42].groupby('draft_id')
X = []  # List to store input sequences (one entry per draft)
y = []  # List to store output sequences (flat: one pick per row, across all drafts)

# The pack columns and the card name embedded in each column label are the same
# for every row, so resolve them once instead of re-filtering the row index and
# re-splitting the labels on every iteration.
pack_columns = [col for col in test_set.columns if 'pack_card_' in str(col)]
pack_card_names = {col: col.split("pack_card_")[1] for col in pack_columns}

for _, group in grouped:
    sequence = []
    for _, row in group.iterrows():
        # Cards available in this row: the pack columns whose value is 1
        available_cards = [pack_card_names[col] for col in pack_columns if row[col] == 1]

        # For each available card, get its features
        sequence.append([get_card_features(card_name) for card_name in available_cards])

        # The output is the pick column
        y.append(row['pick'])

    X.append(sequence)