In [3]:
import numpy as np
import pandas as pd
import pysubgroup as ps
from mlxtend.frequent_patterns import apriori, association_rules

In [4]:
df_original = pd.read_csv('./Data/tournaments.csv')
df = df_original.copy()

#### Tratando dataframe

In [5]:
df.replace(np.nan, 0, inplace=True)
df.tail()

Unnamed: 0,id_card,name_card,amount_card,price_card,energy_type_card,type_card,combo_type_id,combo_type_name,id_player,name_player,...,region_tournament,country_tournament,year_tournament,month_tournament,day_tournament,valid_rotation_at_tournament,rotation_name,year_begin,month_begin,day_begin
114286,GRI130a,Rescue Stretcher,2,0.72,0,Trainer,47.0,Zoroark Greninja,1900,Jin Woo Lee,...,AS-OC,Korea,2018,12,22,0,standard_2019,2018,8,28
114287,GRI119,Aqua Patch,2,0.27,0,Trainer,47.0,Zoroark Greninja,1900,Jin Woo Lee,...,AS-OC,Korea,2018,12,22,0,standard_2019,2018,8,28
114288,FLF092,Pal Pad,1,0.68,0,Trainer,47.0,Zoroark Greninja,1900,Jin Woo Lee,...,AS-OC,Korea,2018,12,22,0,standard_2019,2018,8,28
114289,GRI121,Choice Band,2,0.2,0,Trainer,47.0,Zoroark Greninja,1900,Jin Woo Lee,...,AS-OC,Korea,2018,12,22,0,standard_2019,2018,8,28
114290,GRI120,Brooklet Hill,2,0.32,0,Trainer,47.0,Zoroark Greninja,1900,Jin Woo Lee,...,AS-OC,Korea,2018,12,22,0,standard_2019,2018,8,28


#### Tentaremos encontrar subgrupos frequentes de cartas que aparecem em torneios diferentes de batalha de cartas pokemon no ano de 2019

- Definindo todas as regioes do dataset

In [6]:
df = df[df['region_tournament'] != 0]

# eliminando cartas do tipo treinador
df = df[df['type_card'] != 'Trainer']
regions = df['region_tournament'].unique()

regions

array(['SA', 'EU', 'AS-OC', 'JP'], dtype=object)

- Definindo treinadores com suas cartas para cada regiao e realizando apriori para descobrir padroes de cartas utilizadas por jogadores diferentes

In [7]:
# Agrupando cartas de cada jogador no dataset
# Dessa forma, separamos os dados no formato das transações utilizadas no Apriori
def agrupa_jogador_cartas(df):
    dict_jogador_cartas = {}
    for _, row in df.iterrows():
        if row['name_player'] not in dict_jogador_cartas:
            dict_jogador_cartas[row['name_player']] = []
        dict_jogador_cartas[row['name_player']].append(row['name_card'])
    return len(dict_jogador_cartas), dict_jogador_cartas

# Fazendo filtro por região somente no ano de 2019
info_por_regiao = {}
for region in regions:
    print(f"region: {region}")
    df_region = df[(df['region_tournament'] == region) & (df['year_tournament'] == 2019)]
    jogadores = df_region['name_player'].unique()
    cartas = df_region['name_card'].unique()

    quantidade_jogadores, dict_jogador_cartas = agrupa_jogador_cartas(df_region)

    # Cria um dataframe que as colunas sao todas as cartas existentes no dataset
    df_jogadores_cartas = pd.DataFrame(columns=cartas)

    for jogador in jogadores:
        cartas_jogador = dict_jogador_cartas[jogador]
        linha = []
        # Construo matriz de incidencia -> True se o jogador possui a carta, False caso contrario
        for carta in cartas:
            if carta in cartas_jogador:
                linha.append(True)
            else:
                linha.append(False)
        df_jogadores_cartas.loc[len(df_jogadores_cartas)] = linha

    # Apriori

    # Suporte - O quanto a carta aparece no total de cartas usadas
    frequent_itemsets = apriori(df_jogadores_cartas, min_support=0.22, use_colnames=True) # use_colnames -> nome das cartas ao inves de indices
    frequent_itemsets.sort_values(by='support', ascending=False, inplace=True)

    # Regras: Se compro A (antecedente) -> compro B (consequente) com confianca de X%
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.4) # confiança das regras geradas -> 40%
    # Lift -> Se a regra é relevante ou não (se é maior que 1, é relevante)
    # Ignoro algumas colunas para facilitar a visualização
    rules = rules.sort_values(by='lift', ascending=False).drop(['antecedent support', 'consequent support', 'leverage', 'conviction', 'zhangs_metric'], axis=1)
    
    info_por_regiao[region] = {'itemsets': frequent_itemsets[:3], 'rules': rules[:3]}

    print(f"Total de jogadores: {quantidade_jogadores}")
    print("Itemsets frequentes")
    print(frequent_itemsets)
    print("Regras")
    print(rules)
    print("\n\n")

region: SA
Total de jogadores: 73
Itemsets frequentes
    support                                    itemsets
0  0.589041                                   (Jirachi)
1  0.493151                                (Dedenne-GX)
2  0.410959              (Mega Lopunny & Jigglypuff-GX)
6  0.342466                       (Jirachi, Dedenne-GX)
3  0.301370               (Arceus & Dialga & Palkia-GX)
7  0.287671  (Dedenne-GX, Mega Lopunny & Jigglypuff-GX)
4  0.246575                                 (Keldeo-GX)
5  0.246575                                 (Cryogonal)
8  0.246575    (Arceus & Dialga & Palkia-GX, Cryogonal)
Regras
                      antecedents                     consequents   support  \
4   (Arceus & Dialga & Palkia-GX)                     (Cryogonal)  0.246575   
5                     (Cryogonal)   (Arceus & Dialga & Palkia-GX)  0.246575   
3  (Mega Lopunny & Jigglypuff-GX)                    (Dedenne-GX)  0.287671   
2                    (Dedenne-GX)  (Mega Lopunny & Jigglypuff-G

### ANALISAR --> PQ AS REGRAS SÃO REPETIDAS?
EX: ANTECEDENTE A - CONSEQUENTE B
ANTECEDENTE B - CONSEQUENTE A

TEM COMO RETIRAR ISSO?

- Analisando os resultados mais relevantes de cada região:

In [8]:
for region in regions:
    print(f"Região: {region}")
    print("Itemsets frequentes")
    print(info_por_regiao[region]['itemsets'])
    print("Regras")
    print(info_por_regiao[region]['rules'])
    print("\n\n")

Região: SA
Itemsets frequentes
    support                        itemsets
0  0.589041                       (Jirachi)
1  0.493151                    (Dedenne-GX)
2  0.410959  (Mega Lopunny & Jigglypuff-GX)
Regras
                      antecedents                    consequents   support  \
4   (Arceus & Dialga & Palkia-GX)                    (Cryogonal)  0.246575   
5                     (Cryogonal)  (Arceus & Dialga & Palkia-GX)  0.246575   
3  (Mega Lopunny & Jigglypuff-GX)                   (Dedenne-GX)  0.287671   

   confidence      lift  
4    0.818182  3.318182  
5    1.000000  3.318182  
3    0.700000  1.419444  



Região: EU
Itemsets frequentes
    support       itemsets
5  0.650602  (Tapu Koko ♢)
0  0.590361      (Jirachi)
7  0.590361    (Marshadow)
Regras
                     antecedents                  consequents   support  \
337  (Tapu Koko ♢, Tapu Lele-GX)      (Zeraora-GX, Marshadow)  0.240964   
334      (Zeraora-GX, Marshadow)  (Tapu Koko ♢, Tapu Lele-GX)  0.24096

In [9]:
print("Media de preço das cartas em todos os torneios em todos os anos" , df['price_card'].mean())
print("Media de preço das cartas em todos os torneios no ano de 2019" , df[(df['year_tournament'] == 2019)]['price_card'].mean())
print("Preço da carta Jirachi em 2019" , df[(df['year_tournament'] == 2019) & (df['name_card'] == 'Jirachi')]['price_card'].mean())

Media de preço das cartas em todos os torneios em todos os anos 2.674447953922616
Media de preço das cartas em todos os torneios no ano de 2019 4.295818992989164
Preço da carta Jirachi em 2019 6.5


In [10]:
print("Media de quantidade máxima de cartas iguais em todos os torneios em todos os anos" , df['amount_card'].mean())
print("Media de preço das cartas em todos os torneios no ano de 2019" , df[(df['year_tournament'] == 2019)]['amount_card'].mean())
print("Preço da carta Jirachi em 2019" , df[(df['year_tournament'] == 2019) & (df['name_card'] == 'Jirachi')]['amount_card'].mean())

Media de quantidade máxima de cartas iguais em todos os torneios em todos os anos 1.8847600910399926
Media de preço das cartas em todos os torneios no ano de 2019 1.751434034416826
Preço da carta Jirachi em 2019 3.1601941747572817


### SA
Jirachi, Dedenne-GX, Mega Lopunny & Jigglypuff-GX
Regras Arceus & Dialga & Palkia-GX e Cryogonal

### EU
Tapu Koko ♢, Jirachi, Marshdown
Regras envolvendo Tapu Lele-GX, Tapu Koko ♢, Marshdown e Zeraora-GX

### AS-OC
Jirachi, Tapu Lele-GX, Marshdown
Destaque para regras enolvendo Tapu Koko ♢, Jirachi, Zapdos e Absol

### JP
Jirachi, Dedenne-GX, Marshdow (destaque para regras envolvendo essa carta)

Percebe-se que a carta Jirachi foi almplamente utilizadas em torneios de 2019 em diferentes regiões ao redor do mundo. Jirachi é uma carta com um alto custo em comparação a média dos valores nos torneios (média de 2.95 contra custo de 6.5 da carta Jirachi em 2019), porém ao mesmo tempo, pode ser mais utilizada em um mesmo deck (3 vezes em comparação a média de 2.17 naquele ano)


--------------------------------------------------------------

# Analise para combos de cartas

In [11]:
df = df_original.copy()

df = df[(df['year_tournament'] == 2019)]


df.replace(np.nan, 0, inplace=True)
df.tail()

Unnamed: 0,id_card,name_card,amount_card,price_card,energy_type_card,type_card,combo_type_id,combo_type_name,id_player,name_player,...,region_tournament,country_tournament,year_tournament,month_tournament,day_tournament,valid_rotation_at_tournament,rotation_name,year_begin,month_begin,day_begin
113994,UNB170,Energy Spinner,1,0.13,0,Trainer,84.0,Zapdos,1889,Sumin Lim,...,AS-OC,Korea,2019,2,16,SUM-TEU,standard_2019,2018,8,28
113995,UPR122a,Escape Board,2,0.0,0,Trainer,84.0,Zapdos,1889,Sumin Lim,...,AS-OC,Korea,2019,2,16,SUM-TEU,standard_2019,2018,8,28
113996,GRI121a,Choice Band,2,19.74,0,Trainer,84.0,Zapdos,1889,Sumin Lim,...,AS-OC,Korea,2019,2,16,SUM-TEU,standard_2019,2018,8,28
113997,CES143,Shrine of Punishment,2,0.33,0,Trainer,84.0,Zapdos,1889,Sumin Lim,...,AS-OC,Korea,2019,2,16,SUM-TEU,standard_2019,2018,8,28
113998,LOT191,Thunder Mountain ♢,1,1.07,0,Trainer,84.0,Zapdos,1889,Sumin Lim,...,AS-OC,Korea,2019,2,16,SUM-TEU,standard_2019,2018,8,28


In [12]:
# If 'presence' column doesn't exist, create it for the BinaryTarget
df['presence'] = (df['name_card'] == 'Jirachi').astype(int)

In [13]:
len(df)

11446

In [14]:
# Define the target for subgroup discovery
target = ps.BinaryTarget('presence', 1)

# Define the search space
combo_selectors = [ps.EqualitySelector('combo_type_name', value) for value in df['combo_type_name'].unique()]
ranking_selector = [ps.EqualitySelector('ranking_player_tournament', value) for value in df['ranking_player_tournament'].unique()]
category_selectors = [ps.EqualitySelector('category_tournament', value) for value in df['category_tournament'].unique()]
region_selectors = [ps.EqualitySelector('region_tournament', value) for value in df['region_tournament'].unique()]
search_space = combo_selectors +  ranking_selector + category_selectors + region_selectors

In [15]:
search_space

[combo_type_name=='Reshiram & Charizard Fire Box',
 combo_type_name=='Mewtwo & Mew',
 combo_type_name=='ADP Keldeo',
 combo_type_name=='Blacephalon Naganadel',
 combo_type_name=='Florges Dolls',
 combo_type_name=='Garchomp & Giratina',
 combo_type_name=='Green's ADP',
 combo_type_name=='Blacephalon Pidgeotto',
 combo_type_name=='Pidgeotto Control',
 combo_type_name=='Gardevoir & Sylveon',
 combo_type_name=='Naganadel & Guzzlord',
 combo_type_name=='Charizard & Braixen',
 combo_type_name=='Malamar Giratina',
 combo_type_name=='Quagsire Silvally',
 combo_type_name=='Pikachu & Zekrom',
 combo_type_name=='Mewtwo & Mew Fire Box',
 combo_type_name=='Quagsire Naganadel',
 combo_type_name=='Charizard',
 combo_type_name=='Blacephalon Silvally',
 combo_type_name=='Zoroark Dewgong',
 combo_type_name=='Blacephalon',
 combo_type_name=='Pikachu & Zekrom Zapdos',
 combo_type_name=='Ultra Malamar ',
 combo_type_name=='Zapdos',
 combo_type_name=='Lucario & Melmetal Vileplume',
 combo_type_name=='Spirit

In [16]:

task = ps.SubgroupDiscoveryTask(df, target, search_space, result_set_size=15, depth=5, qf=ps.WRAccQF())

# Run the SD-Map algorithm
result = ps.SimpleDFS().execute(task)


In [17]:
# Display the results
for row in result.to_dataframe().itertuples():
    print(row)

Pandas(Index=0, quality=0.001806979198588307, subgroup=(combo_type_name=='Ultra Malamar '), size_sg=857, size_dataset=11446, positives_sg=40, positives_dataset=258, size_complement=10589, relative_size_sg=0.07487331818976062, relative_size_complement=0.9251266818102394, coverage_sg=0.15503875968992248, coverage_complement=0.8449612403100775, target_share_sg=0.046674445740956826, target_share_complement=0.02058740202096515, target_share_dataset=0.022540625546042286, lift=2.0706810308177976)
Pandas(Index=1, quality=0.0017953465801277201, subgroup=(combo_type_name=='Zapdos'), size_sg=996, size_dataset=11446, positives_sg=43, positives_dataset=258, size_complement=10450, relative_size_sg=0.0870172986196051, relative_size_complement=0.912982701380395, coverage_sg=0.16666666666666666, coverage_complement=0.8333333333333334, target_share_sg=0.04317269076305221, target_share_complement=0.02057416267942584, target_share_dataset=0.022540625546042286, lift=1.9153279785809907)
Pandas(Index=2, qual

### A ideia é achar subgrupos ligados a carta Jirachi - Mas pela qualidade não deu muito certo

-----------------------------------------------------------------

- Teste top cartas


In [87]:
df = df_original.copy()

df = df[(df['ranking_player_tournament'] != np.nan)]
#df.replace(np.nan, 0, inplace=True)
df.columns

Index(['id_card', 'name_card', 'amount_card', 'price_card', 'energy_type_card',
       'type_card', 'combo_type_id', 'combo_type_name', 'id_player',
       'name_player', 'country_player', 'all_time_score',
       'ranking_player_tournament', 'id_tournament', 'category_tournament',
       'name_tournament', 'region_tournament', 'country_tournament',
       'year_tournament', 'month_tournament', 'day_tournament',
       'valid_rotation_at_tournament', 'rotation_name', 'year_begin',
       'month_begin', 'day_begin'],
      dtype='object')

In [88]:
df.describe

<bound method NDFrame.describe of         id_card         name_card  amount_card  price_card energy_type_card  \
0        SSP272          Archeops            4        0.70        Colorless   
1        SIT138           Lugia V            3        6.20        Colorless   
2        SIT139       Lugia VSTAR            3        8.71        Colorless   
3        LOR143           Snorlax            3        0.93        Colorless   
4        SSP250        Lumineon V            2        1.90            Water   
...         ...               ...          ...         ...              ...   
114286  GRI130a  Rescue Stretcher            2        0.72              NaN   
114287   GRI119        Aqua Patch            2        0.27              NaN   
114288   FLF092           Pal Pad            1        0.68              NaN   
114289   GRI121       Choice Band            2        0.20              NaN   
114290   GRI120     Brooklet Hill            2        0.32              NaN   

       type_card 

In [89]:
df = df[['name_card', 'amount_card', 'price_card', 'energy_type_card', 'type_card', 'name_player', 'all_time_score', 'category_tournament' ,'ranking_player_tournament']]


In [90]:
df = df.dropna(subset=['name_card', 'amount_card', 'price_card', 'energy_type_card', 'type_card', 'name_player', 'all_time_score', 'category_tournament' ,'ranking_player_tournament'])

df.describe

<bound method NDFrame.describe of            name_card  amount_card  price_card energy_type_card type_card  \
0           Archeops            4        0.70        Colorless   Pokémon   
1            Lugia V            3        6.20        Colorless   Pokémon   
2        Lugia VSTAR            3        8.71        Colorless   Pokémon   
3            Snorlax            3        0.93        Colorless   Pokémon   
4         Lumineon V            2        1.90            Water   Pokémon   
...              ...          ...         ...              ...       ...   
114273       Froakie            4        0.20            Water   Pokémon   
114274     Frogadier            4        0.33            Water   Pokémon   
114275   Greninja-GX            3        7.32               GX   Pokémon   
114276  Tapu Lele-GX            1        3.39               GX   Pokémon   
114277     Tapu Koko            1        3.04        Lightning   Pokémon   

             name_player  all_time_score category_tou

In [72]:
df['top_position'] = (df['ranking_player_tournament'] <= 3).astype(int)

# Define the target for subgroup discovery
target = ps.BinaryTarget('top_position', 1)

# Define selectors for relevant features
namep_selectors = [ps.EqualitySelector('name_player', value) for value in df['name_player'].unique()]
energy_selectors = [ps.EqualitySelector('energy_type_card', value) for value in df['energy_type_card'].unique()]
type_selectors = [ps.EqualitySelector('type_card', value) for value in df['type_card'].unique()]
name_selectors = [ps.EqualitySelector('name_card', value) for value in df['name_card'].unique()]

# Combine all selectors to form the search space
search_space = namep_selectors + energy_selectors + name_selectors + type_selectors

search_space = ps.create_selectors(df, ignore=['top_position'])

# Create the Subgroup Discovery Task
task = ps.SubgroupDiscoveryTask(
    df, 
    target, 
    search_space, 
    result_set_size=10, 
    depth=6, 
    qf=ps.WRAccQF()  # Quality function to evaluate subgroups
)

# Run the SD-Map algorithm
result = ps.BeamSearch().execute(task)

df_result = result.to_dataframe()

df_result

# Display the results
#for row in result.to_dataframe().itertuples():
    #print(row.subgroup)

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.058408,ranking_player_tournament<10,8412,42447,3092,3092,34035,0.198177,0.801823,1.0,0.0,0.36757,0.0,0.072844,5.046006
1,0.058408,ranking_player_tournament<10 AND type_card=='P...,8412,42447,3092,3092,34035,0.198177,0.801823,1.0,0.0,0.36757,0.0,0.072844,5.046006
2,0.030103,amount_card==1 AND ranking_player_tournament<10,4286,42447,1590,3092,38161,0.100973,0.899027,0.51423,0.48577,0.370975,0.03936,0.072844,5.092751
3,0.030103,amount_card==1 AND ranking_player_tournament<1...,4286,42447,1590,3092,38161,0.100973,0.899027,0.51423,0.48577,0.370975,0.03936,0.072844,5.092751
4,0.024764,category_tournament=='others' AND ranking_play...,3375,42447,1297,3092,39072,0.079511,0.920489,0.41947,0.58053,0.384296,0.045941,0.072844,5.275623
5,0.024764,category_tournament=='others' AND ranking_play...,3375,42447,1297,3092,39072,0.079511,0.920489,0.41947,0.58053,0.384296,0.045941,0.072844,5.275623
6,0.024366,all_time_score>=156 AND ranking_player_tournam...,2632,42447,1226,3092,39815,0.062007,0.937993,0.396507,0.603493,0.465805,0.046867,0.072844,6.394581
7,0.024366,all_time_score>=156 AND ranking_player_tournam...,2632,42447,1226,3092,39815,0.062007,0.937993,0.396507,0.603493,0.465805,0.046867,0.072844,6.394581
8,0.017975,category_tournament=='others',7331,42447,1297,3092,35116,0.172709,0.827291,0.41947,0.58053,0.17692,0.051116,0.072844,2.428758
9,0.017975,category_tournament=='others' AND type_card=='...,7331,42447,1297,3092,35116,0.172709,0.827291,0.41947,0.58053,0.17692,0.051116,0.072844,2.428758


- Como possível ver, muita redundancia nos subgrupos do beam search

# PRECISA ANALISAR RESULTADO