## Bibliotecas Utilizadas e Carregamento do Dataset

In [8]:
import numpy as np
import pandas as pd
import pysubgroup as ps
from mlxtend.frequent_patterns import apriori, association_rules

## Tratando o Dataframe

O dataset é carregado a partir do arquivo CSV. Em seguida, são realizados alguns tratamentos: a substituição dos valores nulos por 0, a remoção dos registros sem região de torneio e a remoção das cartas do tipo Treinador (`Trainer`). Após isso, o dataset é consultado para identificar as regiões de torneio presentes.


In [9]:
# Keep the raw CSV contents in `df_original`; a later cell starts over from it.
df_original = pd.read_csv('./Data/tournaments.csv')

# Missing values become 0 so they can be filtered with a plain comparison below.
df = df_original.replace(np.nan, 0)

# Keep only rows that have a tournament region (0 means it was missing)
# and whose card type is not 'Trainer'.
has_region = df['region_tournament'] != 0
not_trainer = df['type_card'] != 'Trainer'
df = df[has_region & not_trainer]

regions = df['region_tournament'].unique()

### Agrupamento de Cartas por Jogador e Região

Para cada região, considerando apenas os torneios de 2019, as cartas são agrupadas por jogador e o algoritmo Apriori é aplicado para encontrar padrões frequentes de cartas utilizadas pelos jogadores.

O algoritmo Apriori é uma abordagem clássica para mineração de regras de associação. Ele costuma ser usado em recomendações de produtos e, neste caso, é usado para analisar itemsets frequentes de cartas.

In [10]:
def agrupa_jogador_cartas(df):
    """Group card names by player.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``name_player`` and ``name_card``.

    Returns
    -------
    tuple[int, dict]
        The number of distinct players, and a dict mapping each player to the
        list of that player's card names in row order (duplicates are kept).
        Players appear in the dict in order of first occurrence.
    """
    dict_jogador_cartas = {}
    # Walk the two columns directly: ``iterrows()`` would build a full Series
    # for every row just to read two values from it.
    for jogador, carta in zip(df['name_player'], df['name_card']):
        dict_jogador_cartas.setdefault(jogador, []).append(carta)
    return len(dict_jogador_cartas), dict_jogador_cartas

# Analysis parameters, named so they can be tuned in one place.
ANALYSIS_YEAR = 2019      # only tournaments from this year are mined
MIN_SUPPORT = 0.22        # minimum share of players an itemset must appear in
MIN_CONFIDENCE = 0.4      # minimum confidence for an association rule
# Rule columns kept for display. They are selected explicitly (instead of
# dropping a fixed list) because the set of extra metric columns returned by
# `association_rules` differs between mlxtend versions.
RULE_COLUMNS = ['antecedents', 'consequents', 'support', 'confidence', 'lift']

# region -> {'itemsets': top-3 frequent itemsets, 'rules': top-3 rules by lift}
info_por_regiao = {}
for region in regions:
    df_region = df[(df['region_tournament'] == region) & (df['year_tournament'] == ANALYSIS_YEAR)]
    jogadores = df_region['name_player'].unique()
    cartas = df_region['name_card'].unique()

    quantidade_jogadores, dict_jogador_cartas = agrupa_jogador_cartas(df_region)

    # One boolean row per player and one column per card. All rows are
    # collected first and the frame is built once: appending with
    # `.loc[len(df)] = row` re-allocates the frame on every iteration.
    linhas = []
    for jogador in jogadores:
        cartas_jogador = set(dict_jogador_cartas[jogador])  # set -> O(1) membership tests
        linhas.append([carta in cartas_jogador for carta in cartas])
    df_jogadores_cartas = pd.DataFrame(linhas, columns=cartas, dtype=bool)

    frequent_itemsets = apriori(df_jogadores_cartas, min_support=MIN_SUPPORT, use_colnames=True)
    frequent_itemsets = frequent_itemsets.sort_values(by='support', ascending=False)

    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=MIN_CONFIDENCE)
    rules = rules.sort_values(by='lift', ascending=False)[RULE_COLUMNS]

    info_por_regiao[region] = {'itemsets': frequent_itemsets[:3], 'rules': rules[:3]}

    print(f"Total de jogadores: {quantidade_jogadores}")
    print("Itemsets frequentes")
    print(frequent_itemsets)
    print("Regras")
    print(rules)
    print("\n\n")

Total de jogadores: 73
Itemsets frequentes
    support                                    itemsets
0  0.589041                                   (Jirachi)
1  0.493151                                (Dedenne-GX)
2  0.410959              (Mega Lopunny & Jigglypuff-GX)
6  0.342466                       (Jirachi, Dedenne-GX)
3  0.301370               (Arceus & Dialga & Palkia-GX)
7  0.287671  (Mega Lopunny & Jigglypuff-GX, Dedenne-GX)
4  0.246575                                 (Keldeo-GX)
5  0.246575                                 (Cryogonal)
8  0.246575    (Arceus & Dialga & Palkia-GX, Cryogonal)
Regras
                      antecedents                     consequents   support  \
4   (Arceus & Dialga & Palkia-GX)                     (Cryogonal)  0.246575   
5                     (Cryogonal)   (Arceus & Dialga & Palkia-GX)  0.246575   
3                    (Dedenne-GX)  (Mega Lopunny & Jigglypuff-GX)  0.287671   
2  (Mega Lopunny & Jigglypuff-GX)                    (Dedenne-GX)  0.28767

### Análise por região

In [11]:
# Print the top itemsets and rules stored in `info_por_regiao` for each region.
for region in regions:
    resumo = info_por_regiao[region]
    print(f"Região: {region}")
    print("Itemsets frequentes")
    print(resumo['itemsets'])
    print("Regras")
    print(resumo['rules'])
    print("\n\n")

Região: SA
Itemsets frequentes
    support                        itemsets
0  0.589041                       (Jirachi)
1  0.493151                    (Dedenne-GX)
2  0.410959  (Mega Lopunny & Jigglypuff-GX)
Regras
                     antecedents                     consequents   support  \
4  (Arceus & Dialga & Palkia-GX)                     (Cryogonal)  0.246575   
5                    (Cryogonal)   (Arceus & Dialga & Palkia-GX)  0.246575   
3                   (Dedenne-GX)  (Mega Lopunny & Jigglypuff-GX)  0.287671   

   confidence      lift  
4    0.818182  3.318182  
5    1.000000  3.318182  
3    0.583333  1.419444  



Região: EU
Itemsets frequentes
    support       itemsets
5  0.650602  (Tapu Koko ♢)
0  0.590361      (Jirachi)
7  0.590361    (Marshadow)
Regras
                                antecedents                  consequents  \
334             (Tapu Koko ♢, Tapu Lele-GX)      (Marshadow, Zeraora-GX)   
337                 (Marshadow, Zeraora-GX)  (Tapu Koko ♢, Tapu Lele

### Descoberta de Subgrupos

In [12]:
# Subgroup discovery starts over from the raw data (not the frame cleaned for
# Apriori) and keeps only the columns used as target or as selectors.
#
# The previous `df['ranking_player_tournament'] != np.nan` filter was removed:
# NaN never compares equal to anything, so `!= np.nan` is True for every row
# and filtered nothing. Rows with a missing ranking are removed by `dropna`.
TOP_RANKING_CUTOFF = 15  # rankings up to this value count as a "top" finish

df = (
    df_original[['name_card', 'energy_type_card', 'ranking_player_tournament', 'combo_type_name']]
    .dropna(subset=['name_card', 'combo_type_name', 'ranking_player_tournament'])
    .copy()
)

# Binary target: 1 when the player finished within the cutoff, else 0.
df['top_position'] = (df['ranking_player_tournament'] <= TOP_RANKING_CUTOFF).astype(int)

target = ps.BinaryTarget('top_position', 1)

# One equality selector per distinct value of each descriptive column.
# `energy_type_card` is not part of the dropna subset, so its unique values
# may include NaN.
name_selectors = [ps.EqualitySelector('name_card', value) for value in df['name_card'].unique()]
energy_selectors = [ps.EqualitySelector('energy_type_card', value) for value in df['energy_type_card'].unique()]
combo_type_selector = [ps.EqualitySelector('combo_type_name', value) for value in df['combo_type_name'].unique()]

search_space = name_selectors + energy_selectors + combo_type_selector

task = ps.SubgroupDiscoveryTask(
    df,
    target,
    search_space,
    result_set_size=6,
    depth=4,
    qf=ps.WRAccQF()
)

result = ps.BeamSearch().execute(task)

df_result = result.to_dataframe()
df_result

Unnamed: 0,quality,subgroup,size_sg,size_dataset,positives_sg,positives_dataset,size_complement,relative_size_sg,relative_size_complement,coverage_sg,coverage_complement,target_share_sg,target_share_complement,target_share_dataset,lift
0,0.00404,energy_type_card=='GX',3137,114244,1387,33702,111107,0.027459,0.972541,0.041155,0.958845,0.442142,0.290846,0.295,1.498786
1,0.002537,energy_type_card=='EX',926,114244,563,33702,113318,0.008105,0.991895,0.016705,0.983295,0.607991,0.292443,0.295,2.060986
2,0.002396,combo_type_name=='Mewtwo & Mew',862,114244,528,33702,113382,0.007545,0.992455,0.015667,0.984333,0.612529,0.292586,0.295,2.076368
3,0.002195,energy_type_card.isnull(),71811,114244,21435,33702,42433,0.628576,0.371424,0.636016,0.363984,0.298492,0.289091,0.295,1.011836
4,0.002125,combo_type_name=='Palkia Inteleon',2343,114244,934,33702,111901,0.020509,0.979491,0.027713,0.972287,0.398634,0.29283,0.295,1.351302
5,0.002096,combo_type_name=='Zacian ADP',995,114244,533,33702,113249,0.008709,0.991291,0.015815,0.984185,0.535678,0.292886,0.295,1.815858


### Análise de resultado por região

In [13]:
# Print, for every region, the itemsets and rules stored in `info_por_regiao`.
# Each region is emitted with one print call; the pieces are joined by
# newlines so the text matches separate print statements.
for region in regions:
    print(
        f"Região: {region}",
        "Itemsets frequentes",
        info_por_regiao[region]['itemsets'],
        "Regras",
        info_por_regiao[region]['rules'],
        "\n\n",
        sep="\n",
    )

Região: SA
Itemsets frequentes
    support                        itemsets
0  0.589041                       (Jirachi)
1  0.493151                    (Dedenne-GX)
2  0.410959  (Mega Lopunny & Jigglypuff-GX)
Regras
                     antecedents                     consequents   support  \
4  (Arceus & Dialga & Palkia-GX)                     (Cryogonal)  0.246575   
5                    (Cryogonal)   (Arceus & Dialga & Palkia-GX)  0.246575   
3                   (Dedenne-GX)  (Mega Lopunny & Jigglypuff-GX)  0.287671   

   confidence      lift  
4    0.818182  3.318182  
5    1.000000  3.318182  
3    0.583333  1.419444  



Região: EU
Itemsets frequentes
    support       itemsets
5  0.650602  (Tapu Koko ♢)
0  0.590361      (Jirachi)
7  0.590361    (Marshadow)
Regras
                                antecedents                  consequents  \
334             (Tapu Koko ♢, Tapu Lele-GX)      (Marshadow, Zeraora-GX)   
337                 (Marshadow, Zeraora-GX)  (Tapu Koko ♢, Tapu Lele

In [14]:
# Binary target: 1 when the ranking is 15 or better, else 0.
df['top_position'] = df['ranking_player_tournament'].le(15).astype(int)
target = ps.BinaryTarget('top_position', 1)

# Build one list of equality selectors per descriptive column.
selectors_by_column = {
    column: [ps.EqualitySelector(column, value) for value in df[column].unique()]
    for column in ('name_card', 'energy_type_card', 'combo_type_name')
}
name_selectors = selectors_by_column['name_card']
energy_selectors = selectors_by_column['energy_type_card']
combo_type_selectors = selectors_by_column['combo_type_name']

search_space = [*name_selectors, *energy_selectors, *combo_type_selectors]

# Same search as before, now keeping the 10 best subgroups.
wracc_options = dict(result_set_size=10, depth=4, qf=ps.WRAccQF())
task = ps.SubgroupDiscoveryTask(df, target, search_space, **wracc_options)

result = ps.BeamSearch().execute(task)

df_result = result.to_dataframe()

print("Results with WRAccQF:", df_result, sep="\n")


In [None]:
# Repeat the search with StandardQF as the quality function.
# NOTE(review): the qualities printed for this run are the same values as the
# WRAccQF results shown earlier in the notebook. StandardQF with a=1.0 appears
# to be the same measure as WRAccQF -- confirm in the pysubgroup docs, and use
# a different exponent if a distinct ranking is intended.
standard_options = dict(result_set_size=10, depth=4, qf=ps.StandardQF(1.0))
task_with_standard = ps.SubgroupDiscoveryTask(df, target, search_space, **standard_options)

result_standard = ps.BeamSearch().execute(task_with_standard)
df_result_standard = result_standard.to_dataframe()

print("Results with StandardQF:", df_result_standard, sep="\n")

Results with StandardQF:
    quality                                           subgroup  size_sg  \
0  0.004040                             energy_type_card=='GX'     3137   
1  0.002537                             energy_type_card=='EX'      926   
2  0.002396                    combo_type_name=='Mewtwo & Mew'      862   
3  0.002195                          energy_type_card.isnull()    71811   
4  0.002125                 combo_type_name=='Palkia Inteleon'     2343   
5  0.002096                      combo_type_name=='Zacian ADP'      995   
6  0.001888                          combo_type_name=='Zapdos'     1184   
7  0.001708                     combo_type_name=='Night March'      637   
8  0.001527  combo_type_name=='Palkia Inteleon' AND energy_...     1534   
9  0.001476            combo_type_name=='Reshiram & Charizard'      398   

   size_dataset  positives_sg  positives_dataset  size_complement  \
0        114244          1387              33702           111107   
1        11