# Association Rule Learning
We can use ML to find relationships between variables in datasets. These techniques are commonly applied to data mining, market basket analysis, and recommendation systems.

In [None]:
from bs4 import BeautifulSoup
import pandas as pd
import requests

url = "https://victoryroad.pro/2024-naic/"
response = requests.get(url, headers={"User-Agent": "Mozilla/5.0"}, verify=False)

# Check if the request was successful
if response.status_code == 200:
    html_content = response.text  # Get the HTML content
    soup = BeautifulSoup(html_content, "html.parser")  # Parse the HTML
    print("Successfully fetched the HTML content!")
else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")

In [None]:
soup = BeautifulSoup(html_content, "html.parser")

teams = []
team_columns = ["p0", "p1", "p2", "p3", "p4", "p5"]
for row in soup.find_all("tr")[1:]:  # Skip the header row
    team_div = row.find("div", class_="table-team-wrapper")
    if team_div:
        # Extract all Pokémon names from the `title` attribute in <img> tags
        pokemon = [img["title"] for img in team_div.find_all("img")]
        if len(pokemon) == 6:  # Ensure each row has exactly 6 Pokémon
            teams.append(pokemon)

df = pd.DataFrame(teams, columns=team_columns)

In [None]:
# teams = []
# team_columns = ["Player", "p0", "p1", "p2", "p3", "p4", "p5"]

# for row in soup.find_all("tr")[1:]:  # Skip the header row
#     # Extract Player Name
#     if len(row.find_all("td")) > 3:
#         player_td = row.find_all("td")[3]  # 4th column (index 3) is the Player column
#         player_name = player_td.find("b").text.strip() if player_td.find("b") else player_td.text.strip()

#     # Extract Pokémon Team
#     team_div = row.find("div", class_="table-team-wrapper")
#     if team_div:
#         pokemon = [img["title"] for img in team_div.find_all("img")]
#         if len(pokemon) == 6:  # Ensure each row has exactly 6 Pokémon
#             teams.append([player_name] + pokemon)  # Add player name as the first column

# df = pd.DataFrame(teams, columns=team_columns)
# df

In [27]:
df

Unnamed: 0,p0,p1,p2,p3,p4,p5
0,Calyrex Ice,Rillaboom,Incineroar,Urshifu Rapid,Pelipper,Raging Bolt
1,Terapagos Terastal,Flutter Mane,Chi Yu,Urshifu Rapid,Grimmsnarl,Iron Hands
2,Calyrex Shadow,Rillaboom,Incineroar,Urshifu Rapid,Smeargle,Raging Bolt


Support: Measures how frequently the itemset appears in the dataset.

$$ \text{Support(A)} = \frac{\text{Number of sets containing A}}{\text{Number of sets}}  $$

<br>
Confidence: Measures how often we expect to see items together.


$$ \text{Confidence(A, B)} = P(B|A) = \frac{\text{Number of sets containing A and B}}{\text{Number of sets containing A}}  $$

<br>
Lift: Measures the strength of an association rule relative to the independent occurances.

$$ \text{Lift(A, B)} = \frac{\text{Confidence(A, B)}}{\text{Support(A)}}  $$


In [38]:
# Support for Incineroar
incineroar_count = df[team_columns].apply(lambda row: "Incineroar" in row.values, axis=1).sum()
team_count = len(df)
support_incineroar = incineroar_count / team_count
print(f"Count of Incineroar in teams: {incineroar_count}")
print(f"Total count of teams: {team_count}")
print(f"Support for Incineroar: {support_incineroar:.3f}")

Count of Incineroar in teams: 129
Total count of teams: 296
Support for Incineroar: 0.436


In [39]:
# Confidence for Incineroar and Rillaboom together
both_count = df[team_columns].apply(lambda row: ("Incineroar" in row.values) and ("Rillaboom" in row.values), axis=1).sum()
confidence_incineroar_rillaboom = both_count / incineroar_count if incineroar_count > 0 else 0
print(f"Count of Incineroar and Rillaboom in teams: {both_count}")
print(f"Count of Incineroar in teams: {incineroar_count}")
print(f"Confidence for Incineroar and Rillaboom: {confidence_incineroar_rillaboom:.3f}")

Count of Incineroar and Rillaboom in teams: 59
Count of Incineroar in teams: 129
Confidence for Incineroar and Rillaboom: 0.457


In [35]:
# Lift 
print(f"Lift for Incineroar and Rillaboom: {confidence_incineroar_rillaboom / support_incineroar:.2f}")

Lift for Incineroar and Rillaboom: 1.05


The lift indicates that the likelihood of Incineroar and Rillaboom being together is 1.05 times higher than that of Incineroar being used alone.

## Algorithms 
- Apriori is the classical approach. It's useful for small datasets or when interpretability is crucial.
- FP-Growth is generally better than Apriori in terms of efficiency and scalability.

In [45]:
from mlxtend.frequent_patterns import apriori, fpgrowth, fpmax, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Convert the DataFrame into a list of lists format
transactions = df.values.tolist()

# Convert transactions into one-hot encoded format
te = TransactionEncoder()
te_ary = te.fit(transactions).transform(transactions)
df_encoded = pd.DataFrame(te_ary, columns=te.columns_)

min_support = 0.2
frequent_itemsets_apriori = apriori(df_encoded, min_support=min_support, use_colnames=True)
frequent_itemsets_fpgrowth = fpgrowth(df_encoded, min_support=min_support, use_colnames=True)

In [50]:
# frequent_itemsets_apriori
frequent_itemsets_fpgrowth

Unnamed: 0,support,itemsets
0,0.435811,(Incineroar)
1,0.385135,(Urshifu Rapid)
2,0.341216,(Rillaboom)
3,0.307432,(Raging Bolt)
4,0.236486,(Iron Hands)
5,0.216216,(Flutter Mane)
6,0.202703,(Chi Yu)
7,0.246622,(Amoonguss)
8,0.222973,(Tornadus Incarnate)
9,0.22973,"(Urshifu Rapid, Incineroar)"


In [51]:
# Generate Association Rules for Apriori
association_rules_apriori = association_rules(frequent_itemsets_apriori, 
                                              num_itemsets=team_count, 
                                              metric="confidence", 
                                              min_threshold=0.2)
association_rules_apriori

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,representativity,leverage,conviction,zhangs_metric,jaccard,certainty,kulczynski
0,(Urshifu Rapid),(Incineroar),0.385135,0.435811,0.22973,0.596491,1.368693,1.0,0.061884,1.398208,0.438106,0.388571,0.284799,0.561812
1,(Incineroar),(Urshifu Rapid),0.435811,0.385135,0.22973,0.527132,1.368693,1.0,0.061884,1.300288,0.477457,0.388571,0.23094,0.561812


In [None]:
association_rules_fpgrowth = association_rules(frequent_itemsets_fpgrowth, 
                                              num_itemsets=team_count, 
                                              metric="confidence", 
                                              min_threshold=0.2)
association_rules_fpgrowth