## **THE APRIORI ALGORITHM**

In [24]:
import pandas as pd
import os
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori, association_rules


# Location of the merged items/category workbook. The directory defaults to the
# original local path, but it can be overridden through the APRIORI_DATASET_DIR
# environment variable so the notebook is not tied to a single machine.
path = os.environ.get('APRIORI_DATASET_DIR', r'C:\Users\moham\Apriori_VS_Word2Vec\Dataset')
excel_file = 'df_merged_items_category.xlsx'
excel_file_path = os.path.join(path, excel_file)

def load_dataset(file_path):
    """Read the Excel workbook at ``file_path`` with pandas and return the result.

    Parameters
    ----------
    file_path : str or path-like
        Location of the Excel file, passed straight to ``pd.read_excel``.

    Returns
    -------
    Whatever ``pd.read_excel(file_path)`` returns (a DataFrame with the
    default reader options used here).
    """
    dataset = pd.read_excel(file_path)
    return dataset

# Load the transactions and discard rows without an item name *before* the
# baskets are built, so a missing value can never end up inside a transaction
# list. A new frame is assigned instead of using inplace=True, which keeps
# this cell idempotent when it is re-run.
data_excel = load_dataset(excel_file_path).dropna(subset=['Itemname'])

# One list of item names per bill number: each list is a single transaction.
basket = data_excel.groupby('BillNo')['Itemname'].apply(list)

In [25]:
# One-hot encode the baskets so that Apriori can consume them.
te = TransactionEncoder()

# Learn the item vocabulary first, then encode every basket as one row of the
# binary matrix: one row per transaction, one column per item, and a True
# entry wherever the item occurs in that transaction.
te.fit(basket)
te_ary = te.transform(basket)

# Wrap the binary matrix in a DataFrame whose columns are the item names.
basket_encoded = pd.DataFrame(data=te_ary, columns=te.columns_)

# Report the size of the encoded matrix.
print(f"Transaction matrix shape: {te_ary.shape} (transactions × unique items)")
print(f"Number of unique items: {len(te.columns_)}")

# Show the first few raw baskets for comparison with the encoded form.
print("\nSample of original transaction baskets:")
print(basket.head())

Transaction matrix shape: (20208, 4185) (transactions × unique items)
Number of unique items: 4185

Sample of original transaction baskets:
BillNo
536365    [WHITE HANGING HEART T-LIGHT HOLDER, WHITE MET...
536366    [HAND WARMER UNION JACK, HAND WARMER RED POLKA...
536367    [ASSORTED COLOUR BIRD ORNAMENT, POPPY'S PLAYHO...
536368    [JAM MAKING SET WITH JARS, RED COAT RACK PARIS...
536369                           [BATH BUILDING BLOCK WORD]
Name: Itemname, dtype: object


In [26]:

# Mine frequent itemsets with the Apriori algorithm.
# `min_support=0.01` is the minimum support an itemset needs to count as
# frequent; `use_colnames=True` labels itemsets with item names.
frequent_itemsets = apriori(basket_encoded, min_support=0.01, use_colnames=True)

# Confidence-based association rules (`metric="confidence"`, minimum
# confidence threshold 0.05). They are stored under their own name: the
# lift-based call in the following cell assigns `rules`, so binding this
# result to `rules` as well would be a dead store that is silently overwritten.
rules_by_confidence = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.05)

In [27]:
# Derive association rules from the frequent itemsets, evaluated on lift:
# 1.2 is the minimum lift threshold a rule must meet to be kept.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1.2,
)

In [28]:
# Record, in a new "antecedent_len" column, how many items each rule's
# antecedent contains.
rules["antecedent_len"] = rules["antecedents"].apply(len)

In [36]:
# Keep only the rules that satisfy all three conditions: at least two items
# in the antecedent, confidence above 0.75, and lift above 1.2.
has_multi_item_antecedent = rules['antecedent_len'] >= 2
is_confident = rules['confidence'] > 0.75
has_high_lift = rules['lift'] > 1.2
filtered_rules = rules[has_multi_item_antecedent & is_confident & has_high_lift]

# Write the filtered rules to an Excel workbook without the index column.
filtered_rules.to_excel("filtered_rules.xlsx", index=False)

In [None]:
# Coverage is taken to be the antecedent support of each rule. Reuse the
# 'antecedent support' column when the rules DataFrame has one; otherwise
# derive the same quantity as support / confidence.
rules['coverage'] = (
    rules['antecedent support']
    if 'antecedent support' in rules.columns
    else rules['support'] / rules['confidence']
)

In [37]:
# Display the association rules table as the cell output, including the
# derived antecedent_len and coverage columns added above.
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric,antecedent_len,coverage
0,(6 RIBBONS RUSTIC CHARM),(JAM MAKING SET PRINTED),0.046615,0.055226,0.011530,0.247346,4.478826,0.008956,1.255257,0.814705,1,0.046615
1,(JAM MAKING SET PRINTED),(6 RIBBONS RUSTIC CHARM),0.055226,0.046615,0.011530,0.208781,4.478826,0.008956,1.204957,0.822130,1,0.055226
2,(6 RIBBONS RUSTIC CHARM),(JAM MAKING SET WITH JARS),0.046615,0.053890,0.010095,0.216561,4.018599,0.007583,1.207637,0.787884,1,0.046615
3,(JAM MAKING SET WITH JARS),(6 RIBBONS RUSTIC CHARM),0.053890,0.046615,0.010095,0.187328,4.018599,0.007583,1.173148,0.793942,1,0.053890
4,(6 RIBBONS RUSTIC CHARM),(JUMBO BAG RED RETROSPOT),0.046615,0.102138,0.010689,0.229299,2.245001,0.005928,1.164995,0.581681,1,0.046615
...,...,...,...,...,...,...,...,...,...,...,...,...
2999,(RED RETROSPOT CHARLOTTE BAG),"(CHARLOTTE BAG PINK POLKADOT, CHARLOTTE BAG SU...",0.050871,0.010936,0.010046,0.197471,18.056517,0.009489,1.232433,0.995248,1,0.050871
3000,(CHARLOTTE BAG PINK POLKADOT),"(WOODLAND CHARLOTTE BAG, STRAWBERRY CHARLOTTE ...",0.036520,0.012767,0.010046,0.275068,21.544841,0.009579,1.361828,0.989730,1,0.036520
3001,(CHARLOTTE BAG SUKI DESIGN),"(CHARLOTTE BAG PINK POLKADOT, STRAWBERRY CHARL...",0.043300,0.011926,0.010046,0.232000,19.453344,0.009529,1.286555,0.991528,1,0.043300
3002,(WOODLAND CHARLOTTE BAG),"(CHARLOTTE BAG PINK POLKADOT, STRAWBERRY CHARL...",0.040924,0.012074,0.010046,0.245466,20.329375,0.009551,1.309318,0.991382,1,0.040924


## **COMPARISON METRICS**