In [9]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
# Load the dataset
df = pd.read_excel('Online_Retail.xlsx')  # Adjust to your dataset path
# Clean the data
df['Description'] = df['Description'].str.strip()
df.dropna(axis=0, subset=['InvoiceNo'], inplace=True)
df['InvoiceNo'] = df['InvoiceNo'].astype('str')
df = df[~df['InvoiceNo'].str.contains('C')]  # Remove credit transactions
# Consolidate items into transactions
basket = df[df['Country'] == "France"] \
    .groupby(['InvoiceNo', 'Description'])['Quantity'] \
    .sum().unstack().reset_index().fillna(0) \
    .set_index('InvoiceNo')
# Convert quantities into 1 (purchased) and 0 (not purchased)
def encode_units(x):
    return 1 if x >= 1 else 0  # Convert to 1 for positive values, else 0
# Use applymap for element-wise application
basket_sets = basket.applymap(encode_units)  # Apply to each cell
# Drop the 'POSTAGE' column, which might not be relevant for analysis
basket_sets.drop('POSTAGE', inplace=True, axis=1)
# Apply Apriori algorithm with 5% support
frequent_itemsets = apriori(basket_sets, min_support=0.07, use_colnames=True)
# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1, num_itemsets=2)  # Specify num_itemsets
# Filter rules with high lift and confidence
filtered_rules = rules[(rules['lift'] >= 6) & (rules['confidence'] >= 0.8)]
# Display the results
print("Frequent Itemsets:\n", frequent_itemsets)
print("\nAssociation Rules:\n", rules)
print("\nFiltered Rules (Lift >= 6, Confidence >= 0.8):\n", filtered_rules)

  basket_sets = basket.applymap(encode_units)  # Apply to each cell


Frequent Itemsets:
      support                                           itemsets
0   0.071429                      (4 TRADITIONAL SPINNING TOPS)
1   0.096939                       (ALARM CLOCK BAKELIKE GREEN)
2   0.102041                        (ALARM CLOCK BAKELIKE PINK)
3   0.094388                         (ALARM CLOCK BAKELIKE RED)
4   0.081633                     (BAKING SET 9 PIECE RETROSPOT)
5   0.071429                     (CHILDRENS CUTLERY DOLLY GIRL)
6   0.099490                             (DOLLY GIRL LUNCH BOX)
7   0.096939                          (JUMBO BAG RED RETROSPOT)
8   0.076531                       (JUMBO BAG WOODLAND ANIMALS)
9   0.125000                           (LUNCH BAG APPLE DESIGN)
10  0.084184                      (LUNCH BAG DOLLY GIRL DESIGN)
11  0.153061                          (LUNCH BAG RED RETROSPOT)
12  0.119898                        (LUNCH BAG SPACEBOY DESIGN)
13  0.117347                               (LUNCH BAG WOODLAND)
14  0.142857        

