In [18]:
import pandas as pd
import numpy as np
from apyori import apriori

In [19]:
df = pd.read_excel('data/online_retail_II.xlsx')
df.head()

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085.0,United Kingdom
4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085.0,United Kingdom


In [20]:
# replacing empty value with 0.
df.fillna(0,inplace=True)

In [21]:
df.head()

Unnamed: 0,Invoice,StockCode,Description,Quantity,InvoiceDate,Price,Customer ID,Country
0,489434,85048,15CM CHRISTMAS GLASS BALL 20 LIGHTS,12,2009-12-01 07:45:00,6.95,13085.0,United Kingdom
1,489434,79323P,PINK CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
2,489434,79323W,WHITE CHERRY LIGHTS,12,2009-12-01 07:45:00,6.75,13085.0,United Kingdom
3,489434,22041,"RECORD FRAME 7"" SINGLE SIZE",48,2009-12-01 07:45:00,2.1,13085.0,United Kingdom
4,489434,21232,STRAWBERRY CERAMIC TRINKET BOX,24,2009-12-01 07:45:00,1.25,13085.0,United Kingdom


In [22]:
# Verify column names
print("Columns in the DataFrame:", df.columns)

# Drop rows with missing values
df = df.dropna(subset=['Invoice', 'StockCode', 'Description'])  # Use the correct column names

# Remove credit transactions (those starting with 'C')
df = df[~df['Invoice'].astype(str).str.startswith('C')]

# Group by Invoice and aggregate items into a list
transactions = df.groupby('Invoice')['Description'].apply(list).values

# Convert transactions to a list of lists and remove duplicates
transaction_list = [list(set(transaction)) for transaction in transactions]
print("Total transactions:", len(transaction_list))


Columns in the DataFrame: Index(['Invoice', 'StockCode', 'Description', 'Quantity', 'InvoiceDate',
       'Price', 'Customer ID', 'Country'],
      dtype='object')
Total transactions: 24224


In [23]:
# Debugging: Print a sample of the transaction_list
print("Sample transactions before apriori:")
for t in transaction_list[:5]:  # Print the first 5 transactions
    print(t)


Sample transactions before apriori:
['SAVE THE PLANET MUG', 'PINK DOUGHNUT TRINKET POT ', 'RECORD FRAME 7" SINGLE SIZE ', 'FANCY FONT HOME SWEET HOME DOORMAT', '15CM CHRISTMAS GLASS BALL 20 LIGHTS', ' WHITE CHERRY LIGHTS', 'PINK CHERRY LIGHTS', 'STRAWBERRY CERAMIC TRINKET BOX']
['LUNCHBOX WITH CUTLERY FAIRY CAKES ', 'HEART MEASURING SPOONS LARGE', 'CAT BOWL ', 'DOG BOWL , CHASING BALL DESIGN']
['ASSORTED COLOUR BIRD ORNAMENT', 'PIZZA PLATE IN BOX', 'SET OF 3 BLACK FLYING DUCKS', 'CHRISTMAS CRAFT WHITE FAIRY ', 'BATH BUILDING BLOCK WORD', 'FULL ENGLISH BREAKFAST PLATE', 'LOVE BUILDING BLOCK WORD', 'SMALL MARSHMALLOWS PINK BOWL', 'HEART FILIGREE DOVE LARGE', 'BLACK DINER WALL CLOCK', 'PLEASE ONE PERSON  METAL SIGN', 'HEART IVORY TRELLIS LARGE', 'DOOR MAT BLACK FLOCK ', 'HOME BUILDING BLOCK WORD', 'SCOTTIE DOG HOT WATER BOTTLE', ' PEACE WOODEN BLOCK LETTERS', 'CLASSIC WHITE FRAME', 'BISCUITS SMALL BOWL LIGHT BLUE', 'AREA PATROLLED METAL SIGN']
['PEACE SMALL WOOD LETTERS', 'BLUE PADDED SOF

In [24]:
# Convert transactions to a list of lists, remove duplicates, and ensure all items are strings
transaction_list = [[str(item) for item in set(transaction)] for transaction in transactions]


In [25]:
# Set minimum support, confidence, and lift thresholds
min_support = 0.02
min_confidence = 0.3
min_lift = 1.0

# Run the Apriori algorithm
rules = apriori(transaction_list, min_support=min_support, 
                min_confidence=min_confidence, min_lift=min_lift)

# Convert results to a list for readability
results = list(rules)
print("Total rules generated:", len(results))


Total rules generated: 12


In [26]:
# Step 5: Display the Association Rules
print("\n=== Association Rules ===")
for rule in results:
    items = [x for x in rule.items]
    print(f"Rule: {items}")
    print(f"Support: {rule.support:.4f}")
    for ordered_stat in rule.ordered_statistics:
        print(f"Confidence: {ordered_stat.confidence:.4f}")
        print(f"Lift: {ordered_stat.lift:.4f}")
    print("-" * 30)



=== Association Rules ===
Rule: ['PACK OF 60 PINK PAISLEY CAKE CASES', '60 TEATIME FAIRY CAKE CASES']
Support: 0.0245
Confidence: 0.4439
Lift: 9.3092
Confidence: 0.5134
Lift: 9.3092
------------------------------
Rule: ['PACK OF 72 RETRO SPOT CAKE CASES', '60 TEATIME FAIRY CAKE CASES']
Support: 0.0249
Confidence: 0.4513
Lift: 7.7542
Confidence: 0.4277
Lift: 7.7542
------------------------------
Rule: ['HEART OF WICKER LARGE', 'HEART OF WICKER SMALL']
Support: 0.0227
Confidence: 0.5027
Lift: 11.8351
Confidence: 0.5355
Lift: 11.8351
------------------------------
Rule: ['HOME BUILDING BLOCK WORD', 'LOVE BUILDING BLOCK WORD']
Support: 0.0241
Confidence: 0.4395
Lift: 9.7589
Confidence: 0.5362
Lift: 9.7589
------------------------------
Rule: ['HOME BUILDING BLOCK WORD', 'WHITE HANGING HEART T-LIGHT HOLDER']
Support: 0.0208
Confidence: 0.3794
Lift: 2.7717
------------------------------
Rule: ['LUNCH BAG SPACEBOY DESIGN ', 'LUNCH BAG SUKI  DESIGN ']
Support: 0.0201
Confidence: 0.4964
Lift: 

In [27]:
# Step 6: Display Frequent Itemsets in a DataFrame
results_df = pd.DataFrame([(tuple(result.items), result.support) 
                           for result in results], 
                          columns=['Itemset', 'Support'])

print("\nFrequent Itemsets:")
display(results_df)



Frequent Itemsets:


Unnamed: 0,Itemset,Support
0,"(PACK OF 60 PINK PAISLEY CAKE CASES, 60 TEATIM...",0.02448
1,"(PACK OF 72 RETRO SPOT CAKE CASES, 60 TEATIME ...",0.024893
2,"(HEART OF WICKER LARGE, HEART OF WICKER SMALL)",0.022746
3,"(HOME BUILDING BLOCK WORD, LOVE BUILDING BLOCK...",0.02415
4,"(HOME BUILDING BLOCK WORD, WHITE HANGING HEART...",0.020847
5,"(LUNCH BAG SPACEBOY DESIGN , LUNCH BAG SUKI D...",0.020104
6,"(PACK OF 60 PINK PAISLEY CAKE CASES, PACK OF 7...",0.022044
7,"(WHITE HANGING HEART T-LIGHT HOLDER, RED HANGI...",0.031828
8,"(SWEETHEART CERAMIC TRINKET BOX, STRAWBERRY CE...",0.03286
9,"(WHITE HANGING HEART T-LIGHT HOLDER, STRAWBERR...",0.022168
