In [9]:
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

# Step 1: Load the CSV file. The encoding is given explicitly instead of
# relying on pandas' UTF-8 default.
df = pd.read_csv('OnlineRetail.csv', encoding='ISO-8859-1')

# Step 2: Data cleanup, expressed as a non-mutating chain so the cell can be
# re-run safely:
#   - drop rows that have no invoice number
#   - trim surrounding whitespace from product descriptions
#   - store invoice numbers as strings so they can be pattern-matched
df = (
    df.dropna(axis=0, subset=['InvoiceNo'])
      .assign(
          Description=lambda frame: frame['Description'].str.strip(),
          InvoiceNo=lambda frame: frame['InvoiceNo'].astype('str'),
      )
)
# Discard every invoice whose number contains the letter 'C'
# (presumably credit/cancellation invoices -- confirm against the data dictionary).
df = df[~df['InvoiceNo'].str.contains('C')]

# Step 3: Consolidate items into transactions.
# Rows are French invoices, columns are product descriptions, and each cell is
# the summed quantity of that product on that invoice (0 where the product does
# not appear). After unstack() the index is already 'InvoiceNo', so no
# reset_index()/set_index() round trip is needed.
basket = (df[df['Country'] == "France"]
          .groupby(['InvoiceNo', 'Description'])['Quantity']
          .sum()
          .unstack()
          .fillna(0))

# Step 4: Convert quantities to 1 and 0
def encode_units(x):
    """Collapse a quantity into a presence flag.

    Returns 1 when ``x >= 1`` holds and 0 in every other case
    (zero, fractions below one, negative values, NaN).
    """
    if x >= 1:
        return 1
    return 0

basket_sets = basket.applymap(encode_units)
if 'POSTAGE' in basket_sets.columns:
    basket_sets.drop('POSTAGE', inplace=True, axis=1)

# Step 5: Generate frequent itemsets with support >= 5%
frequent_itemsets = apriori(basket_sets, min_support=0.05, use_colnames=True)
print("Frequent Itemsets:")
print(frequent_itemsets)

# Step 6: Generate association rules (keep rules with lift >= 1).
# num_itemsets is the number of transactions in the encoded input data, i.e.
# the number of rows of basket_sets -- not the number of frequent itemsets found.
rules = association_rules(
    frequent_itemsets,
    metric="lift",
    min_threshold=1,
    num_itemsets=len(basket_sets),
)
print("\nAssociation Rules:")
print(rules.head())


  basket_sets = basket.applymap(encode_units)


Frequent Itemsets:
      support                                           itemsets
0    0.071429                      (4 TRADITIONAL SPINNING TOPS)
1    0.096939                       (ALARM CLOCK BAKELIKE GREEN)
2    0.102041                        (ALARM CLOCK BAKELIKE PINK)
3    0.094388                         (ALARM CLOCK BAKELIKE RED)
4    0.068878                       (ASSORTED COLOUR MINI CASES)
..        ...                                                ...
108  0.102041  (SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...
109  0.122449  (SET/6 RED SPOTTY PAPER PLATES, SET/6 RED SPOT...
110  0.063776  (ALARM CLOCK BAKELIKE RED, ALARM CLOCK BAKELIK...
111  0.068878  (PLASTERS IN TIN WOODLAND ANIMALS, PLASTERS IN...
112  0.099490  (SET/6 RED SPOTTY PAPER PLATES, SET/20 RED RET...

[113 rows x 2 columns]

Association Rules:
                    antecedents                   consequents  \
0  (ALARM CLOCK BAKELIKE GREEN)   (ALARM CLOCK BAKELIKE PINK)   
1   (ALARM CLOCK BAKELIKE P

