## Assignment 6: Association Analysis

**Submitted by: Indronil Bhattacharjee**

In [1]:
import numpy as np
import pandas as pd

In [2]:
from mlxtend.frequent_patterns import apriori, association_rules

# Step 1: Read the Online Retail transactions into a DataFrame.
# The file is decoded as ISO-8859-1 (Latin-1) instead of pandas' default UTF-8
# (presumably it contains bytes that are not valid UTF-8 - confirm).
retail_csv_path = "/kaggle/input/online-retail/Online_Retail.csv"
data = pd.read_csv(retail_csv_path, encoding="ISO-8859-1")

In [3]:
# Step 2: Data preprocessing - trim leading/trailing whitespace from the
# Description column so identical item names group together.
trimmed_descriptions = data['Description'].str.strip()
data['Description'] = trimmed_descriptions

# Step 3: Build the invoice x item quantity matrix.
# First total the Quantity of every (InvoiceNo, Description) pair ...
quantity_per_pair = data.groupby(['InvoiceNo', 'Description'])['Quantity'].sum()
# ... then pivot Description into columns (one row per InvoiceNo). Pairs that
# never occur come out of unstack() as NaN and are replaced with 0.
basket = quantity_per_pair.unstack().fillna(0)

# Step 4: Convert quantities to binary format
def encode_units(x):
    """Collapse a quantity into a 0/1 presence flag.

    Parameters
    ----------
    x : int or float
        A single quantity value (one cell of the basket matrix).

    Returns
    -------
    int
        1 when ``x >= 1``, otherwise 0. "Otherwise" covers zero, negative
        values, fractional values strictly between 0 and 1, and NaN, so the
        function always returns an int and never falls through to ``None``.
    """
    # A single total expression: any comparison that is not True (including
    # NaN >= 1, which is False) maps to 0.
    return 1 if x >= 1 else 0

# Apply encode_units to every cell of the quantity matrix.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of DataFrame.map,
# so use the new name when this pandas provides it and fall back to applymap
# on older versions. Both apply the function element-wise.
_apply_elementwise = basket.map if hasattr(basket, "map") else basket.applymap
basket_sets = _apply_elementwise(encode_units)

In [6]:
# Step 5: Market basket analysis - mine frequent itemsets with Apriori.
# Only itemsets whose support reaches the floor below are kept;
# use_colnames=True reports itemsets by column (item) name instead of by
# column index.
support_floor = 0.02
frequent_itemsets = apriori(
    basket_sets,
    min_support=support_floor,
    use_colnames=True,
)

# Step 6: Derive association rules from the frequent itemsets, keeping the
# rules whose confidence reaches the floor below.
confidence_floor = 0.7
association_rules_df = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=confidence_floor,
)

In [7]:
# Step 7: Pick the ten rules with the highest confidence.
# nlargest returns them ordered from highest to lowest confidence;
# keep='first' (the default) resolves ties by original row order.
top_10_rules = association_rules_df.nlargest(
    n=10,
    columns='confidence',
    keep='first',
)

print(top_10_rules)

                                         antecedents  \
6  (ROSES REGENCY TEACUP AND SAUCER, PINK REGENCY...   
8  (PINK REGENCY TEACUP AND SAUCER, GREEN REGENCY...   
2                   (PINK REGENCY TEACUP AND SAUCER)   
5                   (PINK REGENCY TEACUP AND SAUCER)   
4                  (GREEN REGENCY TEACUP AND SAUCER)   
3                  (ROSES REGENCY TEACUP AND SAUCER)   
1                (GARDENERS KNEELING PAD CUP OF TEA)   
9                   (PINK REGENCY TEACUP AND SAUCER)   
7  (ROSES REGENCY TEACUP AND SAUCER, GREEN REGENC...   
0                      (CHARLOTTE BAG PINK POLKADOT)   

                                         consequents  antecedent support  \
6                  (GREEN REGENCY TEACUP AND SAUCER)            0.024503   
8                  (ROSES REGENCY TEACUP AND SAUCER)            0.025894   
2                  (GREEN REGENCY TEACUP AND SAUCER)            0.031334   
5                  (ROSES REGENCY TEACUP AND SAUCER)            0.031334   
4  