# **Install Required Libraries**

In [None]:

!pip install mlxtend


import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules




# **Install Required Libraries**

In [None]:
# Load the dataset with proper encoding
df = pd.read_csv('/content/data.csv', encoding='ISO-8859-1')

# Check the first few rows to understand the dataset structure
df.head()


  and should_run_async(code)


Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


# **Data Preprocessing**

In [None]:
# Convert InvoiceNo and StockCode to string for further processing
df['InvoiceNo'] = df['InvoiceNo'].astype(str)
df['StockCode'] = df['StockCode'].astype(str)

# Remove any rows where the quantity is negative (which might indicate product returns)
df = df[df['Quantity'] > 0]

# Remove transactions where CustomerID is missing (if any)
df = df.dropna(subset=['CustomerID'])

# Create a pivot table where each row represents an InvoiceNo and each column represents a StockCode
# The values will be the quantity of each product in that transaction
basket = df.groupby(['InvoiceNo', 'StockCode'])['Quantity'].sum().unstack().fillna(0)

# Convert quantities to 1s and 0s: 1 for products that were bought, 0 for products that weren't
basket = basket.applymap(lambda x: 1 if x > 0 else 0)

# View the first few rows of the binary encoded basket
basket.head()


  and should_run_async(code)
  basket = basket.applymap(lambda x: 1 if x > 0 else 0)


StockCode,10002,10080,10120,10123C,10124A,10124G,10125,10133,10135,11001,...,90214V,90214W,90214Y,90214Z,BANK CHARGES,C2,DOT,M,PADS,POST
InvoiceNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
536365,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536366,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536367,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536368,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
536369,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


# **Apriori Algorithm to Find Frequent Itemsets**

In [None]:
from mlxtend.frequent_patterns import apriori

# Subset the first 10000 rows for analysis
basket_subset = basket.head(10000)

# Apply apriori on the subset data
frequent_itemsets = apriori(basket_subset, min_support=0.05, use_colnames=True)

# Display the first few frequent itemsets
frequent_itemsets.head()



  and should_run_async(code)


Unnamed: 0,support,itemsets
0,0.0744,(20725)
1,0.057,(20727)
2,0.0549,(20728)
3,0.0635,(21212)
4,0.0515,(22178)


# **Generate Association Rules**

In [None]:
from mlxtend.frequent_patterns import association_rules

# Generate the association rules from the frequent itemsets
# We will use the 'lift' metric
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Display the first few rules
rules.head()


  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


In [None]:
import pandas as pd

# Load the dataset
df = pd.read_csv('/content/data.csv', encoding='ISO-8859-1')

# Group the data by 'StockCode' and sum the 'Quantity' to get total purchases for each product
top_purchases = df.groupby('StockCode')['Quantity'].sum().sort_values(ascending=False)

# Get the top 10 most purchased products
top_10_purchases = top_purchases.head(10)

# Print the top 10 purchases
print("Top 10 most purchased products:")
print(top_10_purchases)

# Optional: If you want to see the product descriptions along with the StockCode
top_10_purchases_with_description = df[df['StockCode'].isin(top_10_purchases.index)][['StockCode', 'Description']].drop_duplicates()
print("\nDescriptions of the Top 10 Products:")
print(top_10_purchases_with_description)


  and should_run_async(code)


Top 10 most purchased products:
StockCode
22197     56450
84077     53847
85099B    47363
85123A    38830
84879     36221
21212     36039
23084     30646
22492     26437
22616     26315
21977     24753
Name: Quantity, dtype: int64

Descriptions of the Top 10 Products:
       StockCode                          Description
0         85123A   WHITE HANGING HEART T-LIGHT HOLDER
9          84879        ASSORTED COLOUR BIRD ORNAMENT
44         22492              MINI PAINT SET VINTAGE 
96         21212      PACK OF 72 RETROSPOT CAKE CASES
98         21977   PACK OF 60 PINK PAISLEY CAKE CASES
177       85099B              JUMBO BAG RED RETROSPOT
221        22197                 SMALL POPCORN HOLDER
649        22616           PACK OF 12 LONDON TISSUES 
3259       84077    WORLD WAR 2 GLIDERS ASSTD DESIGNS
183218     23084                   RABBIT NIGHT LIGHT
189299     84879                              damaged
220843    85123A                                    ?
223184     22197             

In [None]:
from mlxtend.frequent_patterns import apriori

# Assuming `basket` is your one-hot encoded basket data for Apriori

# Apply the Apriori algorithm to find frequent itemsets
frequent_itemsets = apriori(basket, min_support=0.05, use_colnames=True)

# Sort the itemsets by support (frequency) and get the top 10
top_10_itemsets = frequent_itemsets.sort_values(by='support', ascending=False).head(10)

# Print the top 10 frequent itemsets
print("Top 10 frequent itemsets:")
print(top_10_itemsets)


  and should_run_async(code)


Top 10 frequent itemsets:
     support  itemsets
19  0.106711  (85123A)
9   0.091929   (22423)
18  0.086319  (85099B)
16  0.074450   (47566)
17  0.074180   (84879)
0   0.069540   (20725)
12  0.061826   (22720)
20  0.059290    (POST)
13  0.058265   (23203)
1   0.056754   (20727)


# **Filter and Analyze Rules**

In [None]:
# Filter rules based on confidence > 0.6 and lift > 1.2
filtered_rules = rules[(rules['confidence'] > 0.6) & (rules['lift'] > 1.2)]

# Display the first few filtered rules
filtered_rules.head()

  and should_run_async(code)


Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric


# **Save the Filtered Rules**

In [None]:
# Save the filtered rules to a CSV file
filtered_rules.to_csv('/content/filtered_association_rules.csv', index=False)


  and should_run_async(code)
