In [1]:
#Import necessary libraries:

import numpy as np
import pandas as pd
import mlxtend

In [2]:
from mlxtend.frequent_patterns import association_rules, apriori

In [3]:
import warnings
warnings.filterwarnings("ignore")

In [4]:
# Load the order lines from the "Data" worksheet of the workbook into a DataFrame.
market = pd.read_excel(
    "OnlineClothing.xlsx",
    sheet_name="Data",
)

In [5]:
# Preview only the first rows; a sample is enough to confirm the sheet loaded as
# expected, and it keeps the saved notebook output small.
market.head(10)

Unnamed: 0,OrderID,CustomerID,PurchaseDate (yy-mm-dd),ProductID,Product,Quantity,UnitPrice
0,1001,101,2023-06-27,P01,Summer Cap,2,100
1,1001,101,2023-06-27,P02,Sunglasses,1,50
2,1002,102,2023-07-28,P07,Kurta,1,150
3,1003,103,2023-07-29,P03,Half Sleeve T-shirt,1,200
4,1003,103,2023-07-29,P04,Capri,2,350
5,1004,104,2023-08-31,P05,Saree,1,400
6,1004,104,2023-08-31,P06,Earrings,1,30
7,1005,105,2023-09-01,P03,Half Sleeve T-shirt,1,200
8,1005,105,2023-09-01,P04,Capri,2,350
9,1006,105,2023-09-02,P07,Kurta,2,150


In [6]:
# Count the missing values in each column (`isna` is the same check as `isnull`).
market.isna().sum()

 OrderID                    0
 CustomerID                 0
 PurchaseDate (yy-mm-dd)    0
 ProductID                  0
Product                     0
 Quantity                   0
 UnitPrice                  0
dtype: int64

So, there are no missing values in the dataset.

In [7]:
# List every row that exactly repeats an earlier row; an empty result means
# the data contains no duplicates.
market.loc[market.duplicated()]

Unnamed: 0,OrderID,CustomerID,PurchaseDate (yy-mm-dd),ProductID,Product,Quantity,UnitPrice


So, there is no duplicate entry.

In [8]:
# Show the exact column labels. Several of them carry leading/trailing spaces
# (e.g. ' OrderID '), so any later column lookup must include that padding.
market.columns

Index([' OrderID ', ' CustomerID ', ' PurchaseDate (yy-mm-dd)', ' ProductID ',
       'Product', ' Quantity ', ' UnitPrice '],
      dtype='object')

In [9]:
# Prepare the input for the Apriori algorithm: only the order identifier and the
# product name are needed. The ' OrderID ' label is written with its surrounding
# spaces because that is how the column is named in the loaded sheet.
transaction_data = market.loc[:, [" OrderID ", "Product"]].copy()

In [12]:
# One-hot encode the transactions: one row per order, one column per product,
# with 1 where the order contains the product and 0 where it does not.
#
# The (order, product) line counts are pivoted into a wide table and then
# thresholded with a vectorized comparison. This replaces the element-wise
# `applymap(lambda ...)`, which is deprecated in recent pandas releases and runs
# a Python call per cell, while producing the same 0/1 integer table.
encoded_transaction_data = (
    transaction_data
    .groupby([' OrderID ', 'Product'])
    .size()
    .unstack(fill_value=0)
    .gt(0)
    .astype("int64")
)


In [13]:
# Preview the first encoded orders rather than dumping the whole matrix; a few
# rows are enough to verify the 0/1 layout.
encoded_transaction_data.head(10)

Product,Capri,Earrings,Half Sleeve T-shirt,Jeans,Kurta,Lahenga,Long Sleeve T-shirt,Saree,Sherwani,Summer Cap,Sunglasses,Sweatshirt,Thermocoat,Winter Cap
OrderID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1
1001,0,0,0,0,0,0,0,0,0,1,1,0,0,0
1002,0,0,0,0,1,0,0,0,0,0,0,0,0,0
1003,1,0,1,0,0,0,0,0,0,0,0,0,0,0
1004,0,1,0,0,0,0,0,1,0,0,0,0,0,0
1005,1,0,1,0,0,0,0,0,0,0,0,0,0,0
1006,0,0,0,0,1,0,0,0,0,0,0,0,0,0
1007,0,1,0,0,0,0,0,1,0,0,0,0,0,0
1008,0,0,0,0,0,1,0,0,0,0,0,0,0,0
1009,0,0,0,0,0,0,0,0,1,0,0,0,0,0
1010,0,0,0,0,0,1,0,0,0,0,0,0,0,0


In [14]:
# Mine itemsets with a deliberately low minimum support, then list them from
# the least to the most frequent.
support = 0.01
frequent_items = apriori(
    encoded_transaction_data,
    min_support=support,
    use_colnames=True,
)
frequent_items.sort_values(by='support')

Unnamed: 0,support,itemsets
9,0.043478,(Summer Cap)
12,0.043478,(Thermocoat)
17,0.043478,"(Sunglasses, Summer Cap)"
13,0.086957,(Winter Cap)
11,0.086957,(Sweatshirt)
18,0.086957,"(Sunglasses, Winter Cap)"
4,0.086957,(Kurta)
6,0.130435,(Long Sleeve T-shirt)
7,0.130435,(Saree)
8,0.130435,(Sherwani)


In [20]:
# Run Apriori with a 0.1 minimum support, derive the association rules whose
# lift reaches the threshold of 1, and show the key metrics for each rule.
frequent_itemsets = apriori(
    encoded_transaction_data,
    use_colnames=True,
    min_support=0.1,
)
rules = association_rules(frequent_itemsets, min_threshold=1, metric="lift")
rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(Capri),(Half Sleeve T-shirt),0.130435,1.0,7.666667
1,(Half Sleeve T-shirt),(Capri),0.130435,1.0,7.666667
2,(Earrings),(Saree),0.130435,1.0,7.666667
3,(Saree),(Earrings),0.130435,1.0,7.666667
4,(Long Sleeve T-shirt),(Jeans),0.130435,1.0,7.666667
5,(Jeans),(Long Sleeve T-shirt),0.130435,1.0,7.666667
