In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# NOTE: filterwarnings('ignore') with no category silences every warning from
# every library for the rest of the session, not only pandas warnings.
import warnings
warnings.filterwarnings('ignore')

## **Book Dataset**

In [None]:
# Toy transaction database: each key is a transaction id and each value is the
# list of item ids that occur in that transaction.
# NOTE: the name `df` is rebound to a pandas DataFrame later in this notebook.
df = {'T100': ['I1', 'I2', 'I5'],
      'T200': ['I2', 'I4'],
      'T300': ['I2', 'I3'],
      'T400': ['I1', 'I2', 'I4'],
      'T500': ['I1', 'I3'],
      'T600': ['I2', 'I3'],
      'T700': ['I1', 'I3'],
      'T800': ['I1', 'I2', 'I3', 'I5'],
      'T900': ['I1', 'I2', 'I3']}

In [None]:
itemList = []

In [None]:
# Collect the transactions (the dict values) in insertion order. Building the
# list by assignment keeps this cell idempotent: re-running it no longer
# appends the same transactions a second time.
itemList = list(df.values())

In [None]:
itemList

[['I1', 'I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3'],
 ['I1', 'I2', 'I4'],
 ['I1', 'I3'],
 ['I2', 'I3'],
 ['I1', 'I3'],
 ['I1', 'I2', 'I3', 'I5'],
 ['I1', 'I2', 'I3']]

In [None]:
# Minimum support expressed as an absolute number of transactions; the
# functions in this notebook compare it against raw occurrence counts.
min_support = 2

## **Function to calculate first Frequent Item Set**

In [None]:
def firstFrequentItemSet(itemList, min_support):
  """Count the support of every single item and keep the frequent ones.

  Support is the number of transactions that contain the item, so an item
  repeated inside one transaction is counted once for that transaction.

  Args:
    itemList: iterable of transactions, each an iterable of hashable items.
    min_support: minimum number of transactions an item must appear in.

  Returns:
    A tuple ``(first_frequent_list, before_first_frequent_list)``. Both are
    lists of ``([item], count)`` tuples in first-seen order; the first holds
    only items with ``count >= min_support``, the second holds every item.
  """
  support_counts = {}

  for items in itemList:
    # dict.fromkeys de-duplicates while preserving first-seen order, so a
    # repeated item adds only 1 to its support for this transaction.
    for item in dict.fromkeys(items):
      support_counts[item] = support_counts.get(item, 0) + 1

  before_first_frequent_list = [
      ([item], count) for item, count in support_counts.items()
  ]
  first_frequent_list = [
      ([item], count)
      for item, count in support_counts.items()
      if count >= min_support
  ]

  return first_frequent_list, before_first_frequent_list

In [None]:
firstFrequentList, beforeFirstFrequentList = firstFrequentItemSet(itemList, min_support)

In [None]:
beforeFirstFrequentList

[(['I1'], 6), (['I2'], 7), (['I5'], 2), (['I4'], 2), (['I3'], 6)]

In [None]:
firstFrequentList

[(['I1'], 6), (['I2'], 7), (['I5'], 2), (['I4'], 2), (['I3'], 6)]

**All Frequent Item Sets**

In [None]:
allFrequentItems = []
allFrequentItemsWithSupport = []

In [None]:
# Seed the running collections with the frequent 1-item sets:
# the bare item lists in one, the (items, support) tuples in the other.
allFrequentItems.extend(entry[0] for entry in firstFrequentList)
allFrequentItemsWithSupport.extend(firstFrequentList)

In [None]:
allFrequentItems

[['I1'], ['I2'], ['I5'], ['I4'], ['I3']]

In [None]:
allFrequentItemsWithSupport

[(['I1'], 6), (['I2'], 7), (['I5'], 2), (['I4'], 2), (['I3'], 6)]

## **Function to calculate the Second Frequent Item Set**

In [None]:
def secondFrequentItemSet(firstFrequentList, itemList, min_support):
  """Count the support of every pair of frequent items.

  Every pair is stored in sorted order, whether or not it turns out to be
  frequent, so both returned lists use one canonical form.

  Args:
    firstFrequentList: list of ``([item], count)`` tuples for the frequent
      single items; only the item itself is read.
    itemList: iterable of transactions (containers supporting ``in``).
    min_support: minimum number of transactions a pair must appear in.

  Returns:
    A tuple ``(second_frequent_list, before_second_frequent_list)``. Both are
    lists of ``([item_a, item_b], count)`` tuples; the first holds only pairs
    with ``count >= min_support``, the second holds every candidate pair.
  """
  frequent_items = [entry[0][0] for entry in firstFrequentList]

  before_second_frequent_list = []
  second_frequent_list = []

  for i in range(len(frequent_items)):
    for j in range(i + 1, len(frequent_items)):
      item_1 = frequent_items[i]
      item_2 = frequent_items[j]

      # Support = number of transactions containing both items.
      pair_count = sum(
          1 for items in itemList if item_1 in items and item_2 in items
      )

      pair = sorted([item_1, item_2])
      before_second_frequent_list.append((pair, pair_count))
      if pair_count >= min_support:
        second_frequent_list.append((pair, pair_count))

  return second_frequent_list, before_second_frequent_list

In [None]:
secondFrequentList, beforeSecondFrequentList = secondFrequentItemSet(firstFrequentList, itemList, min_support)

In [None]:
beforeSecondFrequentList

[(['I1', 'I2'], 4),
 (['I1', 'I5'], 2),
 (['I1', 'I4'], 1),
 (['I1', 'I3'], 4),
 (['I2', 'I5'], 2),
 (['I2', 'I4'], 2),
 (['I2', 'I3'], 4),
 (['I5', 'I4'], 0),
 (['I5', 'I3'], 1),
 (['I4', 'I3'], 0)]

In [None]:
secondFrequentList

[(['I1', 'I2'], 4),
 (['I1', 'I5'], 2),
 (['I1', 'I3'], 4),
 (['I2', 'I5'], 2),
 (['I2', 'I4'], 2),
 (['I2', 'I3'], 4)]

**Appending to All Frequent Item Sets**

In [None]:
# Add the frequent 2-item sets to the running collections:
# the bare item lists in one, the (items, support) tuples in the other.
allFrequentItems.extend(entry[0] for entry in secondFrequentList)
allFrequentItemsWithSupport.extend(secondFrequentList)

In [None]:
allFrequentItems

[['I1'],
 ['I2'],
 ['I5'],
 ['I4'],
 ['I3'],
 ['I1', 'I2'],
 ['I1', 'I5'],
 ['I1', 'I3'],
 ['I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3']]

In [None]:
allFrequentItemsWithSupport

[(['I1'], 6),
 (['I2'], 7),
 (['I5'], 2),
 (['I4'], 2),
 (['I3'], 6),
 (['I1', 'I2'], 4),
 (['I1', 'I5'], 2),
 (['I1', 'I3'], 4),
 (['I2', 'I5'], 2),
 (['I2', 'I4'], 2),
 (['I2', 'I3'], 4)]

**Unique Items**

In [None]:
unique = []

In [None]:
# Flatten the frequent 1-item sets into a plain list of item ids.
unique.extend(entry[0][0] for entry in firstFrequentList)

In [None]:
unique

['I1', 'I2', 'I5', 'I4', 'I3']

## **Pruning**  
**Checking whether every subset obtained by removing one item from a candidate is already among the frequent item sets**

In [None]:
def is_apriori(item_sets, union):
    """Apriori pruning test for one candidate item set.

    The candidate passes only if every subset obtained by dropping one of its
    items is present in ``item_sets``. Membership is compared as sets, so the
    order of items inside the candidate or inside the known item sets does
    not affect the result.

    Args:
        item_sets: iterable of known frequent item sets (iterables of
            hashable items).
        union: the candidate item set, as a sequence of hashable items.

    Returns:
        True if all one-item-smaller subsets are known, otherwise False.
        ``union`` is not modified.
    """
    known_sets = {frozenset(item_set) for item_set in item_sets}
    members = list(union)

    for position in range(len(members)):
        # The subset that leaves out exactly the item at `position`.
        subset = members[:position] + members[position + 1:]
        if frozenset(subset) not in known_sets:
            return False
    return True

## **Frequent Item Set for 3 or greater**

In [None]:
from itertools import combinations

In [None]:
def threeOrGreater(unique, itemList, allFrequentItems, min_support):
  """Find and report frequent item sets of size 3 and larger, level by level.

  For each size, every combination of ``unique`` items is counted against the
  transactions. Candidates are stored in sorted order so they share one
  canonical form with the 2-item sets and with the sorted lookups done in
  ``antCons``.

  Args:
    unique: list of frequent single items to combine.
    itemList: iterable of transactions, each an iterable of hashable items.
    allFrequentItems: list of already known frequent item sets; extended in
      place with every frequent set found here.
    min_support: minimum number of transactions a set must appear in.

  Returns:
    None. Results are printed per size and appended to ``allFrequentItems``
    and to the notebook-level list ``allFrequentItemsWithSupport``, which is
    read from global scope rather than passed as a parameter.

  The search stops at the first size that yields no frequent item set.
  """
  # Build each transaction's set once instead of once per candidate.
  transactions = [set(items) for items in itemList]

  for size in range(3, len(unique)+1):

    beforeDontKnowList = []
    dontKnowList = []

    for item_set in combinations(unique, size):
      # Canonical (sorted) form, independent of the order of `unique`.
      candidate = sorted(item_set)
      members = set(candidate)

      pair_count = sum(
          1 for transaction in transactions if members.issubset(transaction)
      )

      if pair_count >= min_support:
        # Every candidate meeting min_support is reported; only those whose
        # smaller subsets are all known frequent are kept.
        beforeDontKnowList.append((list(candidate), pair_count))
        if is_apriori(allFrequentItems, candidate):
          dontKnowList.append((candidate, pair_count))

    print(f"{size} Before Frequent Item Set")
    print("---------------------")
    print(beforeDontKnowList)
    print("---------------------")
    print("---------------------")
    print(f"{size} Frequent Item Set")
    print("---------------------")
    print(dontKnowList)
    print("---------------------")
    print("---------------------")

    for item in dontKnowList:
      allFrequentItems.append(item[0])
      allFrequentItemsWithSupport.append(item)

    # No frequent set of this size means none of any larger size either.
    if not dontKnowList:
      break

In [None]:
threeOrGreater(unique, itemList, allFrequentItems, min_support)

3 Before Frequent Item Set
---------------------
[(['I1', 'I2', 'I5'], 2), (['I1', 'I2', 'I3'], 2)]
---------------------
---------------------
3 Frequent Item Set
---------------------
[(['I1', 'I2', 'I5'], 2), (['I1', 'I2', 'I3'], 2)]
---------------------
---------------------
4 Before Frequent Item Set
---------------------
[]
---------------------
---------------------
4 Frequent Item Set
---------------------
[]
---------------------
---------------------


In [None]:
allFrequentItems

[['I1'],
 ['I2'],
 ['I5'],
 ['I4'],
 ['I3'],
 ['I1', 'I2'],
 ['I1', 'I5'],
 ['I1', 'I3'],
 ['I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3'],
 ['I1', 'I2', 'I5'],
 ['I1', 'I2', 'I3']]

In [None]:
allFrequentItemsWithSupport

[(['I1'], 6),
 (['I2'], 7),
 (['I5'], 2),
 (['I4'], 2),
 (['I3'], 6),
 (['I1', 'I2'], 4),
 (['I1', 'I5'], 2),
 (['I1', 'I3'], 4),
 (['I2', 'I5'], 2),
 (['I2', 'I4'], 2),
 (['I2', 'I3'], 4),
 (['I1', 'I2', 'I5'], 2),
 (['I1', 'I2', 'I3'], 2)]

## **Antecedents and Consequents**

In [None]:
min_confidence = 0.5

In [None]:
def antCons(allFrequentItems, allFrequentItemsWithSupport, min_confidence):
  """Generate association rules ``antecedent -> consequent`` with confidence.

  Every ordered pair of frequent item sets is considered, so both directions
  of a rule (A -> B and B -> A) are evaluated. A pair only forms a rule when
  the two sets share no item and their union is itself a frequent item set.
  Confidence is ``support(antecedent ∪ consequent) / support(antecedent)``.

  Args:
    allFrequentItems: list of frequent item sets. Kept for interface
      compatibility; the sets are read from ``allFrequentItemsWithSupport``.
    allFrequentItemsWithSupport: list of ``(item_list, support_count)``
      tuples for every frequent item set.
    min_confidence: minimum confidence a rule needs to be returned.

  Returns:
    List of ``(antecedent, consequent, confidence)`` tuples, where antecedent
    and consequent are the item lists taken from the input.
  """
  # Order-independent support lookup; avoids repeated linear list scans.
  support_of = {}
  for item_set, support in allFrequentItemsWithSupport:
    support_of.setdefault(frozenset(item_set), support)

  aprioriList = []

  for antecedent, antecedent_support in allFrequentItemsWithSupport:
    if not antecedent_support:
      # A zero support would make the confidence ratio undefined.
      continue
    antecedent_items = frozenset(antecedent)

    for consequent, _ in allFrequentItemsWithSupport:
      consequent_items = frozenset(consequent)

      # A rule needs disjoint sides; this also skips pairing a set with itself.
      if antecedent_items & consequent_items:
        continue

      union_support = support_of.get(antecedent_items | consequent_items)
      if union_support is None:
        # The combined item set is not frequent, so no rule is formed.
        continue

      confidence = union_support / antecedent_support
      if confidence >= min_confidence:
        aprioriList.append((antecedent, consequent, confidence))

  return aprioriList

In [None]:
aprioriList = antCons(allFrequentItems, allFrequentItemsWithSupport, min_confidence)

In [None]:
aprioriList

[(['I1'], ['I2'], 0.6666666666666666),
 (['I1'], ['I3'], 0.6666666666666666),
 (['I1'], ['I1', 'I2'], 0.6666666666666666),
 (['I1'], ['I1', 'I3'], 0.6666666666666666),
 (['I2'], ['I3'], 0.5714285714285714),
 (['I2'], ['I1', 'I2'], 0.5714285714285714),
 (['I2'], ['I2', 'I3'], 0.5714285714285714),
 (['I5'], ['I1', 'I2'], 1.0),
 (['I5'], ['I1', 'I5'], 1.0),
 (['I5'], ['I2', 'I5'], 1.0),
 (['I5'], ['I1', 'I2', 'I5'], 1.0),
 (['I4'], ['I2', 'I4'], 1.0),
 (['I3'], ['I1', 'I3'], 0.6666666666666666),
 (['I3'], ['I2', 'I3'], 0.6666666666666666),
 (['I1', 'I2'], ['I1', 'I5'], 0.5),
 (['I1', 'I2'], ['I1', 'I3'], 0.5),
 (['I1', 'I2'], ['I2', 'I5'], 0.5),
 (['I1', 'I2'], ['I2', 'I3'], 0.5),
 (['I1', 'I2'], ['I1', 'I2', 'I5'], 0.5),
 (['I1', 'I2'], ['I1', 'I2', 'I3'], 0.5),
 (['I1', 'I5'], ['I2', 'I5'], 1.0),
 (['I1', 'I5'], ['I1', 'I2', 'I5'], 1.0),
 (['I1', 'I3'], ['I2', 'I3'], 0.5),
 (['I1', 'I3'], ['I1', 'I2', 'I3'], 0.5),
 (['I2', 'I5'], ['I1', 'I2', 'I5'], 1.0),
 (['I2', 'I3'], ['I1', 'I2', 'I

## **Using Custom Dataset**

In [None]:
# NOTE(review): hard-coded absolute path; this cell only runs where the export
# file exists at /content/orders_export.csv.
data = pd.read_csv('/content/orders_export.csv')

In [None]:
print(data.shape)
data.head()

(118, 74)


Unnamed: 0,Name,Email,Financial Status,Paid at,Fulfillment Status,Fulfilled at,Accepts Marketing,Currency,Subtotal,Shipping,...,Tax 5 Name,Tax 5 Value,Phone,Receipt Number,Duties,Billing Province Name,Shipping Province Name,Payment ID,Payment Terms Name,Next Payment Due At
0,#2891,raslimaster@yahoo.com,paid,2022-05-07 21:27:24 +0530,fulfilled,2022-05-09 17:35:44 +0530,yes,INR,2830.0,0.0,...,,,,,,Tamil Nadu,Tamil Nadu,,,
1,#2891,raslimaster@yahoo.com,,,,,,,,,...,,,,,,,,,,
2,#2891,raslimaster@yahoo.com,,,,,,,,,...,,,,,,,,,,
3,#2892,ancyreji73@gmail.com,pending,,fulfilled,2022-05-09 17:35:44 +0530,yes,INR,1145.0,100.0,...,,,,,,Madhya Pradesh,Madhya Pradesh,,,
4,#2893,letusmakethisplaceheaven@gmail.com,paid,2022-05-09 14:00:01 +0530,fulfilled,2022-05-16 08:55:02 +0530,no,INR,7795.0,0.0,...,,,,,,Delhi,Delhi,,,


## **Only the important columns that contribute to the Apriori algorithm are taken from the data**  
**User, Item, Quantity**

In [None]:
# Keep only the order name, quantity and SKU columns. .copy() makes `df` an
# independent frame, so column assignments on `df` never write into a slice
# of `data` (the case pandas reports as SettingWithCopyWarning).
df = data[['Name', 'Lineitem quantity', 'Lineitem sku']].copy()

In [None]:
print(df.shape)
df.head()

(118, 3)


Unnamed: 0,Name,Lineitem quantity,Lineitem sku
0,#2891,1,HOD0011
1,#2891,1,HOD0444
2,#2891,1,HOD0005
3,#2892,1,HOD0126
4,#2893,1,HOD0008


In [None]:
df.nunique()

Name                 50
Lineitem quantity     2
Lineitem sku         94
dtype: int64

In [None]:
# Remove '#' from the order names. regex=False treats '#' as a literal string
# regardless of the pandas version's default, and assign() builds a new frame
# instead of writing a column into `df` in place.
df = df.assign(Name=df['Name'].str.replace('#', '', regex=False))

In [None]:
df.head()

Unnamed: 0,Name,Lineitem quantity,Lineitem sku
0,2891,1,HOD0011
1,2891,1,HOD0444
2,2891,1,HOD0005
3,2892,1,HOD0126
4,2893,1,HOD0008


In [None]:
# One row per (order name, SKU) pair with the remaining column summed;
# as_index=False keeps the grouping keys as ordinary columns.
group = df.groupby(['Name', 'Lineitem sku'], as_index=False).sum()

In [None]:
print(group.shape)
group.head()

(118, 3)


Unnamed: 0,Name,Lineitem sku,Lineitem quantity
0,2891,HOD0005,1
1,2891,HOD0011,1
2,2891,HOD0444,1
3,2892,HOD0126,1
4,2893,HOD0008,1


## **Pivot table showing which items each user has purchased**

In [None]:
# Order-by-SKU matrix of quantities; combinations that never occur become 0.
rating = (
    group
    .pivot(index='Name', columns='Lineitem sku', values='Lineitem quantity')
    .fillna(0)
    .astype('int')
)

In [None]:
rating

Lineitem sku,HOD0003,HOD0005,HOD0007,HOD0008,HOD0011,HOD0021,HOD0022,HOD0031,HOD0034,HOD0037,...,HOD0754,HOD0757,HOD0762,HOD0766,HOD0767,HOD0768,HOD0769,HOD0772,HOD0774,HOD0775
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2891,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2892,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2893,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2894,0,0,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2895,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2896,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2897,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2898,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2899,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2900,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,1


## **Binarize the rating matrix, because we are only interested in whether the user has made a purchase or not**

In [None]:
# 1 where the quantity is non-zero, 0 otherwise; `rating` itself is untouched.
rating_binary = rating.ne(0).astype('int')

In [None]:
rating_binary

Lineitem sku,HOD0003,HOD0005,HOD0007,HOD0008,HOD0011,HOD0021,HOD0022,HOD0031,HOD0034,HOD0037,...,HOD0754,HOD0757,HOD0762,HOD0766,HOD0767,HOD0768,HOD0769,HOD0772,HOD0774,HOD0775
Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2891,0,1,0,0,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2892,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2893,0,0,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2894,0,0,0,0,0,0,1,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2895,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2896,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2897,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2898,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2899,1,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2900,0,0,0,0,0,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,1


**Converting the data into the proper form**  
**that the Apriori functions take as input**

In [None]:
items = rating.columns

In [None]:
items

Index(['HOD0003', 'HOD0005', 'HOD0007', 'HOD0008', 'HOD0011', 'HOD0021',
       'HOD0022', 'HOD0031', 'HOD0034', 'HOD0037', 'HOD0045', 'HOD0052',
       'HOD0079', 'HOD0099', 'HOD0100', 'HOD0103', 'HOD0105', 'HOD0111',
       'HOD0118', 'HOD0123', 'HOD0126', 'HOD0147', 'HOD0153', 'HOD0157',
       'HOD0184', 'HOD0196', 'HOD0208', 'HOD0213', 'HOD0217', 'HOD0225',
       'HOD0229', 'HOD0242', 'HOD0244', 'HOD0262', 'HOD0271', 'HOD0274',
       'HOD0305', 'HOD0326', 'HOD0386', 'HOD0387', 'HOD0397', 'HOD0421',
       'HOD0429', 'HOD0433', 'HOD0444', 'HOD0466', 'HOD0489', 'HOD0491',
       'HOD0496', 'HOD0519', 'HOD0525', 'HOD0538', 'HOD0542', 'HOD0544',
       'HOD0548', 'HOD0573', 'HOD0575', 'HOD0579', 'HOD0586', 'HOD0588',
       'HOD0592', 'HOD0607', 'HOD0608', 'HOD0613', 'HOD0615', 'HOD0618',
       'HOD0628', 'HOD0631', 'HOD0632', 'HOD0635', 'HOD0640', 'HOD0644',
       'HOD0647', 'HOD0658', 'HOD0742', 'HOD0743', 'HOD0744', 'HOD0745',
       'HOD0747', 'HOD0748', 'HOD0749', 'HOD0750', 

In [None]:
user = rating.index

In [None]:
user

Index(['2891', '2892', '2893', '2894', '2895', '2896', '2897', '2898', '2899',
       '2900', '2901', '2902', '2903', '2904', '2905', '2906', '2907', '2908',
       '2909', '2910', '2911', '2912', '2913', '2914', '2915', '2916', '2917',
       '2918', '2919', '2920', '2921', '2922', '2923', '2924', '2925', '2926',
       '2927', '2928', '2929', '2930', '2931', '2932', '2933', '2934', '2935',
       '2936', '2937', '2938', '2939', '2940'],
      dtype='object', name='Name')

In [None]:
purchaseList = []

In [None]:
# One list of purchased SKUs per row of `rating` (a SKU is kept when its cell
# is non-null and non-zero). Assigning the result keeps this cell idempotent
# on re-run, and iterating whole rows avoids an .iloc lookup for every cell.
purchaseList = [
  [sku for sku, quantity in zip(items, row)
   if not pd.isnull(quantity) and quantity != 0]
  for row in rating.to_numpy()
]

In [None]:
purchaseList

[['HOD0005', 'HOD0011', 'HOD0444'],
 ['HOD0126'],
 ['HOD0008', 'HOD0242', 'HOD0326', 'HOD0429', 'HOD0749'],
 ['HOD0022',
  'HOD0034',
  'HOD0045',
  'HOD0052',
  'HOD0111',
  'HOD0118',
  'HOD0123',
  'HOD0153',
  'HOD0208',
  'HOD0262'],
 ['HOD0099', 'HOD0305', 'HOD0525'],
 ['HOD0573', 'HOD0613'],
 ['HOD0573', 'HOD0613'],
 ['HOD0575', 'HOD0579', 'HOD0607', 'HOD0615', 'HOD0635'],
 ['HOD0003', 'HOD0213', 'HOD0466'],
 ['HOD0745', 'HOD0751', 'HOD0757', 'HOD0775'],
 ['HOD0745', 'HOD0751', 'HOD0775'],
 ['HOD0747'],
 ['HOD0491'],
 ['HOD0491'],
 ['HOD0608', 'HOD0628', 'HOD0640', 'HOD0644'],
 ['HOD0244', 'HOD0544', 'HOD0745'],
 ['HOD0103', 'HOD0386', 'HOD0542', 'HOD0640'],
 ['HOD0586', 'HOD0618'],
 ['HOD0631', 'HOD0647'],
 ['HOD0433', 'HOD0742', 'HOD0748', 'HOD0754'],
 ['HOD0744'],
 ['HOD0749'],
 ['HOD0750', 'HOD0774'],
 ['HOD0034'],
 ['HOD0744', 'HOD0750'],
 ['HOD0632', 'HOD0640'],
 ['HOD0767'],
 ['HOD0034', 'HOD0100', 'HOD0184', 'HOD0196', 'HOD0274'],
 ['HOD0592'],
 ['HOD0592'],
 ['HOD0748']

In [None]:
# min_support is an absolute transaction count; min_confidence is the lower
# bound for the ratio support(combined set) / support(antecedent) in antCons.
min_support = 2
min_confidence = 0.6

## **First Frequent Item Set**

In [None]:
# firstFrequentItemSet returns (frequent, all_candidates). Unpack both so that
# firstFrequentList is the list of frequent items, not the whole 2-tuple.
firstFrequentList, beforeFirstFrequentList = firstFrequentItemSet(purchaseList, min_support)

In [None]:
firstFrequentList

[(['HOD0749'], 2),
 (['HOD0034'], 3),
 (['HOD0111'], 2),
 (['HOD0573'], 2),
 (['HOD0613'], 2),
 (['HOD0575'], 2),
 (['HOD0745'], 3),
 (['HOD0751'], 2),
 (['HOD0757'], 2),
 (['HOD0775'], 2),
 (['HOD0491'], 2),
 (['HOD0640'], 3),
 (['HOD0542'], 2),
 (['HOD0748'], 2),
 (['HOD0744'], 3),
 (['HOD0750'], 2),
 (['HOD0592'], 3),
 (['HOD0768'], 2),
 (['HOD0079'], 2)]

**All Frequent Item Sets**

In [None]:
allFrequentItems = []
allFrequentItemsWithSupport = []

In [None]:
# Seed the running collections with the frequent 1-item sets:
# the bare item lists in one, the (items, support) tuples in the other.
allFrequentItems.extend(entry[0] for entry in firstFrequentList)
allFrequentItemsWithSupport.extend(firstFrequentList)

In [None]:
allFrequentItems

[['HOD0749'],
 ['HOD0034'],
 ['HOD0111'],
 ['HOD0573'],
 ['HOD0613'],
 ['HOD0575'],
 ['HOD0745'],
 ['HOD0751'],
 ['HOD0757'],
 ['HOD0775'],
 ['HOD0491'],
 ['HOD0640'],
 ['HOD0542'],
 ['HOD0748'],
 ['HOD0744'],
 ['HOD0750'],
 ['HOD0592'],
 ['HOD0768'],
 ['HOD0079']]

In [None]:
allFrequentItemsWithSupport

[(['HOD0749'], 2),
 (['HOD0034'], 3),
 (['HOD0111'], 2),
 (['HOD0573'], 2),
 (['HOD0613'], 2),
 (['HOD0575'], 2),
 (['HOD0745'], 3),
 (['HOD0751'], 2),
 (['HOD0757'], 2),
 (['HOD0775'], 2),
 (['HOD0491'], 2),
 (['HOD0640'], 3),
 (['HOD0542'], 2),
 (['HOD0748'], 2),
 (['HOD0744'], 3),
 (['HOD0750'], 2),
 (['HOD0592'], 3),
 (['HOD0768'], 2),
 (['HOD0079'], 2)]

## **Second Frequent Item Set**

In [None]:
# secondFrequentItemSet returns (frequent, all_candidates). Unpack both so that
# secondFrequentList is the list of frequent pairs, not the whole 2-tuple.
secondFrequentList, beforeSecondFrequentList = secondFrequentItemSet(firstFrequentList, purchaseList, min_support)

In [None]:
secondFrequentList

[(['HOD0573', 'HOD0613'], 2),
 (['HOD0745', 'HOD0751'], 2),
 (['HOD0745', 'HOD0775'], 2),
 (['HOD0751', 'HOD0775'], 2)]

**Appending to All Frequent Item Sets**

In [None]:
# Add the frequent 2-item sets to the running collections:
# the bare item lists in one, the (items, support) tuples in the other.
allFrequentItems.extend(entry[0] for entry in secondFrequentList)
allFrequentItemsWithSupport.extend(secondFrequentList)

In [None]:
allFrequentItems

[['HOD0749'],
 ['HOD0034'],
 ['HOD0111'],
 ['HOD0573'],
 ['HOD0613'],
 ['HOD0575'],
 ['HOD0745'],
 ['HOD0751'],
 ['HOD0757'],
 ['HOD0775'],
 ['HOD0491'],
 ['HOD0640'],
 ['HOD0542'],
 ['HOD0748'],
 ['HOD0744'],
 ['HOD0750'],
 ['HOD0592'],
 ['HOD0768'],
 ['HOD0079'],
 ['HOD0573', 'HOD0613'],
 ['HOD0745', 'HOD0751'],
 ['HOD0745', 'HOD0775'],
 ['HOD0751', 'HOD0775']]

In [None]:
allFrequentItemsWithSupport

[(['HOD0749'], 2),
 (['HOD0034'], 3),
 (['HOD0111'], 2),
 (['HOD0573'], 2),
 (['HOD0613'], 2),
 (['HOD0575'], 2),
 (['HOD0745'], 3),
 (['HOD0751'], 2),
 (['HOD0757'], 2),
 (['HOD0775'], 2),
 (['HOD0491'], 2),
 (['HOD0640'], 3),
 (['HOD0542'], 2),
 (['HOD0748'], 2),
 (['HOD0744'], 3),
 (['HOD0750'], 2),
 (['HOD0592'], 3),
 (['HOD0768'], 2),
 (['HOD0079'], 2),
 (['HOD0573', 'HOD0613'], 2),
 (['HOD0745', 'HOD0751'], 2),
 (['HOD0745', 'HOD0775'], 2),
 (['HOD0751', 'HOD0775'], 2)]

**Unique Items**

In [None]:
unique = []

In [None]:
# Flatten the frequent 1-item sets into a plain list of item ids.
unique.extend(entry[0][0] for entry in firstFrequentList)

In [None]:
unique

['HOD0749',
 'HOD0034',
 'HOD0111',
 'HOD0573',
 'HOD0613',
 'HOD0575',
 'HOD0745',
 'HOD0751',
 'HOD0757',
 'HOD0775',
 'HOD0491',
 'HOD0640',
 'HOD0542',
 'HOD0748',
 'HOD0744',
 'HOD0750',
 'HOD0592',
 'HOD0768',
 'HOD0079']

## **Frequent Item Set for 3 or greater Item**

In [None]:
threeOrGreater(unique, purchaseList, allFrequentItems, min_support)

3 Frequent Item Set
---------------------
[(['HOD0745', 'HOD0751', 'HOD0775'], 2)]
---------------------
---------------------
4 Frequent Item Set
---------------------
[]
---------------------
---------------------


## **Antecedents and Consequents with Confidence**

In [None]:
aprioriList = antCons(allFrequentItems, allFrequentItemsWithSupport, min_confidence)

In [None]:
aprioriList

[(['HOD0573'], ['HOD0613'], 1.0),
 (['HOD0573'], ['HOD0573', 'HOD0613'], 1.0),
 (['HOD0613'], ['HOD0573', 'HOD0613'], 1.0),
 (['HOD0745'], ['HOD0751'], 0.6666666666666666),
 (['HOD0745'], ['HOD0775'], 0.6666666666666666),
 (['HOD0745'], ['HOD0745', 'HOD0751'], 0.6666666666666666),
 (['HOD0745'], ['HOD0745', 'HOD0775'], 0.6666666666666666),
 (['HOD0745'], ['HOD0751', 'HOD0775'], 0.6666666666666666),
 (['HOD0745'], ['HOD0745', 'HOD0751', 'HOD0775'], 0.6666666666666666),
 (['HOD0751'], ['HOD0775'], 1.0),
 (['HOD0751'], ['HOD0745', 'HOD0751'], 1.0),
 (['HOD0751'], ['HOD0745', 'HOD0775'], 1.0),
 (['HOD0751'], ['HOD0751', 'HOD0775'], 1.0),
 (['HOD0751'], ['HOD0745', 'HOD0751', 'HOD0775'], 1.0),
 (['HOD0775'], ['HOD0745', 'HOD0751'], 1.0),
 (['HOD0775'], ['HOD0745', 'HOD0775'], 1.0),
 (['HOD0775'], ['HOD0751', 'HOD0775'], 1.0),
 (['HOD0775'], ['HOD0745', 'HOD0751', 'HOD0775'], 1.0),
 (['HOD0745', 'HOD0751'], ['HOD0745', 'HOD0775'], 1.0),
 (['HOD0745', 'HOD0751'], ['HOD0751', 'HOD0775'], 1.0),

# **Using a Library**

In [None]:
#From Libraries

In [None]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules

In [None]:
# mlxtend takes support as a fraction of transactions. With the 50 orders
# shown in the outputs above, 0.04 corresponds to an absolute count of 2.
frequent_itemset = apriori(rating_binary, min_support=0.04, use_colnames=True)

In [None]:
frequent_itemset

Unnamed: 0,support,itemsets
0,0.06,(HOD0034)
1,0.04,(HOD0079)
2,0.04,(HOD0111)
3,0.04,(HOD0491)
4,0.04,(HOD0542)
5,0.04,(HOD0573)
6,0.04,(HOD0575)
7,0.06,(HOD0592)
8,0.04,(HOD0613)
9,0.06,(HOD0640)


In [None]:
# Derive association rules from the frequent item sets, filtered on the
# lift metric with a minimum threshold of 1.
rules = association_rules(frequent_itemset, metric="lift", min_threshold=1)

In [None]:
rules

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(HOD0573),(HOD0613),0.04,0.04,0.04,1.0,25.0,0.0384,inf
1,(HOD0613),(HOD0573),0.04,0.04,0.04,1.0,25.0,0.0384,inf
2,(HOD0745),(HOD0751),0.06,0.04,0.04,0.666667,16.666667,0.0376,2.88
3,(HOD0751),(HOD0745),0.04,0.06,0.04,1.0,16.666667,0.0376,inf
4,(HOD0775),(HOD0745),0.04,0.06,0.04,1.0,16.666667,0.0376,inf
5,(HOD0745),(HOD0775),0.06,0.04,0.04,0.666667,16.666667,0.0376,2.88
6,(HOD0775),(HOD0751),0.04,0.04,0.04,1.0,25.0,0.0384,inf
7,(HOD0751),(HOD0775),0.04,0.04,0.04,1.0,25.0,0.0384,inf
8,"(HOD0775, HOD0745)",(HOD0751),0.04,0.04,0.04,1.0,25.0,0.0384,inf
9,"(HOD0775, HOD0751)",(HOD0745),0.04,0.06,0.04,1.0,16.666667,0.0376,inf


In [None]:
# Order the rules from highest to lowest lift and preview the top rows.
df_res = rules.sort_values('lift', ascending=False)
df_res.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
0,(HOD0573),(HOD0613),0.04,0.04,0.04,1.0,25.0,0.0384,inf
1,(HOD0613),(HOD0573),0.04,0.04,0.04,1.0,25.0,0.0384,inf
6,(HOD0775),(HOD0751),0.04,0.04,0.04,1.0,25.0,0.0384,inf
7,(HOD0751),(HOD0775),0.04,0.04,0.04,1.0,25.0,0.0384,inf
8,"(HOD0775, HOD0745)",(HOD0751),0.04,0.04,0.04,1.0,25.0,0.0384,inf


In [None]:
# Keep the rules whose antecedent consists of exactly the one item 'HOD0745'.
has_target_antecedent = df_res['antecedents'].apply(
    lambda antecedent: list(antecedent) == ['HOD0745']
)
df_rec = df_res[has_target_antecedent]

In [None]:
df_rec.head()

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction
2,(HOD0745),(HOD0751),0.06,0.04,0.04,0.666667,16.666667,0.0376,2.88
5,(HOD0745),(HOD0775),0.06,0.04,0.04,0.666667,16.666667,0.0376,2.88
12,(HOD0745),"(HOD0775, HOD0751)",0.06,0.04,0.04,0.666667,16.666667,0.0376,2.88
