## <span style="color : green"> Association Rule Mining </span>

# <center> Table of Contents </center>

1. Encode the data into Transcript
1. Train the apriori algorithm and compute t


### **Apriori Algorithm**

###Description
The Apriori algorithm uses frequent itemsets to generate association rules, and it is designed to work on the databases that contain transactions. With the help of these association rule, it determines how strongly or how weakly two objects are connected. This algorithm uses a breadth-first search and Hash Tree to calculate the itemset associations efficiently. It is the iterative process for finding the frequent itemsets from the large dataset.

In [None]:
! pip install apyori

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting apyori
  Downloading apyori-1.1.2.tar.gz (8.6 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: apyori
  Building wheel for apyori (setup.py) ... [?25l[?25hdone
  Created wheel for apyori: filename=apyori-1.1.2-py3-none-any.whl size=5976 sha256=04468732c5dec7adaae5934ab9f10b06387f607f281532277fac82cf65635c65
  Stored in directory: /root/.cache/pip/wheels/32/2a/54/10c595515f385f3726642b10c60bf788029e8f3a1323e3913a
Successfully built apyori
Installing collected packages: apyori
Successfully installed apyori-1.1.2


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from apyori import apriori


## 1) Read the data

In [None]:
df=pd.read_csv("Shop1.csv")
df.head()

Unnamed: 0,PID,Item
0,1,"burgers,meatballs,eggs"
1,2,"mineral water,milk,energy bar,whole wheat rice..."
2,3,low fat yogurt
3,4,"whole wheat pasta,french fries"
4,5,"soup,light cream,shallot"


## 2) Display the candidate set and frequency set for every iteration

In [None]:
from apyori import apriori
import pandas as pd

items = df['Item'].values
data_list = items.tolist()
# data_list = [x.split(',') for x in items.tolist() ]
# data_list = reduce(lambda acc, sublist: acc + sublist, data_list)

rules = apriori(data_list, min_support=0.8, \
                min_confidence=0.8, \
                min_lift=1.1, \
                min_length=2)

for rule in rules:
    print("Candidate Set: ", list(rule.items))
    print("Support: ", rule.support)



Candidate Set:  [' ', 'i', 'r']
Support:  0.8
Candidate Set:  ['n', ' ', 'r']
Support:  0.8
Candidate Set:  [' ', 'r', 'o']
Support:  0.84
Candidate Set:  ['e', ' ', 'r', ',']
Support:  0.84
Candidate Set:  ['r', ' ', 'i', ',']
Support:  0.8
Candidate Set:  ['n', ' ', 'r', ',']
Support:  0.8
Candidate Set:  [' ', 'r', ',', 'o']
Support:  0.8
Candidate Set:  [' ', 'i', 'r', 'a']
Support:  0.8
Candidate Set:  ['n', ' ', 'r', 'a']
Support:  0.8
Candidate Set:  [' ', 'r', 'o', 'a']
Support:  0.84
Candidate Set:  ['e', ' ', 'i', 'r']
Support:  0.8
Candidate Set:  ['e', 'n', ' ', 'r']
Support:  0.8
Candidate Set:  ['e', ' ', 'r', 'o']
Support:  0.8
Candidate Set:  ['t', ' ', 'i', 'r']
Support:  0.8
Candidate Set:  [' ', 'r', 'o', 'l']
Support:  0.8
Candidate Set:  ['t', 'n', ' ', 'r']
Support:  0.8
Candidate Set:  ['t', ' ', 'r', 'o']
Support:  0.84
Candidate Set:  [',', 'e', 'a', ' ', 'r']
Support:  0.84
Candidate Set:  [',', 'a', ' ', 'r', 'i']
Support:  0.8
Candidate Set:  ['n', ',', 'a',

## 3) Display the association rules

In [None]:
from apyori import apriori
import pandas as pd

items = df['Item'].values
data_list = items.tolist()

rules = apriori(data_list, min_support=0.83, \
                min_confidence=0.9, \
                min_lift=1.1, \
                min_length=2, verbose=True)

for rule in rules :
    print(rule)


RelationRecord(items=frozenset({' ', 'r', 'o'}), support=0.84, ordered_statistics=[OrderedStatistic(items_base=frozenset({' '}), items_add=frozenset({'r', 'o'}), confidence=0.9545454545454545, lift=1.1363636363636362), OrderedStatistic(items_base=frozenset({'r', 'o'}), items_add=frozenset({' '}), confidence=1.0, lift=1.1363636363636365)])
RelationRecord(items=frozenset({'e', ' ', 'r', ','}), support=0.84, ordered_statistics=[OrderedStatistic(items_base=frozenset({' ', ','}), items_add=frozenset({'e', 'r'}), confidence=1.0, lift=1.1363636363636365), OrderedStatistic(items_base=frozenset({'e', ' '}), items_add=frozenset({',', 'r'}), confidence=1.0, lift=1.1363636363636365), OrderedStatistic(items_base=frozenset({',', 'r'}), items_add=frozenset({'e', ' '}), confidence=0.9545454545454545, lift=1.1363636363636362), OrderedStatistic(items_base=frozenset({'e', 'r'}), items_add=frozenset({' ', ','}), confidence=0.9545454545454545, lift=1.1363636363636362)])
RelationRecord(items=frozenset({' ',

## 4) Find all the rules of these subsets that have higher confidence value

In [None]:
items = df['Item'].values
data_list = items.tolist()

rules = apriori(data_list, min_support=0.83, \
                min_confidence=0.9, \
                min_lift=1.1, \
                min_length=2)

for rule in rules :
    for ordered_statistic in rule.ordered_statistics:
        if ordered_statistic.confidence > 0.9:
            print(ordered_statistic)

OrderedStatistic(items_base=frozenset({' '}), items_add=frozenset({'r', 'o'}), confidence=0.9545454545454545, lift=1.1363636363636362)
OrderedStatistic(items_base=frozenset({'r', 'o'}), items_add=frozenset({' '}), confidence=1.0, lift=1.1363636363636365)
OrderedStatistic(items_base=frozenset({' ', ','}), items_add=frozenset({'e', 'r'}), confidence=1.0, lift=1.1363636363636365)
OrderedStatistic(items_base=frozenset({'e', ' '}), items_add=frozenset({',', 'r'}), confidence=1.0, lift=1.1363636363636365)
OrderedStatistic(items_base=frozenset({',', 'r'}), items_add=frozenset({'e', ' '}), confidence=0.9545454545454545, lift=1.1363636363636362)
OrderedStatistic(items_base=frozenset({'e', 'r'}), items_add=frozenset({' ', ','}), confidence=0.9545454545454545, lift=1.1363636363636362)
OrderedStatistic(items_base=frozenset({' '}), items_add=frozenset({'a', 'r', 'o'}), confidence=0.9545454545454545, lift=1.1363636363636362)
OrderedStatistic(items_base=frozenset({' ', 'a'}), items_add=frozenset({'r'