# Apriori

## Importing the libraries

In [None]:
#!pip install apyori

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing

In [None]:
# Load the baskets. header=None: the CSV has no header row, so its first line is data.
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header=None)
transactions = []

In [None]:
# Peek at the raw array: baskets shorter than the widest row are padded with NaN.
dataset.values

array([['shrimp', 'almonds', 'avocado', ..., 'frozen smoothie',
        'spinach', 'olive oil'],
       ['burgers', 'meatballs', 'eggs', ..., nan, nan, nan],
       ['chutney', nan, nan, ..., nan, nan, nan],
       ...,
       ['chicken', nan, nan, ..., nan, nan, nan],
       ['escalope', 'green tea', nan, ..., nan, nan, nan],
       ['eggs', 'frozen smoothie', 'yogurt cake', ..., nan, nan, nan]],
      dtype=object)

In [None]:
# Load the baskets. header=None: the CSV has no header row, so its first line is data.
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header=None)

# Build one list of item names per basket (row), each item as a string.
# Iterating over the rows themselves avoids hard-coding the file's
# dimensions, and the NaN padding of short baskets is dropped so that it
# does not show up as a fake 'nan' product in the mined rules.
transactions = [
    [str(item) for item in row if pd.notna(item)]
    for row in dataset.values
]

In [None]:
# Show the loaded DataFrame to sanity-check the import.
print(dataset)

## Training the Apriori model on the dataset

In [None]:
from apyori import apriori

# Mine association rules from the baskets. The thresholds below are the
# minimum support, confidence and lift a rule must reach to be kept; the
# length bounds restrict rules to exactly two items.
# NOTE(review): apyori appears to read only max_length and to ignore
# min_length -- confirm against the installed version.
rules = apriori(
    transactions=transactions,
    min_support=0.003,
    min_confidence=0.2,
    min_lift=3,
    min_length=2,
    max_length=2,
)

## Displaying the results

### Results from the output of the apriori function

In [None]:
# Collect the rules into a list so they can be indexed and reused by the
# cells below, then dump the raw records.
results = [*rules]
print(results)

[RelationRecord(items=frozenset({'light cream', 'chicken'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]), RelationRecord(items=frozenset({'mushroom cream sauce', 'escalope'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]), RelationRecord(items=frozenset({'pasta', 'escalope'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]), RelationRecord(items=frozenset({'honey', 'fromage blanc'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0.24

### Organising the results into a pandas DataFrame

Shape of the list (First item)
```
[
RelationRecord(
      items=frozenset({'light cream', 'chicken'}), 
      support=0, 
      ordered_statistics=[ 
          OrderedStatistic(
              items_base=frozenset({'light cream'}),
              items_add=frozenset({'chicken'}),
              confidence=0,
              lift=4
          )
      ])

2nd item
3rd item
...
]
```

To get the confidence of the 1st item -> results[0][2][0][2]

In [None]:
def inspect(results):
    """Flatten apriori relation records into one row per association rule.

    Records are read positionally: ``record[1]`` is the support and
    ``record[2]`` is the list of ordered statistics, each of which is
    ``(items_base, items_add, confidence, lift)``.

    A record may carry several ordered statistics (for example both
    A -> B and B -> A); every one of them becomes its own row rather than
    only the first being kept. A record with no ordered statistics
    contributes no rows.

    Args:
        results: Iterable of relation records.

    Returns:
        A list of ``(lhs, rhs, support, confidence, lift)`` tuples, in
        record order and, within a record, in ordered-statistic order.
    """
    rows = []
    for record in results:
        support = record[1]
        for statistic in record[2]:
            lhs, rhs, confidence, lift = statistic[:4]
            rows.append((lhs, rhs, support, confidence, lift))
    return rows

# Tabulate the flattened rules, one row per rule, with readable column names.
resultsInDataFrame = pd.DataFrame(
    data=inspect(results),
    columns=[
        "Left hand side",
        "Right hand side",
        "Support",
        "Confidence",
        "Lift",
    ],
)

### Displaying the results unsorted

In [None]:
# Bare expression: displays the rules in the order they were produced.
resultsInDataFrame

Unnamed: 0,Left hand side,Right hand side,Support,Confidence,Lift
0,(light cream),(chicken),0.004533,0.290598,4.843951
1,(mushroom cream sauce),(escalope),0.005733,0.300699,3.790833
2,(pasta),(escalope),0.005866,0.372881,4.700812
3,(fromage blanc),(honey),0.003333,0.245098,5.164271
4,(herb & pepper),(ground beef),0.015998,0.32345,3.291994
5,(tomato sauce),(ground beef),0.005333,0.377358,3.840659
6,(light cream),(olive oil),0.0032,0.205128,3.11471
7,(whole wheat pasta),(olive oil),0.007999,0.271493,4.12241
8,(pasta),(shrimp),0.005066,0.322034,4.506672


### Displaying the results sorted by descending lift

In [None]:
# Strongest rules first. The sorted result is only displayed, not assigned.
resultsInDataFrame.sort_values(by=['Lift'], ascending=False)

Unnamed: 0,Left hand side,Right hand side,Support,Confidence,Lift
3,(fromage blanc),(honey),0.003333,0.245098,5.164271
0,(light cream),(chicken),0.004533,0.290598,4.843951
2,(pasta),(escalope),0.005866,0.372881,4.700812
8,(pasta),(shrimp),0.005066,0.322034,4.506672
7,(whole wheat pasta),(olive oil),0.007999,0.271493,4.12241
5,(tomato sauce),(ground beef),0.005333,0.377358,3.840659
1,(mushroom cream sauce),(escalope),0.005733,0.300699,3.790833
4,(herb & pepper),(ground beef),0.015998,0.32345,3.291994
6,(light cream),(olive oil),0.0032,0.205128,3.11471


In [None]:
# Ask for the ten highest-lift rules; only nine rules exist here, so all are shown.
resultsInDataFrame.nlargest(n=10, columns='Lift')

Unnamed: 0,Left hand side,Right hand side,Support,Confidence,Lift
3,(fromage blanc),(honey),0.003333,0.245098,5.164271
0,(light cream),(chicken),0.004533,0.290598,4.843951
2,(pasta),(escalope),0.005866,0.372881,4.700812
8,(pasta),(shrimp),0.005066,0.322034,4.506672
7,(whole wheat pasta),(olive oil),0.007999,0.271493,4.12241
5,(tomato sauce),(ground beef),0.005333,0.377358,3.840659
1,(mushroom cream sauce),(escalope),0.005733,0.300699,3.790833
4,(herb & pepper),(ground beef),0.015998,0.32345,3.291994
6,(light cream),(olive oil),0.0032,0.205128,3.11471


## Notes

  * min_confidence: I started at 0.8. That produced too few rules, so I halved it and repeated until enough rules appeared (ending at 0.2).
  * min_lift: a lift of at least 3 is a good threshold; rules below it are treated as irrelevant.
  * min_length & max_length: min and max number of elements in our rules.
<br/>

**Let's consider the following scenarios:**
  * "Buy 1 toothpaste, Get 1 toothbrush for free". For that, we'd use min = max length = 2.
  * "Buy 10 products A and get 1 product B for free". For that, min_length = 1 and max_length = 11.