# Apriori

## Importing the libraries

In [32]:
!pip install apyori

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [33]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing

In [34]:
dataset = pd.read_csv('groceries - groceries.csv', header = None)
transactions = []
for i in range(0, 9835):
  transactions.append([str(dataset.values[i,j]) for j in range(0, 32)])

## Training the Apriori model on the dataset

In [35]:
from apyori import apriori
rules = apriori(transactions = transactions, min_support = 0.003, min_confidence = 0.2, min_lift = 3, min_length = 2, max_length = 2)

## Visualising the results

### Displaying the first results coming directly from the output of the apriori function

In [36]:
results = list(rules)

In [37]:
results

[RelationRecord(items=frozenset({'10', 'domestic eggs'}), support=0.005083884087442806, ordered_statistics=[OrderedStatistic(items_base=frozenset({'10'}), items_add=frozenset({'domestic eggs'}), confidence=0.2032520325203252, lift=3.2034995830727535)]),
 RelationRecord(items=frozenset({'11', 'domestic eggs'}), support=0.0037620742247076767, ordered_statistics=[OrderedStatistic(items_base=frozenset({'11'}), items_add=frozenset({'domestic eggs'}), confidence=0.20329670329670332, lift=3.2042036489151875)]),
 RelationRecord(items=frozenset({'frankfurter', '11'}), support=0.003965429588205389, ordered_statistics=[OrderedStatistic(items_base=frozenset({'11'}), items_add=frozenset({'frankfurter'}), confidence=0.21428571428571433, lift=3.633620689655173)]),
 RelationRecord(items=frozenset({'12', 'domestic eggs'}), support=0.003253685815963396, ordered_statistics=[OrderedStatistic(items_base=frozenset({'12'}), items_add=frozenset({'domestic eggs'}), confidence=0.2735042735042735, lift=4.3107604

### Putting the results well organised into a Pandas DataFrame

In [38]:
def inspect(results):
    lhs         = [tuple(result[2][0][0])[0] for result in results]
    rhs         = [tuple(result[2][0][1])[0] for result in results]
    supports    = [result[1] for result in results]
    confidences = [result[2][0][2] for result in results]
    lifts       = [result[2][0][3] for result in results]
    return list(zip(lhs, rhs, supports, confidences, lifts))
resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Left Hand Side', 'Right Hand Side', 'Support', 'Confidence', 'Lift'])

### Displaying the results non sorted

In [39]:
resultsinDataFrame

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
0,10,domestic eggs,0.005084,0.203252,3.2035
1,11,domestic eggs,0.003762,0.203297,3.204204
2,11,frankfurter,0.003965,0.214286,3.633621
3,12,domestic eggs,0.003254,0.273504,4.31076
4,12,other vegetables,0.007524,0.632479,3.270467
5,12,pip fruit,0.003152,0.264957,3.502493
6,12,root vegetables,0.004067,0.34188,3.136561
7,12,tropical fruit,0.003864,0.324786,3.098228
8,12,whipped/sour cream,0.003152,0.264957,3.696248
9,13,other vegetables,0.004779,0.602564,3.115782


### Displaying the results sorted by descending lifts

In [40]:
resultsinDataFrame.nlargest(n = 10, columns = 'Lift')

Unnamed: 0,Left Hand Side,Right Hand Side,Support,Confidence,Lift
15,Instant food products,hamburger meat,0.00305,0.379747,11.421438
22,flour,sugar,0.004982,0.28655,8.463112
25,processed cheese,white bread,0.004169,0.251534,5.975445
19,liquor,bottled beer,0.004677,0.422018,5.240594
12,14,root vegetables,0.003762,0.480519,4.408497
3,12,domestic eggs,0.003254,0.273504,4.31076
24,herbs,root vegetables,0.007016,0.43125,3.956477
18,berries,whipped/sour cream,0.009049,0.272171,3.796886
26,rice,root vegetables,0.003152,0.413333,3.792102
8,12,whipped/sour cream,0.003152,0.264957,3.696248
