# Association Rule Learning using Apriori Algorithm by `Mr. Harshit Dawar!`

## Importing Libraries

In [5]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from apyori import apriori

## Loading & Processing Data

In [6]:
data = pd.read_csv("Market_Basket_Optimisation.csv")

In [7]:
data

Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,
2,turkey,avocado,,,,,,,,,,,,,,,,,,
3,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
4,low fat yogurt,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7495,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7496,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7497,chicken,,,,,,,,,,,,,,,,,,,
7498,escalope,green tea,,,,,,,,,,,,,,,,,,


In [9]:
data.isna().sum()

shrimp                  0
almonds              1754
avocado              3112
vegetables mix       4156
green grapes         4972
whole weat flour     5637
yams                 6132
cottage cheese       6520
energy drink         6847
tomato juice         7106
low fat yogurt       7245
green tea            7347
honey                7414
salad                7454
mineral water        7476
salmon               7493
antioxydant juice    7497
frozen smoothie      7497
spinach              7498
olive oil            7500
dtype: int64

***Although, there are many none entries in the data, but still, it should be remain untouched because the entries are of the items purchased with another items, so, here nothing can be done!***

In [32]:
# Converting every data entry into string
data_string_format = []

for i in range(data.shape[0]):
    data_string_format.append([ str(data.iloc[i, j]) for j in range(data.shape[1])])

In [34]:
data_string_format[0]

['burgers',
 'meatballs',
 'eggs',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan',
 'nan']

## Applying Apriori Algorithm!

In [47]:
apriori_results = apriori(transactions = data_string_format,
                          min_lift = 3,
                          min_confidence = 0.3,
                          min_support = 0.005,
                          min_length = 2,
                          max_length = 2) 

In [48]:
results = list(apriori_results)

In [70]:
results

[RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005733333333333333, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.30069930069930073, lift=3.7903273197390845)]),
 RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005866666666666667, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.37288135593220345, lift=4.700185158809287)]),
 RelationRecord(items=frozenset({'herb & pepper', 'ground beef'}), support=0.016, ordered_statistics=[OrderedStatistic(items_base=frozenset({'herb & pepper'}), items_add=frozenset({'ground beef'}), confidence=0.3234501347708895, lift=3.2915549671393096)]),
 RelationRecord(items=frozenset({'tomato sauce', 'ground beef'}), support=0.005333333333333333, ordered_statistics=[OrderedStatistic(items_base=frozenset({'tomato sauce'}), items_add=frozenset({'ground beef'}), c

In [95]:
# Fetching the items to create a proper pandas dataframe for proper visualization

base_items = np.array([ list(result[2][0][0])[0] for result in results]).reshape(-1, 1)
add_on_items = np.array([ list(result[2][0][1])[0] for result in results]).reshape(-1, 1)
support = np.array([result[1] for result in results]).reshape(-1, 1)
confidence = np.array([ result[2][0][2] for result in results]).reshape(-1, 1)
lift = np.array([ result[2][0][3] for result in results]).reshape(-1, 1)

In [99]:
apriori_data = pd.DataFrame(np.hstack([base_items, add_on_items, support, confidence, lift]),
             columns = ["Base Item", "Add On Item", "Support", "Confidence", "Lift"])

In [103]:
# Final DataFrame
apriori_data.sort_values(by = "Lift", ascending = False)

Unnamed: 0,Base Item,Add On Item,Support,Confidence,Lift
1,pasta,escalope,0.0058666666666666,0.3728813559322034,4.700185158809287
4,pasta,shrimp,0.0050666666666666,0.3220338983050848,4.514493901473151
3,tomato sauce,ground beef,0.0053333333333333,0.3773584905660377,3.840147461662528
0,mushroom cream sauce,escalope,0.0057333333333333,0.3006993006993007,3.790327319739085
2,herb & pepper,ground beef,0.016,0.3234501347708895,3.2915549671393096
