# Market Basket Optimization case study 

There is a shop owner in France who likes to boost the sales of his shop. So, he is trying to find the best association of products to sell to the customers in a deal : Buy one product and get another one for free.

## Import libraries

In [5]:
import numpy as np
import pandas as pd

## Import dataset

In [6]:
dataset = pd.read_csv('C:/Users/binayak/Videos/Machine Learning/Association Rule Learning/Eclat/Market_Basket_Optimisation.csv', header=None)

In [7]:
dataset

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
1,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
2,chutney,,,,,,,,,,,,,,,,,,,
3,turkey,avocado,,,,,,,,,,,,,,,,,,
4,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7496,butter,light mayo,fresh bread,,,,,,,,,,,,,,,,,
7497,burgers,frozen vegetables,eggs,french fries,magazines,green tea,,,,,,,,,,,,,,
7498,chicken,,,,,,,,,,,,,,,,,,,
7499,escalope,green tea,,,,,,,,,,,,,,,,,,


In [8]:
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7501 entries, 0 to 7500
Data columns (total 20 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   0       7501 non-null   object
 1   1       5747 non-null   object
 2   2       4389 non-null   object
 3   3       3345 non-null   object
 4   4       2529 non-null   object
 5   5       1864 non-null   object
 6   6       1369 non-null   object
 7   7       981 non-null    object
 8   8       654 non-null    object
 9   9       395 non-null    object
 10  10      256 non-null    object
 11  11      154 non-null    object
 12  12      87 non-null     object
 13  13      47 non-null     object
 14  14      25 non-null     object
 15  15      8 non-null      object
 16  16      4 non-null      object
 17  17      4 non-null      object
 18  18      3 non-null      object
 19  19      1 non-null      object
dtypes: object(20)
memory usage: 586.1+ KB


In [9]:
transactions=[]
for i in range(0,7501):
        transactions.append([str(dataset.values[i,j]) for j in range(0,20)])

In [10]:
transactions[0:2]

[['shrimp',
  'almonds',
  'avocado',
  'vegetables mix',
  'green grapes',
  'whole weat flour',
  'yams',
  'cottage cheese',
  'energy drink',
  'tomato juice',
  'low fat yogurt',
  'green tea',
  'honey',
  'salad',
  'mineral water',
  'salmon',
  'antioxydant juice',
  'frozen smoothie',
  'spinach',
  'olive oil'],
 ['burgers',
  'meatballs',
  'eggs',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan']]

In [11]:
len(transactions)

7501

## Training the Apriori model on the dataset

In [12]:
!pip install apyori



In [13]:
from apyori import apriori
rules = apriori(transactions=transactions,
                min_support=0.003, # we assume that a product must appear 3 times a day i.e. 21 times a week to be considered for association
                min_confidence=0.2, # rule of thumb
                min_lift=3, # mostly choose 3 , it gives better results 
                min_length=2,
                max_length=2)

## Visualizing the results

In [14]:
results = list(rules)
results

[RelationRecord(items=frozenset({'chicken', 'light cream'}), support=0.004532728969470737, ordered_statistics=[OrderedStatistic(items_base=frozenset({'light cream'}), items_add=frozenset({'chicken'}), confidence=0.29059829059829057, lift=4.84395061728395)]),
 RelationRecord(items=frozenset({'escalope', 'mushroom cream sauce'}), support=0.005732568990801226, ordered_statistics=[OrderedStatistic(items_base=frozenset({'mushroom cream sauce'}), items_add=frozenset({'escalope'}), confidence=0.3006993006993007, lift=3.790832696715049)]),
 RelationRecord(items=frozenset({'escalope', 'pasta'}), support=0.005865884548726837, ordered_statistics=[OrderedStatistic(items_base=frozenset({'pasta'}), items_add=frozenset({'escalope'}), confidence=0.3728813559322034, lift=4.700811850163794)]),
 RelationRecord(items=frozenset({'honey', 'fromage blanc'}), support=0.003332888948140248, ordered_statistics=[OrderedStatistic(items_base=frozenset({'fromage blanc'}), items_add=frozenset({'honey'}), confidence=0

## Putting results in pandas dataframe 

In [17]:
def inspect(results):
    product_1 = [tuple(result[2][0][0])[0] for result in results]
    product_2 = [tuple(result[2][0][1])[0] for result in results]
    supports  = [result[1] for result in results]
    return list(zip(product_1, product_2, supports))

In [18]:
inspect(results)

[('light cream', 'chicken', 0.004532728969470737),
 ('mushroom cream sauce', 'escalope', 0.005732568990801226),
 ('pasta', 'escalope', 0.005865884548726837),
 ('fromage blanc', 'honey', 0.003332888948140248),
 ('herb & pepper', 'ground beef', 0.015997866951073192),
 ('tomato sauce', 'ground beef', 0.005332622317024397),
 ('light cream', 'olive oil', 0.003199573390214638),
 ('whole wheat pasta', 'olive oil', 0.007998933475536596),
 ('pasta', 'shrimp', 0.005065991201173177)]

In [21]:
resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Product_1', 'Product_2','supports'])
resultsinDataFrame

Unnamed: 0,Product_1,Product_2,supports
0,light cream,chicken,0.004533
1,mushroom cream sauce,escalope,0.005733
2,pasta,escalope,0.005866
3,fromage blanc,honey,0.003333
4,herb & pepper,ground beef,0.015998
5,tomato sauce,ground beef,0.005333
6,light cream,olive oil,0.0032
7,whole wheat pasta,olive oil,0.007999
8,pasta,shrimp,0.005066


In [22]:
resultsinDataFrame.nlargest(n = 10, columns = 'supports')

Unnamed: 0,Product_1,Product_2,supports
4,herb & pepper,ground beef,0.015998
7,whole wheat pasta,olive oil,0.007999
2,pasta,escalope,0.005866
1,mushroom cream sauce,escalope,0.005733
5,tomato sauce,ground beef,0.005333
8,pasta,shrimp,0.005066
0,light cream,chicken,0.004533
3,fromage blanc,honey,0.003333
6,light cream,olive oil,0.0032
