# Market Basket Analysis of Store Data

## 1 Using MLxtend :

In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# Toy shopping data: every inner list is the basket of one transaction.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [None]:
# Fit the encoder on the baskets first, then transform the same baskets into
# the array that is wrapped in a DataFrame further down.
te = TransactionEncoder()
te.fit(dataset)
te_try = te.transform(dataset)

In [None]:
# Show the encoded array produced by the encoder's transform() call.
te_try

In [None]:
# Wrap the encoded array in a DataFrame, labelling its columns with the
# names the fitted encoder exposes through `columns_`.
df = pd.DataFrame(data=te_try, columns=te.columns_)

In [None]:
# Show the encoded transactions as a table.
df

### Model Training

In [None]:
from mlxtend.frequent_patterns import apriori

In [None]:
# Frequent itemsets with a support of at least 0.5.
# use_colnames is not passed here; compare with the call that sets it below.
apriori(df, min_support=0.5)

### Model Training with Column Names in the Result

In [None]:
# Same 0.5 support threshold, this time asking for item names in the result.
apriori(df, min_support=0.5, use_colnames=True)

### Calculate the length of Itemset

In [None]:
# Mine again with a stricter support threshold of 0.6, keeping item names.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
# Add a column holding the number of items in each frequent itemset.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets

### Length is 2 and Support is >= 0.8

In [None]:
# Keep only two-item itemsets whose support is at least 0.8.
frequent_itemsets[
    (frequent_itemsets['support'] >= 0.8)
    & (frequent_itemsets['length'] == 2)
]

In [None]:
# Look up the row for one specific itemset; set equality ignores item order.
frequent_itemsets[frequent_itemsets['itemsets'] == {'Eggs', 'Onion'}]

## 2 Using apyori:

## Dataset Description

* The dataset lists the products bought in 7500 transactions recorded over the course of a week at a French retail store.
* We use the **apyori** library to compute association rules with the Apriori algorithm.

In [None]:
# !pip install apyori

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from apyori import apriori

In [None]:
# Load the raw transactions. header=None is passed so that the first line of
# the CSV is read as data instead of being used for column names.
store_data = pd.read_csv("../Data/store_data.csv", header=None)
# Preview the first rows, then print the (rows, columns) shape.
display(store_data.head())
print(store_data.shape)

## Preprocessing on Data
*  The Apriori implementation needs the data as a list of transactions, i.e. a list of lists of item names.

In [None]:
# Turn every CSV row into one transaction: a list of item-name strings, which
# is the form handed to apriori() in the next cell.
#
# Cells that are missing (baskets shorter than the widest row) are dropped.
# Converting them with str() would put the literal string 'nan' into almost
# every transaction, and it would then be mined as if it were a product.
#
# Bounds come from the data itself, so a file with a different number of rows
# or columns no longer raises IndexError or gets silently truncated.
#
# Row 0 is skipped, exactly as the original loop did.
# NOTE(review): the file is read with header=None, so row 0 is a real
# transaction unless the CSV starts with a header line -- confirm whether it
# should be included.
records = [
    [str(cell) for cell in row if pd.notna(cell)]
    for row in store_data.values[1:]
]

In [None]:
# Mine the transactions. `apriori` at this point is the function imported
# from apyori a few cells above.
# NOTE(review): confirm that apyori honours `min_length`; its documented
# length option appears to be `max_length`.
association_rules = apriori(
    records,
    min_support=0.0045,
    min_confidence=0.2,
    min_lift=3,
    min_length=2,
)
# Materialise the result so it can be counted and indexed in later cells.
association_results = list(association_rules)

## How many relations were derived

In [None]:
# Report how many relations the mining step produced.
relation_count = len(association_results)
print("There are {} Relation derived.".format(relation_count))

### Association Rules Derived

In [None]:
# Print the item set (first field) of every derived relation, one per line.
for relation in association_results:
    print(relation[0])

## Rules Generated

In [None]:
# Print one rule per relation together with its support, confidence and lift.
for item in association_results:
    # A relation is indexed as (item set, support, ordered statistics).
    # Only the first ordered statistic of each relation is reported, which is
    # what this cell has always done.
    first_stat = item[2][0]

    # An ordered statistic is indexed as
    # (base items, added items, confidence, lift).
    # The rule direction is read from it rather than from the item set:
    # the item set is unordered, so taking its "first" and "second" element
    # could swap the two sides of the rule and silently dropped every item
    # beyond the second when a relation held three or more products.
    base_items = ", ".join(sorted(first_stat[0]))
    added_items = ", ".join(sorted(first_stat[1]))
    print("Rule: " + base_items + " -> " + added_items)

    # Support belongs to the relation as a whole.
    print("Support: " + str(item[1]))

    # Confidence and lift belong to the directed rule.
    print("Confidence: " + str(first_stat[2]))
    print("Lift: " + str(first_stat[3]))
    print("=====================================")

## 3 Using MLxtend :

In [None]:
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder

In [None]:
# Five example baskets; each inner list holds the items of one transaction.
dataset = [
    ['Milk', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Dill', 'Onion', 'Nutmeg', 'Kidney Beans', 'Eggs', 'Yogurt'],
    ['Milk', 'Apple', 'Kidney Beans', 'Eggs'],
    ['Milk', 'Unicorn', 'Corn', 'Kidney Beans', 'Yogurt'],
    ['Corn', 'Onion', 'Onion', 'Kidney Beans', 'Ice cream', 'Eggs'],
]

In [None]:
# Fit the encoder on the baskets, then transform those baskets into the
# array used to build the DataFrame below.
te = TransactionEncoder()
te.fit(dataset)
te_try = te.transform(dataset)

In [None]:
# Show the encoded array produced by the encoder's transform() call.
te_try

In [None]:
# Build a DataFrame from the encoded array, using the fitted encoder's
# `columns_` attribute as the column labels.
df = pd.DataFrame(data=te_try, columns=te.columns_)

In [None]:
# Show the encoded transactions as a table.
df

## Model Training

In [None]:
from mlxtend.frequent_patterns import apriori

In [None]:
# Frequent itemsets with a support of at least 0.5, using the mlxtend
# `apriori` imported in the cell above.
apriori(df, min_support=0.5)

In [None]:
## Model Training with Column Result return

In [None]:
# Same 0.5 support threshold, with item names requested in the result.
apriori(df, min_support=0.5, use_colnames=True)

In [None]:
## Calculate the length of Itemset

In [None]:
# Mine with a support threshold of 0.6 and keep item names in the output.
frequent_itemsets = apriori(df, min_support=0.6, use_colnames=True)
# Store the size of each itemset in a new 'length' column.
frequent_itemsets['length'] = frequent_itemsets['itemsets'].apply(len)
frequent_itemsets

## Length is 2 and Support is >= 0.8

In [None]:
# Select the itemsets that contain exactly two items and reach 0.8 support.
frequent_itemsets[
    (frequent_itemsets['support'] >= 0.8)
    & (frequent_itemsets['length'] == 2)
]

In [None]:
# Select the row of one specific itemset; set equality ignores item order.
frequent_itemsets[frequent_itemsets['itemsets'] == {'Eggs', 'Onion'}]