# Recommendation System for Retail Stores

## Importing libraries

In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

## Importing dataset

In [2]:
df = pd.read_csv('C:/Users/hp/Downloads/groceries.csv', names = ['Products'], sep = ',')
df.head()

Unnamed: 0,Products
0,"citrus fruit, tropical fruit, whole milk, butt..."
1,"citrus fruit, semi-finished bread, margarine, ..."
2,"tropical fruit, yogurt , coffee, citrus fruit,..."
3,"whole milk, butter, curd, coffee"
4,"pip fruit, yogurt, cream cheese, chocolate, wh..."


In [3]:
#nof of rows and columns in the dataset
df.shape


(8, 1)

In [4]:
# checking for missing values
df.isnull().sum()

Products    0
dtype: int64

In [5]:
# spliting the products and creating a list called by 'data'
data = list(df["Products"].apply(lambda x:x.split(",") ))
data

[['citrus fruit',
  ' tropical fruit',
  ' whole milk',
  ' butter',
  ' curd',
  ' coffee'],
 ['citrus fruit', ' semi-finished bread', ' margarine', '  tropical fruit'],
 ['tropical fruit',
  ' yogurt ',
  ' coffee',
  ' citrus fruit',
  ' chocolate',
  ' whole milk'],
 ['whole milk', ' butter', ' curd', ' coffee'],
 ['pip fruit', ' yogurt', ' cream cheese', ' chocolate', ' whole milk'],
 ['coffee', ' whole milk', ' condensed milk', ' yogurt', ' chocolate'],
 ['whole milk', ' butter', ' yogurt', ' chocolate'],
 ['tropical fruit', ' white bread', ' bottled water', ' citrus fruit']]

## Apriori Algorithm and One-Hot Encoding

In [6]:
# Transforming the list, with one-hot encoding

a = TransactionEncoder()
a_data = a.fit(data).transform(data)
df = pd.DataFrame(a_data,columns=a.columns_)
df = df.replace(True,1)
df = df.replace(False,0)
df

Unnamed: 0,tropical fruit,bottled water,butter,chocolate,citrus fruit,coffee,condensed milk,cream cheese,curd,margarine,...,tropical fruit.1,white bread,whole milk,yogurt,yogurt.1,citrus fruit.1,coffee.1,pip fruit,tropical fruit.2,whole milk.1
0,0,0,1,0,0,1,0,0,1,0,...,1,0,1,0,0,1,0,0,0,0
1,1,0,0,0,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
2,0,0,0,1,1,1,0,0,0,0,...,0,0,1,0,1,0,0,0,1,0
3,0,0,1,0,0,1,0,0,1,0,...,0,0,0,0,0,0,0,0,0,1
4,0,0,0,1,0,0,0,1,0,0,...,0,0,1,1,0,0,0,1,0,0
5,0,0,0,1,0,0,1,0,0,0,...,0,0,1,1,0,0,1,0,0,0
6,0,0,1,1,0,0,0,0,0,0,...,0,0,0,1,0,0,0,0,0,1
7,0,1,0,0,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,1,0


## Applying Apriori and Resulting 

In [7]:
#set a threshold value for the support value and calculating the support value.
df = apriori(df, min_support = 0.2, use_colnames = True, verbose = 1)
df

Processing 90 combinations | Sampling itemset size 2Processing 72 combinations | Sampling itemset size 3Processing 8 combinations | Sampling itemset size 4




Unnamed: 0,support,itemsets
0,0.375,( butter)
1,0.5,( chocolate)
2,0.25,( citrus fruit)
3,0.375,( coffee)
4,0.25,( curd)
5,0.5,( whole milk)
6,0.375,( yogurt)
7,0.25,(citrus fruit)
8,0.25,(tropical fruit)
9,0.25,(whole milk)


In [8]:
# viewing our interpretation values using the Associan rule function.
df_ar = association_rules(df, metric = "confidence", min_threshold = 0.8)
df_ar

Unnamed: 0,antecedents,consequents,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
0,( curd),( butter),0.25,0.375,0.25,1.0,2.666667,0.15625,inf,0.833333
1,(whole milk),( butter),0.25,0.375,0.25,1.0,2.666667,0.15625,inf,0.833333
2,( yogurt),( chocolate),0.375,0.5,0.375,1.0,2.0,0.1875,inf,0.8
3,(tropical fruit),( citrus fruit),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
4,( citrus fruit),(tropical fruit),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
5,( curd),( coffee),0.25,0.375,0.25,1.0,2.666667,0.15625,inf,0.833333
6,"( butter, curd)",( coffee),0.25,0.375,0.25,1.0,2.666667,0.15625,inf,0.833333
7,"( butter, coffee)",( curd),0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
8,"( coffee, curd)",( butter),0.25,0.375,0.25,1.0,2.666667,0.15625,inf,0.833333
9,( curd),"( butter, coffee)",0.25,0.25,0.25,1.0,4.0,0.1875,inf,1.0
