# Eclat

## Importing the libraries

In [1]:
#!pip install apyori

In [2]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

## Data Preprocessing

In [3]:
dataset = pd.read_csv('Market_Basket_Optimisation.csv', header = None)

In [4]:
transactions = dataset.values.astype(str).tolist()

In [5]:
transactions

[['shrimp',
  'almonds',
  'avocado',
  'vegetables mix',
  'green grapes',
  'whole weat flour',
  'yams',
  'cottage cheese',
  'energy drink',
  'tomato juice',
  'low fat yogurt',
  'green tea',
  'honey',
  'salad',
  'mineral water',
  'salmon',
  'antioxydant juice',
  'frozen smoothie',
  'spinach',
  'olive oil'],
 ['burgers',
  'meatballs',
  'eggs',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan'],
 ['chutney',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan'],
 ['turkey',
  'avocado',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan',
  'nan'],
 ['mineral water',
  'milk',
  'energy bar',
  'whole wheat rice',
  'green tea',
  'nan',
  'nan',
  'nan',
 

## Training the Eclat model on the dataset

In [19]:
from apyori import apriori
rules = apriori(transactions = transactions, min_support = 0.003, min_lift = 1.1, min_length = 2, max_length = 2)

# Confidence and Lift do not matter but there is no function specifically for Eclat
# Use apriori but only use support results

# Lift must be higher than 1.0, or single items will be derived instead of associations only!!!

## Visualising the results

### Displaying the first results coming directly from the output of the apriori function

In [20]:
results = list(rules)

In [21]:
results

[RelationRecord(items=frozenset({'burgers', 'almonds'}), support=0.005199306759098787, ordered_statistics=[OrderedStatistic(items_base=frozenset({'almonds'}), items_add=frozenset({'burgers'}), confidence=0.25490196078431376, lift=2.923577382023146), OrderedStatistic(items_base=frozenset({'burgers'}), items_add=frozenset({'almonds'}), confidence=0.05963302752293578, lift=2.923577382023146)]),
 RelationRecord(items=frozenset({'almonds', 'cake'}), support=0.0030662578322890282, ordered_statistics=[OrderedStatistic(items_base=frozenset({'almonds'}), items_add=frozenset({'cake'}), confidence=0.1503267973856209, lift=1.8546074131406949), OrderedStatistic(items_base=frozenset({'cake'}), items_add=frozenset({'almonds'}), confidence=0.03782894736842105, lift=1.8546074131406949)]),
 RelationRecord(items=frozenset({'chocolate', 'almonds'}), support=0.005999200106652446, ordered_statistics=[OrderedStatistic(items_base=frozenset({'almonds'}), items_add=frozenset({'chocolate'}), confidence=0.2941176

In [22]:
len(results) # THIS IS THE ECLAT ALGORITHM, faster but you can derive less insights from it!

635

### Putting the results well organised into a Pandas DataFrame

In [23]:
def inspect(results):
    lhs         = [tuple(result[2][0][0])[0] for result in results]
    rhs         = [tuple(result[2][0][1])[0] for result in results]
    supports    = [result[1] for result in results]
    return list(zip(lhs, rhs, supports))
resultsinDataFrame = pd.DataFrame(inspect(results), columns = ['Product 1', 'Product 2', 'Support'])

### Displaying the results sorted by descending supports

In [24]:
resultsinDataFrame.nlargest(n = 10, columns = 'Support')

Unnamed: 0,Product 1,Product 2,Support
552,mineral water,spaghetti,0.059725
206,chocolate,mineral water,0.05266
275,eggs,mineral water,0.050927
522,milk,mineral water,0.047994
442,ground beef,mineral water,0.040928
218,chocolate,spaghetti,0.039195
454,ground beef,spaghetti,0.039195
280,eggs,spaghetti,0.036528
261,eggs,french fries,0.036395
379,frozen vegetables,mineral water,0.035729
