# Import Libraries

In [0]:
import numpy as np
import pandas as pd

# Generating Data

In [2]:
# Toy market-basket data: one row per transaction, with the purchased
# products stored as a single space-separated string.
data = {
    'products': [
        'bread eggs milk',
        'milk cheese',
        'bread butter cheese',
        'bread eggs',
    ],
}
# Transactions are labelled 1..4 instead of using the default 0-based index.
transactions = pd.DataFrame(data, index=[1, 2, 3, 4])
transactions

Unnamed: 0,products
1,bread eggs milk
2,milk cheese
3,bread butter cheese
4,bread eggs


# Preprocessing

In [3]:
# Split each basket string on whitespace into one column per product;
# baskets with fewer products are padded with missing values on the right.
product_strings = transactions['products']
expand = product_strings.str.split(expand=True)
expand

Unnamed: 0,0,1,2
1,bread,eggs,milk
2,milk,cheese,
3,bread,butter,cheese
4,bread,eggs,


In [4]:
# Build the alphabetically sorted list of distinct products across all baskets.
# Shorter baskets are padded with a missing-value marker, which may be None,
# NaN or pd.NA depending on the pandas version and string dtype. pd.notna()
# rejects all of them, so the padding never reaches the sort, which would fail
# on mixed str/float values.
products = sorted(
  {product for product in expand.values.ravel() if pd.notna(product)}
)
print(products)

['bread', 'butter', 'cheese', 'eggs', 'milk']


In [5]:
# Allocate the one-hot matrix: one row per transaction, one column per
# product, initialised to 0 ("not purchased").
transactions_encoded = np.zeros(
    shape=(len(transactions), len(products)),
    dtype=np.int8,
)
transactions_encoded

array([[0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0]], dtype=int8)

In [6]:
# One-hot encode the baskets in place: cell (i, j) becomes 1 when the i-th
# transaction contains products[j].
for row_idx, basket in enumerate(expand.values):
  # Keep only the real items of this basket; the padding added for shorter
  # baskets is dropped so membership is tested against product names only.
  purchased = {item for item in basket if pd.notna(item)}
  for col_idx, product in enumerate(products):
    if product in purchased:
      transactions_encoded[row_idx, col_idx] = 1
transactions_encoded

array([[1, 0, 0, 1, 1],
       [0, 0, 1, 0, 1],
       [1, 1, 1, 0, 0],
       [1, 0, 0, 1, 0]], dtype=int8)

In [7]:
# Wrap the one-hot matrix in a DataFrame whose columns are the product names,
# which is the input layout passed to apriori() below.
transactions_encoded_df = pd.DataFrame(
    data=transactions_encoded,
    columns=products,
)
transactions_encoded_df

Unnamed: 0,bread,butter,cheese,eggs,milk
0,1,0,0,1,1
1,0,0,1,0,1
2,1,1,1,0,0
3,1,0,0,1,0


# Apriori Algorithm

In [8]:
from mlxtend.frequent_patterns import apriori, association_rules

# List every itemset that occurs in at least one transaction.
# min_support must be strictly positive (NOTE(review): current mlxtend releases
# are understood to raise ValueError for 0.0 -- confirm against the installed
# version), so the smallest meaningful threshold is used instead: the share of
# a single transaction.
supports = apriori(
  transactions_encoded_df,
  min_support=1 / len(transactions_encoded_df),
  use_colnames=True,
)
supports

Unnamed: 0,support,itemsets
0,0.75,(bread)
1,0.25,(butter)
2,0.5,(cheese)
3,0.5,(eggs)
4,0.5,(milk)
5,0.25,"(bread, butter)"
6,0.25,"(bread, cheese)"
7,0.5,"(bread, eggs)"
8,0.25,"(bread, milk)"
9,0.25,"(butter, cheese)"


In [10]:
# Re-run Apriori with a stricter threshold: keep only the itemsets whose
# support is at least 0.3. This replaces the earlier `supports` table.
supports = apriori(
    transactions_encoded_df,
    min_support=0.3,
    use_colnames=True,
)
supports

Unnamed: 0,support,itemsets
0,0.75,(bread)
1,0.5,(cheese)
2,0.5,(eggs)
3,0.5,(milk)
4,0.5,"(bread, eggs)"


In [11]:
# Derive association rules with confidence >= 0.65 from the frequent itemsets,
# then keep only the columns needed to read them. Columns are selected by name
# rather than by position, so the selection does not depend on the order or
# number of metric columns that association_rules() returns.
rules = association_rules(supports, metric='confidence', min_threshold=0.65)
rules = rules.loc[:, ['antecedents', 'consequents', 'support', 'confidence', 'lift']]
rules

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(bread),(eggs),0.5,0.666667,1.333333
1,(eggs),(bread),0.5,1.0,1.333333


In [12]:
# Show the rules ordered from highest to lowest lift.
rules.sort_values('lift', ascending=False)

Unnamed: 0,antecedents,consequents,support,confidence,lift
0,(bread),(eggs),0.5,0.666667,1.333333
1,(eggs),(bread),0.5,1.0,1.333333
