In [11]:
import numpy as np
import pandas as pd
import mlxtend
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.frequent_patterns import fpgrowth
import plotly
import plotly.express as px

In [12]:
import pyfpgrowth

In [13]:
data = "I1 I2 I5,I2 I4,I2 I3,I1 I2 I4,I1 I3,I2 I3,I1 I3,I1 I2 I3 I5,I1 I2 I3 I6"
transactionsZX = [x.split() for x in data.split(',')]
transactionsZX 

[['I1', 'I2', 'I5'],
 ['I2', 'I4'],
 ['I2', 'I3'],
 ['I1', 'I2', 'I4'],
 ['I1', 'I3'],
 ['I2', 'I3'],
 ['I1', 'I3'],
 ['I1', 'I2', 'I3', 'I5'],
 ['I1', 'I2', 'I3', 'I6']]

In [14]:
te = TransactionEncoder()
dataset_te = te.fit(transactionsZX).transform(transactionsZX)
dataset_te

array([[ True,  True, False, False,  True, False],
       [False,  True, False,  True, False, False],
       [False,  True,  True, False, False, False],
       [ True,  True, False,  True, False, False],
       [ True, False,  True, False, False, False],
       [False,  True,  True, False, False, False],
       [ True, False,  True, False, False, False],
       [ True,  True,  True, False,  True, False],
       [ True,  True,  True, False, False,  True]])

In [15]:
patterns = pyfpgrowth.find_frequent_patterns(transactionsZX,2)
patterns

{('I5',): 2,
 ('I1', 'I5'): 2,
 ('I2', 'I5'): 2,
 ('I1', 'I2', 'I5'): 2,
 ('I4',): 2,
 ('I2', 'I4'): 2,
 ('I1',): 6,
 ('I1', 'I2'): 4,
 ('I2', 'I3'): 4,
 ('I1', 'I2', 'I3'): 2,
 ('I1', 'I3'): 4,
 ('I2',): 7}

In [16]:
rule = pyfpgrowth.generate_association_rules(patterns,0.7)
rule 

{('I5',): (('I1', 'I2'), 1.0),
 ('I1', 'I5'): (('I2',), 1.0),
 ('I2', 'I5'): (('I1',), 1.0),
 ('I4',): (('I2',), 1.0)}

In [17]:
dataset = pd.read_csv(r'Market_Basket_Optimisation.csv')
print(dataset.shape)
dataset.head(5)

(7500, 20)


Unnamed: 0,shrimp,almonds,avocado,vegetables mix,green grapes,whole weat flour,yams,cottage cheese,energy drink,tomato juice,low fat yogurt,green tea,honey,salad,mineral water,salmon,antioxydant juice,frozen smoothie,spinach,olive oil
0,burgers,meatballs,eggs,,,,,,,,,,,,,,,,,
1,chutney,,,,,,,,,,,,,,,,,,,
2,turkey,avocado,,,,,,,,,,,,,,,,,,
3,mineral water,milk,energy bar,whole wheat rice,green tea,,,,,,,,,,,,,,,
4,low fat yogurt,,,,,,,,,,,,,,,,,,,


In [None]:
transaction = []
for i in range(dataset.shape[0]):
    for j in range(dataset.shape[1]):
        transaction.append(dataset.values[i,j])
transaction = np.array(transaction)
print(transaction)

In [None]:
df = pd.DataFrame(transaction,columns=["items"])
df['incident_count'] = 1
indexName = df[df['items'] == 'nan'].index
df.drop(indexName,inplace=True)

In [None]:
df_table = df.groupby('items').sum().sort_values('incident_count',ascending=False).reset_index()
df_table.head(5).style.background_gradient(cmap='Blues')

In [None]:
df_table['all'] = "Top 50 Items"
fig = px.treemap(df_table.head(50),path=['all','items'],values='incident_count',
                 color = df_table['incident_count'].head(50),
                 hover_data=['items'],color_continuous_scale="ylgn")
fig.show()

In [None]:
transaction = []
for i in range(dataset.shape[0]):
    transaction.append([str(dataset.values[i,j]) for j in range(dataset.shape[1])])
transaction = np.array(transaction)
te = TransactionEncoder()
transaction_te = te.fit(transaction).transform(transaction)
transaction_df = pd.DataFrame(transaction_te,columns=te.columns_)
transaction_df.head()

In [None]:
first30 = df_table['items'].head(30).values
print(first30)

In [None]:
transaction_df = transaction_df.loc[:,first30]
print(transaction_df.shape)

In [None]:
frequent_itemsets = fpgrowth(transaction_df, min_support=0.05,use_colnames=True)
frequent_itemsets.head(10)

In [None]:
%timeit  fpgrowth(transaction_df, min_support=0.05,use_colnames=True)

In [None]:
%timeit  apriori(transaction_df, min_support=0.05,use_colnames=True)

In [None]:
rules = association_rules(frequent_itemsets,metric='lift',min_threshold=1)
rules.sort_values('confidence',ascending=False)