In [1]:
import pandas as pd
import xlrd
from mlxtend.preprocessing import TransactionEncoder
import numpy as np
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
import plotly.express as px

In [2]:
# Load the transaction lines from sheet 'Sheet10' of the workbook via openpyxl.
data = pd.read_excel(io='set.xlsx', sheet_name='Sheet10', engine='openpyxl')

In [3]:
# Preview the first five rows of the loaded frame.
data.head(n=5)

Unnamed: 0,InvoiceNo,Product,Quantity,SS
0,1,Mlijeko,1,SS15
1,1,Voda,1,SS15
2,1,Sokovi,1,SS15
3,1,Kafa,1,SS15
4,1,Čokolade,1,SS15


In [4]:
# Show the column labels of the loaded frame.
data.columns

Index(['InvoiceNo', 'Product', 'Quantity', 'SS'], dtype='object')

In [5]:
# Distinct product names, in order of first appearance.
data['Product'].unique()

array(['Mlijeko', 'Voda', 'Sokovi', 'Kafa', 'Čokolade', 'Bombone', 'Keks',
       'Vegeta', 'So', 'Supa', 'Brašno', 'Ulje', 'Tjestenina', 'Začini',
       'Sir', 'Jogurt', 'Piletina', 'Teletina', 'Gljive', 'Jabuke',
       'Kruške', 'Banane', 'Limun', 'Narandža', 'Čips', 'Kolači',
       'Deterdžent', 'Omekšivač', 'Šampon', 'Regenerator', 'Sapun',
       'Sredstvazačišćenje', 'Krompir', 'Luk', 'Kupus', 'Paradajz',
       'Krastavac', 'Kiselasalata', 'Salata', 'Šećer', 'Nescafe',
       'Alkoholnapića', 'Pastazazube', 'Toaletpapir', 'Riža', 'Kukuruz',
       'Kreme', 'Smoki', 'Grisini', 'Kikiriki', 'Kokice', 'Žvake',
       'Mineralnavoda', 'Ostalagaziranapića', 'Puding', 'Šlag',
       'Vrhnjezakuhanje', 'Kockazakafu', 'Puder', 'Maskara', 'Labelo',
       'Četkazakosu', 'Parfem', 'Lakzanokte', 'Lakzakosu', 'Aceton',
       'Energetskapića', 'Lijekovi', 'Biljnipreparati', 'Čajevi', 'Jaja',
       'Hljeb', 'Sjajzausne', 'Karmin', 'Korektor'], dtype=object)

In [6]:
# Distinct values of the SS column; the baskets below are built per SS value.
data['SS'].unique()

array(['SS15', 'SS25', 'SS35'], dtype=object)

In [7]:
# Trim surrounding whitespace so padded variants of a product name do not
# end up as separate products when rows are grouped by 'Product' later.
data["Product"] = data["Product"].str.strip()

In [8]:
# Discard rows that carry no invoice number (row-wise drop is the default axis),
# then store the invoice number as text so it acts as a label, not a quantity.
data.dropna(subset=['InvoiceNo'], inplace=True)
data['InvoiceNo'] = data['InvoiceNo'].astype(str)

In [9]:
# Purchases for SS15: one row per invoice, one column per product, each cell
# holding the summed quantity (0 where the invoice lacks that product).
basket_SS15 = (
    data.loc[data['SS'].eq("SS15")]
    .groupby(['InvoiceNo', 'Product'])['Quantity']
    .sum()
    .unstack(level='Product')
    .fillna(0)
)

In [10]:
# Purchases for SS25: one row per invoice, one column per product, each cell
# holding the summed quantity (0 where the invoice lacks that product).
basket_SS25 = (
    data.loc[data['SS'].eq("SS25")]
    .groupby(['InvoiceNo', 'Product'])['Quantity']
    .sum()
    .unstack(level='Product')
    .fillna(0)
)

In [11]:
# Purchases for SS35: one row per invoice, one column per product, each cell
# holding the summed quantity (0 where the invoice lacks that product).
basket_SS35 = (
    data.loc[data['SS'].eq("SS35")]
    .groupby(['InvoiceNo', 'Product'])['Quantity']
    .sum()
    .unstack(level='Product')
    .fillna(0)
)

In [12]:
def hot_encode(x):
    """Collapse a summed quantity into a 0/1 purchase flag.

    The basket matrices hold quantities, while the frequent-pattern functions
    used below are fed 0/1 indicators, so every cell is reduced to
    "bought" (1) or "not bought" (0).

    Parameters
    ----------
    x : int or float
        Quantity of one product on one invoice.

    Returns
    -------
    int
        1 when ``x`` is strictly positive, otherwise 0.

    Notes
    -----
    The earlier two-branch version (``x <= 0`` -> 0, ``x >= 1`` -> 1) fell
    through and returned ``None`` for fractional quantities in (0, 1) and for
    NaN, leaving non-binary cells in the encoded frame. Every input now maps
    to 0 or 1; results for ``x <= 0`` and ``x >= 1`` are unchanged.
    """
    # `x > 0` is False for NaN as well, so a missing quantity counts as
    # "not bought" instead of leaking through as None.
    return 1 if x > 0 else 0
 
# Binarise every basket matrix cell-by-cell with hot_encode; the most recent
# result also remains bound to basket_encoded.
basket_SS15 = basket_encoded = basket_SS15.applymap(hot_encode)

basket_SS25 = basket_encoded = basket_SS25.applymap(hot_encode)

basket_SS35 = basket_encoded = basket_SS35.applymap(hot_encode)

In [13]:
# Mine frequent itemsets from the SS15 baskets (minimum support 0.6).
frq_items = apriori(basket_SS15, use_colnames=True, min_support=0.6)

# Derive association rules with a lift threshold of 1, then rank them by
# confidence (highest first), using lift to break ties.
rules = (
    association_rules(frq_items, min_threshold=1, metric="lift")
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

    antecedents consequents  antecedent support  consequent support   support  \
6  (Tjestenina)      (Kafa)            0.671642            0.820896  0.611940   
5     (Mlijeko)      (Kafa)            0.791045            0.820896  0.686567   
1       (Hljeb)      (Kafa)            0.746269            0.820896  0.641791   
2       (Hljeb)   (Mlijeko)            0.746269            0.791045  0.626866   
4        (Kafa)   (Mlijeko)            0.820896            0.791045  0.686567   

   confidence      lift  leverage  conviction  
6    0.911111  1.109899  0.060593    2.014925  
5    0.867925  1.057290  0.037202    1.356077  
1    0.860000  1.047636  0.029182    1.279318  
2    0.840000  1.061887  0.036534    1.305970  
4    0.836364  1.057290  0.037202    1.276949  


In [14]:
# Mine frequent itemsets from the SS25 baskets (minimum support 0.6).
frq_items = apriori(basket_SS25, use_colnames=True, min_support=0.6)

# Derive association rules with a lift threshold of 1, then rank them by
# confidence (highest first), using lift to break ties.
rules = (
    association_rules(frq_items, min_threshold=1, metric="lift")
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

              antecedents   consequents  antecedent support  \
1                (Brašno)        (Ulje)                0.64   
19  (Tjestenina, Mlijeko)       (Hljeb)                0.60   
24     (Tjestenina, Ulje)       (Hljeb)                0.60   
26          (Ulje, Hljeb)  (Tjestenina)                0.60   
0                  (Ulje)      (Brašno)                0.68   

    consequent support  support  confidence      lift  leverage  conviction  
1                 0.68     0.64    1.000000  1.470588    0.2048         inf  
19                0.80     0.60    1.000000  1.250000    0.1200         inf  
24                0.80     0.60    1.000000  1.250000    0.1200         inf  
26                0.80     0.60    1.000000  1.250000    0.1200         inf  
0                 0.64     0.64    0.941176  1.470588    0.2048        6.12  


In [15]:
# Mine frequent itemsets from the SS35 baskets (minimum support 0.6).
frq_items = apriori(basket_SS35, use_colnames=True, min_support=0.6)

# Derive association rules with a lift threshold of 1, then rank them by
# confidence (highest first), using lift to break ties.
rules = (
    association_rules(frq_items, min_threshold=1, metric="lift")
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

  antecedents consequents  antecedent support  consequent support  support  \
2  (Piletina)    (Jogurt)               0.625               0.625    0.625   
3    (Jogurt)  (Piletina)               0.625               0.625    0.625   
1   (Mlijeko)     (Hljeb)               0.625               0.750    0.625   
0     (Hljeb)   (Mlijeko)               0.750               0.625    0.625   

   confidence      lift  leverage  conviction  
2    1.000000  1.600000  0.234375         inf  
3    1.000000  1.600000  0.234375         inf  
1    1.000000  1.333333  0.156250         inf  
0    0.833333  1.333333  0.156250        2.25  
