In [25]:
#- Using pandas import the dataset as dataframe

import pandas as pd

df = pd.read_csv('OnlineRetail.csv', encoding='latin1')
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,12/1/2010 8:26,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,12/1/2010 8:26,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,12/1/2010 8:26,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,12/1/2010 8:26,3.39,17850.0,United Kingdom


In [26]:
# Dropping the unnecessary columns
df.drop(['InvoiceNo', 'InvoiceDate', 'UnitPrice'], axis=1, inplace=True)

# Dropping the null values
df.dropna(inplace=True)

# Grouping the items by transaction and country
basket = (df.groupby(['Country', 'CustomerID', 'StockCode'])['Quantity']
          .sum().unstack().reset_index().fillna(0)
          .set_index(['Country', 'CustomerID']))

# Defining the function to encode the values as 0 or 1 based on the quantity of the product bought
def encode_units(x):
    if x <= 0:
        return 0
    if x >= 1:
        return 1
    
# Applying the function to the basket dataset
basket_sets = basket.applymap(encode_units)    


In [31]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
#Using the apriori algorithm generate a list of item frequently brought together.
frequent_itemsets = apriori(basket_sets, min_support=0.05, use_colnames=True)

# Generating the association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=1)

# Displaying the top 10 association rules
print(rules.head(10))




  antecedents consequents  antecedent support  consequent support   support  \
0     (20726)     (20725)            0.084703            0.121005  0.056621   
1     (20725)     (20726)            0.121005            0.084703  0.056621   
2     (20727)     (20725)            0.104338            0.121005  0.068265   
3     (20725)     (20727)            0.121005            0.104338  0.068265   
4     (20728)     (20725)            0.108676            0.121005  0.069635   
5     (20725)     (20728)            0.121005            0.108676  0.069635   
6     (22382)     (20725)            0.111872            0.121005  0.068721   
7     (20725)     (22382)            0.121005            0.111872  0.068721   
8     (22383)     (20725)            0.098858            0.121005  0.064384   
9     (20725)     (22383)            0.121005            0.098858  0.064384   

   confidence      lift  leverage  conviction  zhangs_metric  
0    0.668464  5.524284  0.046372    2.651279       0.894771  
1   