In [2]:
pip install mlxtend

Collecting mlxtend
  Downloading mlxtend-0.20.0-py2.py3-none-any.whl (1.3 MB)
Collecting scikit-learn>=1.0.2
  Downloading scikit_learn-1.1.1-cp39-cp39-win_amd64.whl (7.4 MB)
Installing collected packages: scikit-learn, mlxtend
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 0.24.2
    Uninstalling scikit-learn-0.24.2:
      Successfully uninstalled scikit-learn-0.24.2
Successfully installed mlxtend-0.20.0 scikit-learn-1.1.1
Note: you may need to restart the kernel to use updated packages.


In [1]:
import numpy as np
import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules

ModuleNotFoundError: No module named 'mlxtend'

In [None]:
# Read the retail transactions file, decoding its text as Latin-1.
data = pd.read_csv(
    "OnlineRetail.csv",
    encoding="latin1",
)

In [None]:
# Preview the first rows of the loaded frame.
data.head()

In [None]:
# Column dtypes and non-null counts of the loaded frame.
data.info()

In [None]:
# Summary statistics for every column, numeric and non-numeric alike.
data.describe(include='all')

In [None]:
# Trim leading/trailing whitespace from every product description.
data['Description'] = data['Description'].str.strip()

# Discard rows that have no invoice number, then make the column textual
# so the string test below can be applied to it.
data.dropna(subset=['InvoiceNo'], inplace=True)
data['InvoiceNo'] = data['InvoiceNo'].astype(str)

# Keep only the invoices whose number contains no 'C'
# (the transactions done on credit are the ones that carry it).
is_credit = data['InvoiceNo'].str.contains('C')
data = data.loc[~is_credit]

In [None]:
# Number of rows per country, most frequent first.
data.Country.value_counts()

In [None]:
def build_basket(frame, country):
    """Build the invoice-by-product quantity matrix for one country.

    Parameters
    ----------
    frame : pandas.DataFrame
        Transactions with at least the columns ``Country``, ``InvoiceNo``,
        ``Description`` and ``Quantity``.
    country : str
        Value of the ``Country`` column whose rows are kept.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``InvoiceNo`` with one column per ``Description``; each
        cell holds the summed ``Quantity``, and 0 where the product does
        not appear on the invoice.
    """
    return (frame[frame['Country'] == country]
            .groupby(['InvoiceNo', 'Description'])['Quantity']
            .sum().unstack().reset_index().fillna(0)
            .set_index('InvoiceNo'))


# Transactions done in France
basket_France = build_basket(data, "France")

# Transactions done in the United Kingdom
basket_UK = build_basket(data, "United Kingdom")

# Transactions done in Portugal
basket_Por = build_basket(data, "Portugal")

# Transactions done in Sweden
basket_Sweden = build_basket(data, "Sweden")

In [None]:
# Encoding helper that turns the basket quantities into the 0/1 flags
# expected by the frequent-pattern routines
def hot_encode(x):
    """Map a quantity to a 0/1 purchase flag.

    Parameters
    ----------
    x : number
        Quantity of one product on one invoice.

    Returns
    -------
    int
        1 when ``x`` is at least 1, otherwise 0.
    """
    # A single comparison covers every input: values strictly between 0 and 1
    # and NaN (for which every comparison is False) yield 0 instead of
    # falling through both branches and returning None.
    return 1 if x >= 1 else 0

In [None]:
# Display the United Kingdom invoice-by-product quantity matrix.
basket_UK

In [None]:
# Replace the quantities of every basket with 0/1 flags via hot_encode
basket_France = basket_France.applymap(hot_encode)
basket_UK = basket_UK.applymap(hot_encode)
basket_Por = basket_Por.applymap(hot_encode)
basket_Sweden = basket_Sweden.applymap(hot_encode)

# basket_encoded ends up bound to the last frame that was encoded
basket_encoded = basket_Sweden

In [None]:
# Display the Portugal basket.
basket_Por

In [None]:
# Mine the itemsets of the Portugal basket with a support of at least 0.05
frq_items = apriori(basket_Por, use_colnames=True, min_support=0.05)

# Derive the rules that pass the lift threshold of 1 and rank them by
# confidence, then by lift, both in descending order
rules = (
    association_rules(frq_items, metric="lift", min_threshold=1)
    .sort_values(by=['confidence', 'lift'], ascending=False)
)
print(rules.head())

In [None]:
# Display the itemsets returned by apriori.
frq_items

In [None]:
# Display the full sorted rules table.
rules

# Glosario de términos...

Support: Qué tan popular es un ítem. Es la proporción de transacciones en las que aparece cierto ítem.

Confidence: Qué tan probable es que el item Y (consequents) sea comprado cuando el ítem X (antecedents) es comprado (X->Y)



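Lift: Parte de la confidence, pero tiene en cuenta que los ítems pueden comprarse con frecuencias muy distintas, lo que puede distorsionar la importancia de la asociación. Lift corrige esta pequeña imprecisión dividiendo la confidence de X->Y entre el support de Y: $\text{lift}(X \to Y) = \dfrac{\text{confidence}(X \to Y)}{\text{support}(Y)}$.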

Si Lift=1, decimos que no existe asociación entre estos dos ítems (se compran de forma independiente). Si Lift>1, es más probable que el ítem Y (consequents) sea comprado cuando se compra el ítem X (antecedents) que lo esperado por su popularidad. Si Lift<1, es menos probable que Y sea comprado cuando X es comprado.



# Referencias

https://www.kdnuggets.com/2016/04/association-rules-apriori-algorithm-tutorial.html

https://towardsdatascience.com/apriori-association-rule-mining-explanation-and-python-implementation-290b42afdfc6

https://towardsdatascience.com/apriori-algorithm-for-association-rule-learning-how-to-find-clear-links-between-transactions-bf7ebc22cf0a

https://www.geeksforgeeks.org/implementing-apriori-algorithm-in-python/

http://rasbt.github.io/mlxtend/user_guide/frequent_patterns/association_rules/