# FP-Growth Algorithm

## 1. Collect the Data

In [30]:
# Import libraries
import math
import numpy as np
import pandas as pd
import random as rnd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy.stats import skew, kurtosis
from matplotlib.ticker import ScalarFormatter
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.preprocessing import TransactionEncoder
from mlxtend.frequent_patterns import association_rules

# Load the raw CSV tables from the Data/ folder.
# Lookup tables: joined onto the order lines by their id columns further down.
products_df = pd.read_csv('Data/products.csv')
aisles_df = pd.read_csv('Data/aisles.csv')
departments_df = pd.read_csv('Data/departments.csv')

# Order headers and the order-line files ("prior" and "train").
orders_df = pd.read_csv('Data/orders.csv')
order_products_prior_df = pd.read_csv('Data/order_products__prior.csv')
order_products_train_df = pd.read_csv('Data/order_products__train.csv')

In [31]:
# Combine the tables into a single frame: every order line is enriched with
# its product, aisle, department and order attributes.
#
# Only the `train` order lines are used. Concatenating order_products_prior_df
# with them (pd.concat([prior, train])) was ruled out because its
# computational cost makes the rest of the process impractical.
#
# validate='many_to_one' makes each merge fail loudly if a lookup table has a
# duplicated key; without it a left merge would silently duplicate order lines
# and inflate every support value computed downstream.
order_products = (
    order_products_train_df
    .merge(products_df, on='product_id', how='left', validate='many_to_one')
    .merge(aisles_df, on='aisle_id', how='left', validate='many_to_one')
    .merge(departments_df, on='department_id', how='left', validate='many_to_one')
    .merge(orders_df, on='order_id', how='left', validate='many_to_one')
)
order_products.head()

Unnamed: 0,order_id,product_id,add_to_cart_order,reordered,product_name,aisle_id,department_id,aisle,department,user_id,eval_set,order_number,order_dow,order_hour_of_day,days_since_prior_order
0,1,49302,1,1,Bulgarian Yogurt,120,16,yogurt,dairy eggs,112108,train,4,4,10,9.0
1,1,11109,2,1,Organic 4% Milk Fat Whole Milk Cottage Cheese,108,16,other creams cheeses,dairy eggs,112108,train,4,4,10,9.0
2,1,10246,3,0,Organic Celery Hearts,83,4,fresh vegetables,produce,112108,train,4,4,10,9.0
3,1,49683,4,0,Cucumber Kirby,83,4,fresh vegetables,produce,112108,train,4,4,10,9.0
4,1,43633,5,1,Lightly Smoked Sardines in Olive Oil,95,15,canned meat seafood,canned goods,112108,train,4,4,10,9.0


In [32]:
def _items_per_order(order_lines, item_column):
    """Collect the values of ``item_column`` into one list per ``order_id``.

    Returns a Series indexed by ``order_id`` whose values are Python lists,
    with one entry per row of that order (repeated values are kept).
    """
    return order_lines.groupby('order_id')[item_column].apply(list)


# Product-level baskets: one list of product names per order
order_product = order_products[['order_id', 'product_name']]
order_product_arrays = _items_per_order(order_product, 'product_name')
order_product_list = order_product_arrays.tolist()

# Aisle-level baskets: one list of aisles per order
order_aisle = order_products[['order_id', 'aisle']]
order_aisle_arrays = _items_per_order(order_aisle, 'aisle')
order_aisle_list = order_aisle_arrays.tolist()

# Department-level baskets: one list of departments per order
order_department = order_products[['order_id', 'department']]
order_department_arrays = _items_per_order(order_department, 'department')
order_department_list = order_department_arrays.tolist()

In [33]:
# Encode each list of baskets into an order x item matrix and wrap it in a
# DataFrame whose columns are the item labels.
# One encoder is re-fitted for every level, so `encoder.columns_` has to be
# read right after each fit, before the next fit overwrites it.
encoder = TransactionEncoder()

# Product level
product_transactions = encoder.fit_transform(order_product_list)
product_itemsets = pd.DataFrame(product_transactions, columns=encoder.columns_)

# Aisle level
aisle_transactions = encoder.fit_transform(order_aisle_list)
aisle_itemsets = pd.DataFrame(aisle_transactions, columns=encoder.columns_)

# Department level (the name `deparment_transactions` is kept as originally spelled)
deparment_transactions = encoder.fit_transform(order_department_list)
department_itemsets = pd.DataFrame(deparment_transactions, columns=encoder.columns_)

## 2. FP-Growth

In [34]:
# Minimum support passed to FP-Growth. The same threshold is applied at all
# three levels of granularity, so it is defined once instead of being
# repeated as a magic number in every call.
MIN_SUPPORT = 0.0015

# Frequent itemsets at product level
fpgrowth_product = fpgrowth(product_itemsets, min_support=MIN_SUPPORT, use_colnames=True)

# Frequent itemsets at aisle level
fpgrowth_aisle = fpgrowth(aisle_itemsets, min_support=MIN_SUPPORT, use_colnames=True)

# Frequent itemsets at department level
fpgrowth_department = fpgrowth(department_itemsets, min_support=MIN_SUPPORT, use_colnames=True)

## 3. Association Rules

In [35]:
# Rule-filtering settings shared by the three levels: keep only rules whose
# RULE_METRIC value is at least MIN_CONFIDENCE. Defined once so the three
# calls below cannot drift apart.
RULE_METRIC = "confidence"
MIN_CONFIDENCE = 0.25

# Product association rules
product_rules = association_rules(fpgrowth_product, metric=RULE_METRIC, min_threshold=MIN_CONFIDENCE)

# Aisle association rules
aisle_rules = association_rules(fpgrowth_aisle, metric=RULE_METRIC, min_threshold=MIN_CONFIDENCE)

# Department association rules
department_rules = association_rules(fpgrowth_department, metric=RULE_METRIC, min_threshold=MIN_CONFIDENCE)

## 4. Evaluate Association Rules

### 4.1 Product Association Rules

In [36]:
# Summary statistics (count, mean, std, quartiles) of the product-level rule metrics.
product_rules.describe()

Unnamed: 0,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
count,274.0,274.0,274.0,274.0,274.0,274.0,274.0,274.0
mean,0.010032,0.103784,0.003063,0.318783,4.438138,0.001973,1.326676,0.673568
std,0.009725,0.036218,0.002812,0.059628,7.252355,0.001673,0.138957,0.125584
min,0.002904,0.00439,0.001501,0.250197,1.820493,0.000704,1.158204,0.453666
25%,0.00531,0.083028,0.001677,0.273961,2.281375,0.001151,1.229337,0.565982
50%,0.006951,0.11798,0.002027,0.302502,3.064641,0.001429,1.295488,0.680039
75%,0.010207,0.142719,0.003121,0.346716,3.9216,0.002035,1.391437,0.75268
max,0.083028,0.142719,0.023428,0.598425,71.084466,0.013633,2.196403,0.991258


### 4.2 Aisle Association Rules

In [37]:
# Summary statistics (count, mean, std, quartiles) of the aisle-level rule metrics.
aisle_rules.describe()

Unnamed: 0,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
count,545932.0,545932.0,545932.0,545932.0,545932.0,545932.0,545932.0,545932.0
mean,0.006697,0.238615,0.002858,0.491385,2.403927,0.001477,1.788624,0.545018
std,0.00787,0.144576,0.003253,0.196031,0.818432,0.001288,1.028155,0.13763
min,0.00154,0.006897,0.001501,0.25,0.503257,-0.009865,0.622132,-0.4989
25%,0.00346,0.12224,0.001722,0.318321,1.797827,0.000931,1.28218,0.44682
50%,0.00519,0.185102,0.002096,0.440064,2.222814,0.001188,1.450303,0.553826
75%,0.007385,0.327333,0.002911,0.643011,2.841424,0.001603,1.875035,0.652702
max,0.550099,0.550099,0.327333,0.990338,44.006775,0.079624,56.824071,0.984071


### 4.3 Department Association Rules

In [38]:
# Summary statistics (count, mean, std, quartiles) of the department-level rule metrics.
# `conviction` shows mean=inf and std=NaN here: rules with confidence 1.0
# have infinite conviction, which propagates into those aggregates.
department_rules.describe()

Unnamed: 0,antecedent support,consequent support,support,confidence,lift,leverage,conviction,zhangs_metric
count,643257.0,643257.0,643257.0,643257.0,643257.0,643257.0,643257.0,643257.0
mean,0.011367,0.17751,0.004653,0.456811,3.302081,0.002693,inf,0.656327
std,0.017607,0.143592,0.00724,0.177813,1.258062,0.002741,,0.14134
min,0.001509,0.025158,0.001501,0.25,0.677482,-0.010351,0.5452067,-0.327773
25%,0.004596,0.083912,0.001943,0.314793,2.353145,0.001329,1.326507,0.582678
50%,0.006814,0.127506,0.002728,0.405113,3.127347,0.001816,1.464049,0.688224
75%,0.01163,0.209269,0.004657,0.55419,4.046843,0.00295,1.767451,0.759608
max,0.738722,0.738722,0.543835,1.0,10.191983,0.051762,inf,0.906818
