In [1]:
import os
import re

import pandas as pd
import numpy as np

import plotly.express as px
import networkx as nx

import json
import copy

In [2]:
from IPython.display import display

In [3]:
from mlxtend.frequent_patterns import apriori
from mlxtend.frequent_patterns import association_rules
from mlxtend.preprocessing import TransactionEncoder

In [24]:
from networks.RulesGraphManager import RulesGraphManager as RGM
from networks.ProductNetwork import ProductNetwork
from networks.CrossSellingProducts import CrossSellingProducts

from grouper.NxGrouper import NxGrouper
from charts.HeatmapXTab import HeatmapCrosstab

from echarts.EgraphForce import EgraphForce
from echarts.EgraphStandard import EgraphStandard
from echarts.JupyterEcharts import JupyterEcharts

In [5]:
# Notebook-wide pandas display settings: show up to 100 rows and never truncate
# the text inside a cell (max_colwidth=None).
for _option, _value in (
    ('display.max_rows', 100),
    ('display.max_colwidth', None),
):
    pd.set_option(_option, _value)

### Data Preparation

#### Data Cleaning

In [6]:
def get_cleaned_df(pathname=None):
    """Load the Online Retail II CSV and return cleaned (invoice, product) rows.

    Cleaning steps, applied in this order:
      1. Drop every row that has a missing value in any column.
      2. Drop rows whose ``Price`` is 0.
      3. Drop cancelled invoices, i.e. invoice codes starting with "C".
      4. Strip each ``Description`` and replace every run of two or more
         whitespace characters with a single space.

    Side effect: (re)binds the module-level names ``COL_CUSTOMER_ID``,
    ``COL_ORDER_DATE``, ``COL_ORDER_ID``, ``COL_VALUE`` and ``COL_QUANTITY``.

    Parameters
    ----------
    pathname : str or path-like, optional
        Location of the CSV file. When omitted, the original hard-coded
        location ``F:\\Data\\datas\\online_retail_II.csv`` is used.

    Returns
    -------
    pandas.DataFrame
        The ``Invoice`` and ``Description`` columns of the surviving rows,
        keeping the row labels assigned when the CSV was read.
    """
    global COL_CUSTOMER_ID, COL_ORDER_DATE, COL_ORDER_ID, COL_VALUE, COL_QUANTITY
    COL_CUSTOMER_ID = 'Customer ID'
    COL_ORDER_DATE = 'InvoiceDate'
    COL_ORDER_ID = 'Invoice'
    COL_VALUE = 'Sales'
    COL_QUANTITY = 'Quantity'

    if pathname is None:
        pathname = os.path.join("F:\\Data\\datas", "online_retail_II.csv")

    raw = pd.read_csv(
        pathname,
        # Keep identifiers as text so codes such as "C489449" are not coerced.
        dtype={COL_CUSTOMER_ID: str, COL_ORDER_ID: str},
        parse_dates=[COL_ORDER_DATE],
    )

    complete = raw.dropna()

    is_free = complete['Price'] == 0
    # A cancellation is recorded under its own invoice code prefixed with "C";
    # those rows are removed here.
    is_cancelled = complete[COL_ORDER_ID].str.startswith('C')

    # Filter once and take an explicit copy so the column assignment below
    # writes to an independent frame instead of a slice of `complete`.
    cleaned = complete.loc[~is_free & ~is_cancelled, [COL_ORDER_ID, 'Description']].copy()
    cleaned['Description'] = (
        cleaned['Description']
        .str.strip()
        .str.replace(r'\s{2,}', ' ', regex=True)
    )

    return cleaned

In [7]:
# Load and clean the retail data; `df` holds only the 'Invoice' and 'Description' columns.
df = get_cleaned_df()

#### Data Encoding

In [8]:
def encoding_data(df, COL_ITEM_ID, COL_ORDER_ID):
    """One-hot encode order baskets for frequent-itemset mining.

    Parameters
    ----------
    df : pandas.DataFrame
        One row per purchased item. Only the two named columns are read and
        the frame itself is not modified.
    COL_ITEM_ID : str
        Name of the column holding the item identifier.
    COL_ORDER_ID : str
        Name of the column identifying the order each row belongs to.

    Returns
    -------
    pandas.DataFrame
        One row per order (orders in sorted ``COL_ORDER_ID`` order, default
        integer index) with the columns reported by mlxtend's
        ``TransactionEncoder`` after fitting on the baskets.
    """
    # Collect the distinct items of every order directly from the item column.
    # Selecting the column first keeps unrelated columns out of the
    # aggregation, and avoids building baskets by repeated list concatenation.
    baskets = df.groupby(COL_ORDER_ID)[COL_ITEM_ID].unique()
    transactions = [list(items) for items in baskets]

    encoder = TransactionEncoder()
    one_hot_transactions = pd.DataFrame(
        encoder.fit(transactions).transform(transactions),
        columns=encoder.columns_,
    )

    return one_hot_transactions

In [9]:
# One-hot encode the baskets: items come from 'Description', orders are keyed by 'Invoice'.
df_transactions = encoding_data(df, 'Description', 'Invoice')

### Data Mining (Rules Extraction)

In [10]:
# Mine itemsets of at most three items whose support is at least 0.01,
# reporting them by column name rather than by column position.
frequent_itemsets = apriori(
    df_transactions,
    min_support=0.01,
    use_colnames=True,
    max_len=3,
    low_memory=True,
)

# Derive association rules from those itemsets, thresholding on support (>= 0.01).
rules = association_rules(
    frequent_itemsets,
    metric="support",
    min_threshold=0.01,
)

In [11]:
# Turn each rule side (a set of product names) into one readable node label.
# Names are sorted and joined with ' + ' so a multi-product side always maps to
# the same, unambiguous label. Joining with '' glued the names together with no
# separator and in set-iteration order, which is arbitrary.
# Single-product sides keep exactly the label they had before.
# Values that are already strings are left untouched, so re-running this cell
# does not alter the labels.
for _side in ('antecedents', 'consequents'):
    rules[_side] = rules[_side].map(
        lambda items: items if isinstance(items, str) else ' + '.join(sorted(items))
    )

In [12]:
# Size of the mined rules table as (rows, columns).
rules.shape

(438, 10)

### Network Visualization with Echarts

In [13]:
# Build node and edge tables for the full rule set with the project's
# RulesGraphManager, pointing it at the 'antecedents' and 'consequents' columns
# (presumably the two ends of each edge — confirm in RulesGraphManager).
myRGM = RGM(rules, 'antecedents', 'consequents')
df_nodes, df_edges = myRGM.get_graph_features()

#### Profile Network

In [14]:
# Group the nodes with the project's NxGrouper.greedy_modularity_communities and
# keep the result under a separate name so df_nodes itself is not rebound here.
# NOTE(review): the meaning of the positional 4 is not visible in this notebook
# (possibly a cap on the number of groups) — confirm in NxGrouper.
df_nodes_profile = NxGrouper.greedy_modularity_communities(df_nodes, df_edges, 4)

In [16]:
# Configure a force-graph chart from the edge table and the grouped node table,
# naming the columns that hold each edge's two ends and each node's name.
force = EgraphForce(
    df_edges, 
    df_nodes_profile, 
    col_source='antecedents', 
    col_target='consequents', 
    col_name='nodes',
)
# Chart option object; it is passed to JupyterEcharts.show in the following cell.
profile_force_option = force.get_option()

In [38]:
# Render the profile network in a 900px x 900px area.
JupyterEcharts.show(profile_force_option, width='900px', height='900px')

#### Product Network

In [18]:
# Extract the rules reachable from two seed products and turn them into graph tables.
# NOTE(review): the positional arguments ('support', 0, 3, 5) are not
# self-explanatory — presumably a metric name followed by numeric limits for the
# breadth-first search; confirm against ProductNetwork.get_bfs_rules and
# consider passing them by keyword.
MyPN = ProductNetwork(rules)
df_bfs, rules_bfs = MyPN.get_bfs_rules(['RED HANGING HEART T-LIGHT HOLDER','HEART OF WICKER LARGE'], 'support', 0, 3, 5)
# Rebinds df_nodes / df_edges: from here on they refer to the product network,
# no longer to the whole-rule-set graph built earlier in the notebook.
df_nodes, df_edges = MyPN.get_graph_features(df_bfs, rules_bfs, strict_rules=True)

In [19]:
def _rank_label(rank):
    """Return the label settings for one node: always shown, placed to the right, text = its rank."""
    return {"show": True, "position": "right", "formatter": f"{rank}"}


# Give every node a 'label' entry derived from its own 'rank' value.
df_nodes['label'] = df_nodes['rank'].map(_rank_label)

In [20]:
# Force-graph chart for the product network. Unlike the profile chart, the
# category column is named explicitly ('depth') and the legend is requested.
product_force = EgraphForce(
    df_edges, 
    df_nodes,
    col_category='depth',
    col_source='antecedents', 
    col_target='consequents',
    col_name='nodes',
)
product_force_option = product_force.get_option(show_legend=True)

In [21]:
# Render the product network using JupyterEcharts.show's default size.
JupyterEcharts.show(product_force_option)

#### Cross Selling Products

In [25]:
# Select cross-selling candidates from the mined rules.
# NOTE(review): max_support_ratio_diff=2.0 and min_confidence=0.35 are tuning
# thresholds whose exact semantics live in CrossSellingProducts — confirm there
# before changing them.
MyCSP = CrossSellingProducts(rules)
cross_selling_rules = MyCSP.get_cross_selling_products(max_support_ratio_diff=2.0, min_confidence=0.35)

In [26]:
# Rebinds df_nodes / df_edges again, this time to the cross-selling graph tables.
df_nodes, df_edges = MyCSP.get_graph_features(cross_selling_rules)
# Group the nodes with NxGrouper; no third argument is passed here, unlike the
# profile-network call earlier in the notebook.
df_nodes = NxGrouper.greedy_modularity_communities(df_nodes, df_edges)

In [27]:
# Force-graph chart for the cross-selling network.
csp_force = EgraphForce(
    df_edges, df_nodes,
    col_source='antecedents',
    col_target='consequents',
    col_name='nodes',
)
# NOTE(review): this option is serialised to a JSON string, whereas the profile
# and product charts keep the object returned by get_option() as-is. Confirm
# that JupyterEcharts.show accepts a string as well.
csp_force_option = json.dumps(csp_force.get_option())

In [35]:
# Display the serialised chart option (a JSON string) for inspection.
csp_force_option

'{"title": {"text": "Graph", "subtext": "Default layout", "top": "bottom", "left": "right"}, "tooltip": {"trigger": "item", "formatter": "{a} <br/>{b} - {c}"}, "legend": [{"data": ["LUNCH BAG WOODLAND", "PACK OF 60 DINOSAUR CAKE CASES", "PLASTERS IN TIN SPACEBOY", "PINK REGENCY TEACUP AND SAUCER", "COOK WITH WINE METAL SIGN", "WOOD 2 DRAWER CABINET WHITE FINISH", "JUMBO STORAGE BAG SUKI", "ALARM CLOCK BAKELIKE RED", "HAND WARMER OWL DESIGN", "Others"]}], "animationDuration": 1500, "animationEasingUpdate": "quinticInOut", "series": [{"name": "Series", "type": "graph", "layout": "force", "data": [{"name": "LUNCH BAG WOODLAND", "category": "LUNCH BAG WOODLAND"}, {"name": "LUNCH BAG BLACK SKULL.", "category": "LUNCH BAG WOODLAND"}, {"name": "LUNCH BAG CARS BLUE", "category": "LUNCH BAG WOODLAND"}, {"name": "PACK OF 60 DINOSAUR CAKE CASES", "category": "PACK OF 60 DINOSAUR CAKE CASES"}, {"name": "LUNCH BAG SUKI DESIGN", "category": "LUNCH BAG WOODLAND"}, {"name": "PLASTERS IN TIN SPACEBOY",

In [28]:
# Render the cross-selling network. The argument is the JSON string produced by
# json.dumps, not the object returned by get_option().
JupyterEcharts.show(csp_force_option)

### Product Placement with Plotly Heatmap

In [None]:
# Heatmap helper built on the same mined rules table used for the networks.
MyHM = HeatmapCrosstab(rules)

In [None]:
# Tabulate the 'support' values for two selected products; max_col=10 is passed
# through to the helper (presumably a cap on the number of columns shown —
# confirm in HeatmapCrosstab.get_tabular_data).
tabular = MyHM.get_tabular_data(
    ['WHITE HANGING HEART T-LIGHT HOLDER', 'RED HANGING HEART T-LIGHT HOLDER'], 
    'support',
    max_col=10
)

In [None]:
# Draw the heatmap from the table prepared in the previous cell.
MyHM.plot_heatmap(tabular)