# Network Visualization of Association Rules


In the context of Market Basket Analysis, Association Rules describe relationships between the items
that customers purchase together.
* Apriori Algorithm:
    * Support - The fraction of all transactions in which a specific item (or itemset) appears.

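    * Confidence - The probability of purchasing item B given that item A is purchased. **Conf(A → B) = Support(A, B) / Support(A)**

    * Lift - How much more often items A and B are purchased together than would be expected if they were independent (values above 1 indicate a positive association). **Lift(A, B) = Support(A, B) / (Support(A) * Support(B))**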
    
Dataset - [E-commerce platform generated transactions](http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx)


[Additional Information on Apriori](https://www.kaggle.com/code/parisanahmadi/how-to-solve-the-apriori-algorithm-in-a-simple-way)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import networkx as nx
import os

In [None]:
# Please refer to requirements.txt for a full list of packages
# pip install networkx

In [None]:
# !pip install efficient_apriori
# !pip install networkx
from efficient_apriori import apriori

In [None]:
# Load data: reuse the local CSV cache when it exists, otherwise download the
# original Excel workbook and cache it for later runs.
if os.path.exists('online_retail.csv'):
    df = pd.read_csv('online_retail.csv')
else:
    df = pd.read_excel('http://archive.ics.uci.edu/ml/machine-learning-databases/00352/Online%20Retail.xlsx')

    # index=False keeps the row index out of the file; otherwise the cached
    # copy comes back with a spurious "Unnamed: 0" column on the next read.
    df.to_csv('online_retail.csv', index=False)

df.describe(include='all')

In [None]:
# Keep only the invoice identifier and the item description
df = df.loc[:, ['InvoiceNo', 'Description']]
# Count the missing values in each remaining column
df.isna().sum()

In [None]:
# Discard rows that have no item description
has_description = df['Description'].notna()
df = df[has_description]
df.head()

In [None]:
# Histogram of how many times each distinct item description occurs
item_counts = df['Description'].value_counts()
hist = item_counts.hist(bins=50, figsize=(15, 10))

In [None]:
# Collect the sold items of each invoice number into one list per invoice
invoice_groups = df.groupby('InvoiceNo')
grouped_transac = invoice_groups.agg(list)
grouped_transac.head()

In [None]:
def clean_transactions(items):
    """Return the items of one transaction as a tuple of cleaned strings.

    Every item is converted with ``str`` (most values are strings, but some
    are bare item numbers) and stripped of leading/trailing spaces and dots.
    """
    return tuple(str(entry).strip(' .') for entry in items)
# One tuple of cleaned item names per invoice
description_lists = grouped_transac['Description']
transactions = description_lists.apply(clean_transactions).tolist()

In [None]:
# A shorthand way to implement the procedure above
def clean_transac(t_list):
    """Return *t_list* as a tuple of strings without surrounding spaces and dots.

    Each item goes through ``str`` first, so non-string entries (bare item
    numbers) do not raise ``AttributeError`` when ``.strip`` is called.
    """
    return tuple(str(x).strip(' .') for x in t_list)

# Alternative using the shorthand helper:
# transactions = grouped_transac['Description'].apply(clean_transac).tolist()
cleaned_series = grouped_transac['Description'].apply(clean_transactions)
transactions = cleaned_series.tolist()
# Show the first transaction as a quick sanity check
first_transaction = transactions[0]
print(first_transaction)

In [None]:
# Run apriori
# min_support and min_confidence are the thresholds handed to efficient_apriori;
# presumably fractions (0.7% of transactions, 70% confidence) - confirm against the library docs.
# %time is an IPython magic that reports how long the statement takes to run.
%time itemsets, rules = apriori(transactions, min_support=0.007, min_confidence=0.70, verbosity=1)

In [None]:
# Print the 10 rules with the highest confidence (listed in ascending order)
rules_by_confidence = sorted(rules, key=lambda r: r.confidence)
for rule in rules_by_confidence[-10:]:
    print(rule)

In [None]:
# Construct the Network/Graph: one node per rule, wired to the items it involves.
# Only the last 100 entries of `rules` are used.
network = nx.DiGraph()
all_rules = []
for rule_idx, rule in enumerate(rules[-100:]):
    # Rule node, carrying the rule's lift as a node attribute
    rule_label = f"rule {rule_idx}"
    all_rules.append(rule_label)
    network.add_node(rule_label, lift=rule.lift)

    # Items on the left-hand side point into the rule node ...
    network.add_edges_from((item, rule_label) for item in rule.lhs)
    # ... and the rule node points to the items on its right-hand side
    network.add_edges_from((rule_label, item) for item in rule.rhs)


In [None]:
# Define matplotlib.pyplot axes (canvas)
_, ax = plt.subplots(figsize=(17, 12))

# Random node positions; nx.draw_spring(network, ax=ax) is an alternative layout
pos = nx.random_layout(network)

# A set gives O(1) membership tests when separating item nodes from rule nodes
rule_nodes = set(all_rules)
item_nodes = [node for node in network.nodes() if node not in rule_nodes]

# Draw edges; ax is passed explicitly so they land on the same axes as the nodes
edges = nx.draw_networkx_edges(network, pos, ax=ax, alpha=0.2, width=1.5)

# Draw item nodes
nx.draw_networkx_nodes(network,
                       pos=pos,
                       ax=ax,
                       node_color='b',
                       alpha=0.5,
                       label='Items',
                       nodelist=item_nodes)

# Draw rule nodes as squares whose size scales with the rule's lift
nx.draw_networkx_nodes(network,
                       pos=pos,
                       ax=ax,
                       node_color='g',
                       alpha=0.7,
                       node_shape='s',
                       label='Rules',
                       nodelist=all_rules,
                       node_size=[8 * network.nodes[rule]['lift'] for rule in all_rules])

# Draw node labels
labels = nx.draw_networkx_labels(network, pos=pos, ax=ax, font_size=10)

legend = ax.legend()

In [None]:

# Let's explore the data as a network

# Compute the network metrics first and store each one as a node attribute

# Betweenness centrality (a dict keyed by node)
betweenness = nx.betweenness_centrality(network)
nx.set_node_attributes(network, values=betweenness, name="betweenness")

# Degree of every node
degree = {node: node_degree for node, node_degree in network.degree()}
nx.set_node_attributes(network, values=degree, name="degree")





In [None]:
# Inspect every node together with its attribute dict
network.nodes(data=True)

In [None]:
# Build a dataframe from the network nodes: column 0 holds the node,
# column 1 holds its attribute dict
node_table = pd.DataFrame.from_dict(network.nodes(data=True))

# Expand the attribute dicts into one column per attribute
attribute_table = pd.json_normalize(node_table[1])

# Join the expanded attributes back onto the node rows by position
df = node_table.merge(attribute_table, left_index=True, right_index=True)


In [None]:
# Display the node table
df

In [None]:
import seaborn as sns
# Scatter plot of the "degree" column against the "betweenness" column of df
sns.scatterplot(data=df, x="degree", y="betweenness")

In [None]:
# Closing message
print('Thank you')

In [None]:
# Task: Try to improve the network visualization