In [1]:
import pandas as pd
from mlxtend.frequent_patterns import fpgrowth
from mlxtend.frequent_patterns import apriori, association_rules
import ipywidgets as widgets
from IPython.display import display

## Import data

In [2]:
# Load the raw DataCo supply-chain export; the file is decoded as latin1.
df = pd.read_csv(
    "DataCoSupplyChainDataset.csv",
    encoding='latin1',
)

## Data Preprocessing

In [3]:
# Total quantity of items ordered by each customer, indexed by customer id.
customer_item_count = (
    df['Order Item Quantity']
    .groupby(df['Order Customer Id'])
    .sum()
)

Since the dataset is very large and my laptop cannot handle all of it, I'm going to keep only the top customers.

In [4]:
# Keep only customers whose total ordered quantity is at least 10 items.
filtered_customers = customer_item_count.loc[customer_item_count.ge(10)]

I keep only customers who bought at least 10 products, and of those I take only the top 1000 customers because of memory constraints.

In [5]:
# Rank the qualifying customers by total quantity purchased, largest first, so
# that head(1000) really selects the *top* customers. (An ascending sort would
# pick the 1000 customers with the smallest totals instead.)
sorted_customers = filtered_customers.sort_values(ascending=False)
top_customers = sorted_customers.head(1000).index

In [6]:
# Restrict the full order table to the rows that belong to the selected customers.
df_top_customers = df.loc[df['Order Customer Id'].isin(top_customers)]

The rows belonging to the top customers have been filtered and saved to `df_top_customers`.

## Basketing (Training)

In [7]:
# Distinct product names, in order of first appearance, kept as a plain list
# for later use (e.g. as the options of the product dropdown).
unique_products = df_top_customers['Product Name'].drop_duplicates().tolist()

In [8]:
# Build one basket row per selected customer: the index is the customer id,
# the columns are product names, and each cell holds the total quantity of
# that product the customer ordered (0 when the customer never ordered it).
basket = (
    df_top_customers
    .groupby(['Order Customer Id', 'Product Name'])['Order Item Quantity']
    .sum()
    .unstack()
    .fillna(0)
)

In [9]:
def encode_units(x):
    """Map a purchased quantity to a 0/1 presence flag.

    Returns 1 when ``x`` is at least 1, and 0 otherwise.
    """
    if x >= 1:
        return 1
    return 0

In [10]:
# Binarise the basket cell by cell with encode_units: quantities >= 1 become 1,
# everything else becomes 0.
basket = basket.map(encode_units)

In [11]:
# Cast the 0/1 flags to booleans before the basket is passed to fpgrowth.
basket = basket.astype(bool)

In [12]:
# Mine frequent itemsets with FP-Growth. min_support=0.01 keeps itemsets that
# occur in at least 1% of the basket rows; use_colnames=True reports itemsets
# by column label (product name) rather than by column index.
frequent_itemsets = fpgrowth(basket, min_support=0.01, use_colnames=True)

In [13]:
# Derive association rules from the frequent itemsets, keeping every rule
# whose confidence reaches the threshold below.
minimum_confidence_threshold = 0.01
rules = association_rules(
    frequent_itemsets,
    metric="confidence",
    min_threshold=minimum_confidence_threshold,
)

## User Interface

In [14]:
# Controls: how many recommendations to show, and which product to look up.
slider_variable = widgets.IntSlider(
    value=5,
    min=3,
    max=10,
    step=1,
    description='Top N:',
)
dropdown_variable = widgets.Dropdown(
    options=unique_products,
    value=unique_products[0],
    description='Product:',
)

In [15]:
def update_output(change):
    """Refresh the recommendation grid for the current widget selections.

    Reads the selected product from ``dropdown_variable`` and the number of
    rules to show from ``slider_variable``, looks up the matching rules with
    ``find_top_confidence_rules`` and replaces the children of ``grid_layout``
    with one (product label, confidence bar) pair per rule.

    Parameters
    ----------
    change
        Change notification supplied by ``observe``. It is not inspected, so
        ``None`` may be passed to trigger a manual refresh.
    """
    top_n = slider_variable.value
    selected_product = dropdown_variable.value

    top_rules = find_top_confidence_rules(selected_product, rules, top_n)

    consequents = top_rules['consequents'].tolist()
    confidence_levels = top_rules['confidence'].tolist()

    grid_items = []
    for consequent, confidence in zip(consequents, confidence_levels):
        # A consequent is a set and may contain several products. Show all of
        # them, sorted for a stable order, instead of one arbitrary member;
        # otherwise the confidence of a multi-product rule would be attributed
        # to a single product.
        label_text = ', '.join(sorted(str(item) for item in consequent))
        product_label = widgets.Label(value=label_text)
        confidence_bar = widgets.FloatProgress(value=confidence, min=0, max=1, bar_style='info', description=f'{confidence:.4f}')
        grid_items.extend([product_label, confidence_bar])

    if not grid_items:
        # Tell the user explicitly when no rule has the selected product as
        # its antecedent, rather than leaving the grid silently empty.
        grid_items.append(widgets.Label(value='No recommendations found for this product.'))

    # Update the grid layout
    grid_layout.children = grid_items

In [16]:
def find_top_confidence_rules(product, rules, top_n=None):
    """Return the highest-confidence rules whose antecedent is exactly ``product``.

    Parameters
    ----------
    product
        Product name; only rules whose antecedent set is ``{product}`` match.
    rules : pandas.DataFrame
        Rule table with ``antecedents``, ``consequents`` and ``confidence``
        columns.
    top_n : int, optional
        Maximum number of rules to return. When omitted, the current value of
        ``slider_variable`` is used.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` matching rules with distinct consequents, ordered by
        descending confidence.
    """
    if top_n is None:
        # Resolve the default at call time; a default written in the signature
        # would be frozen to whatever the slider showed when this was defined.
        top_n = slider_variable.value

    filtered_rules = rules[rules['antecedents'] == frozenset([product])]
    sorted_rules = filtered_rules.sort_values(by='confidence', ascending=False)
    # Keep only unique consequents (the highest-confidence occurrence of each)
    # *before* truncating, so duplicates cannot use up the top_n slots.
    unique_rules = sorted_rules.drop_duplicates(subset=['consequents'])
    return unique_rules.head(top_n)

In [17]:
# Re-render the recommendations whenever the value of either control changes.
for control in (slider_variable, dropdown_variable):
    control.observe(update_output, names='value')

In [18]:
# Output area: a heading plus a two-column grid that update_output fills with
# (product label, confidence bar) pairs.
recommended_products = widgets.HTML(value="<b>Recommended Products</b>")
grid_layout = widgets.GridBox(
    children=[],
    layout=widgets.Layout(grid_template_columns="auto auto"),
)

In [19]:
# Show the two controls followed by the results panel, then fill the grid once
# for the initial widget values.
display(
    slider_variable,
    dropdown_variable,
    widgets.VBox([recommended_products, grid_layout]),
)

update_output(None)

IntSlider(value=5, description='Top N:', max=10, min=3)

Dropdown(description='Product:', options=("Nike Men's Dri-FIT Victory Golf Polo", "Under Armour Girls' Toddler…

VBox(children=(HTML(value='<b>Recommended Products</b>'), GridBox(layout=Layout(grid_template_columns='auto au…