# First look at the Intel dataset

In [1]:
import os

%matplotlib widget
import numpy as np
from scipy.stats import gaussian_kde as gkde
import matplotlib.pyplot as plt
import pandas as pd

from IPython.display import Markdown, clear_output
import ipywidgets as widgets
# def matplotlib_theme_2_jupyter():
    # try: 
    #     with open(f'/home/{os.getlogin()}/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings') as file:
    #         lines = file.readlines()
    #         theme = [line.split(":")[1].split('"')[1] for line in lines if line.find('"theme"')>-1][0]
    # except:
    #     theme = "JupyterLab Light"
    #     print("Jupyter settings file was not found: modify the path above to your jupyter directory, or edit the colors manually")
    # if theme == 'JupyterLab Dark':
        # plt.style.use("dark_background")
        # plt.rcParams['figure.facecolor']=.07*np.ones(3)
        # plt.rcParams['figure.edgecolor']=.07*np.ones(3)
        # plt.rcParams['axes.facecolor']=.07*np.ones(3)  # set theme
        # plt.rcParams['grid.linewidth']=.2
    # else:
    #     plt.style.use("default")    
# matplotlib_theme_2_jupyter()

## Raw Data

In [2]:
# Read the raw table once; the cells below filter this frame by SKU and center.
df = pd.read_csv("/Datasets/IntelData.csv")

# Distinct identifiers found in the two key columns.
SKUs = df["SKU"].unique()
distribution_centers = df["Distribution Center"].unique()

# Show a preview of the table followed by a one-line summary of its size.
display(df.head())
display(
    Markdown(
        f"The dataset consists of {len(df):,} rows, "
        f"with {len(SKUs)} individual SKUs "
        f"over {len(distribution_centers)} distribution centers"
    )
)

Unnamed: 0,Distribution Center,Product Offering,Generation,SKU,ASP Group,Week,Forecasted Demand,Customer Orders
0,ALPHA,A,2,SKU-A-2,1,1,8949,11146
1,ALPHA,B,2,SKU-B-2,1,1,11146,3503
2,ALPHA,C,1,SKU-C-1,3,1,1274,5892
3,ALPHA,C,2,SKU-C-2,4,1,20717,3185
4,ALPHA,F,1,SKU-F-1,6,1,24522,6529


The dataset consists of 26,114 rows, with 86 individual SKUs over 5 distribution centers

In [3]:
# Peek at the first three distribution centers as a plain Python list.
distribution_centers[:3].tolist()

['ALPHA', 'BETA', 'GAMMA']

In [4]:
# Keep only the rows that belong to the first three distribution centers.
df_sku = df.loc[df["Distribution Center"].isin(distribution_centers[:3])]
df_sku

Unnamed: 0,Distribution Center,Product Offering,Generation,SKU,ASP Group,Week,Forecasted Demand,Customer Orders
0,ALPHA,A,2,SKU-A-2,1,1,8949,11146
1,ALPHA,B,2,SKU-B-2,1,1,11146,3503
2,ALPHA,C,1,SKU-C-1,3,1,1274,5892
3,ALPHA,C,2,SKU-C-2,4,1,20717,3185
4,ALPHA,F,1,SKU-F-1,6,1,24522,6529
...,...,...,...,...,...,...,...,...
26028,GAMMA,U,5,SKU-U-5,18,187,4968,1911
26029,GAMMA,V,5,SKU-V-5,19,187,4554,4936
26030,GAMMA,W,5,SKU-W-5,22,187,7611,318
26031,GAMMA,X,5,SKU-X-5,18,187,1752,796


In [5]:
# Narrow the selection to the first SKU, still within the first three centers.
df_sku = df.loc[
    (df["SKU"] == SKUs[0])
    & df["Distribution Center"].isin(distribution_centers[:3])
]
df_sku

Unnamed: 0,Distribution Center,Product Offering,Generation,SKU,ASP Group,Week,Forecasted Demand,Customer Orders
0,ALPHA,A,2,SKU-A-2,1,1,8949,11146
24,GAMMA,A,2,SKU-A-2,1,1,28089,4299
97,ALPHA,A,2,SKU-A-2,1,2,11338,19427
128,GAMMA,A,2,SKU-A-2,1,2,28344,13854
195,ALPHA,A,2,SKU-A-2,1,3,14538,13854
...,...,...,...,...,...,...,...,...
13462,GAMMA,A,2,SKU-A-2,1,100,64,318
13604,ALPHA,A,2,SKU-A-2,1,101,1401,3025
13653,GAMMA,A,2,SKU-A-2,1,101,32,159
14040,GAMMA,A,2,SKU-A-2,1,103,32,159


In [6]:
def get_SKU(sku, distribution_center, y_scale="log",show_header=False, show_plot=True, return_subdf=False):
    """
    Plot Forecasted Demand and Customer Orders per Week for one SKU.

    The rows of the module-level ``df`` that match ``sku`` and the selected
    distribution centers are drawn as a time series (left panel) next to one
    horizontal histogram per quantity (middle: forecast, right: orders).

    Parameters
    ----------

    sku : string
          specific SKU from the dataset
    distribution_center : string or iterable of strings
          distribution center(s) whose rows are kept; a single name is
          accepted as well as a list/tuple of names
    y_scale : {"log", "linear"}
          scale of the shared y-axis; also selects logarithmic or linear
          histogram bins (the gaussian KDE is only drawn for "linear")
    show_header : bool
          display the first five matching rows
    show_plot : bool
          build and show the figure
    return_subdf : bool
          return the filtered DataFrame

    Returns
    -------
    pandas.DataFrame or None
          the filtered rows when ``return_subdf`` is True, otherwise None
    """

    # Series.isin rejects a bare string, so wrap a single center name.
    if isinstance(distribution_center, str):
        distribution_center = [distribution_center]

    df_sku = df[(df.SKU==sku)&(df["Distribution Center"].isin(distribution_center))]
    if show_header:
        display(df_sku.head(5))
    if show_plot:
        with plt.ioff():
            # Reuse a single named figure: close the previous one before redrawing.
            num="SKU view"
            if plt.fignum_exists(num):
                plt.close(num)
            fig,ax = plt.subplots(num=num,ncols=3,sharey=True,figsize=(12,5),
                                  gridspec_kw=dict(width_ratios=[4,1,1],wspace=.05,
                                                  top=.95,right=.99))
            # data in time domain
            ax[0].plot(df_sku.Week, df_sku["Forecasted Demand"],label="Forecast",lw=0,marker="o",alpha=.4)
            ax[0].plot(df_sku.Week, df_sku["Customer Orders"],label="Orders",lw=0,marker="o",alpha=.4)
            ax[0].legend(frameon=False)
            ax[0].set(xlabel="Time $t$ [Week]",
                      ylabel="Forecast and orders [Count]",
                      title=f"Number of rows = {len(df_sku)}",
                      yscale=y_scale)

            # data histograms
            def show_distribution(x, ax_, color, label):
                """
                show a horizontal histogram of the positive values of x
                (plus a gaussian kde when the scale is linear)
                """
                x = np.asarray(x, dtype=float)
                x = x[x>0]
                if x.size == 0:
                    # Nothing to draw (e.g. the SKU is not sold by the selected centers).
                    return
                m, M = x.min(),x.max()
                has_spread = M > m
                if not has_spread:
                    # A single distinct value would give identical bin edges; widen the range.
                    m, M = m/2, M*2
                if y_scale=="log":
                    bins = np.logspace(np.log10(m),np.log10(M),20)
                    ax_.hist(x,bins,orientation="horizontal",label=label,alpha=.4,density=False,color=color)
                else:
                    bins = np.linspace(m,M,20)
                    ax_.hist(x,bins,orientation="horizontal",label=label,alpha=.4,density=True,color=color)
                    if has_spread:
                        # The KDE needs at least two distinct values; it is only
                        # estimated here because the log view does not use it.
                        x_ = np.linspace(m,M,200)
                        ax_.plot(gkde(x)(x_),x_,color=color)

            show_distribution(df_sku["Forecasted Demand"].values, ax[1], "C0", "Forecast")
            show_distribution(df_sku["Customer Orders"].values, ax[2], "C1", "Orders")

            fig.canvas.header_visible=False
            fig.canvas.toolbar_visible=False
            fig.canvas.show()
    if return_subdf:
        return df_sku

# Hook get_SKU up to widgets: a dropdown for the SKU, a multi-select for the
# distribution centers and radio buttons for the y-scale. show_plot and
# return_subdf are pinned so they do not get a control of their own.
widgets.interact(
    get_SKU,
    sku=widgets.Dropdown(options=SKUs),
    show_plot=widgets.fixed(True),
    return_subdf=widgets.fixed(False),
    distribution_center=widgets.SelectMultiple(
        options=distribution_centers,
        value=[distribution_centers[0]],
    ),
    y_scale=widgets.RadioButtons(
        options=["log", "linear"],
        description="$y$-scale",
    ),
)

interactive(children=(Dropdown(description='sku', options=('SKU-A-2', 'SKU-B-2', 'SKU-C-1', 'SKU-C-2', 'SKU-F-…

<function __main__.get_SKU(sku, distribution_center, y_scale='log', show_header=False, show_plot=True, return_subdf=False)>

In [9]:
class IntelDataViewer():
    """
    Interactive viewer of Forecasted Demand vs. Customer Orders for one SKU.

    The constructor displays a row of controls (distribution centers, SKU,
    y-scale, "Show header" checkbox) above a three-panel figure: the weekly
    time series on the left and one horizontal histogram per quantity on the
    right. Any change of a control redraws the figure in place.

    The data is read from the module-level ``df``; the available choices come
    from the module-level ``SKUs`` and ``distribution_centers``.
    """

    def __init__(self):
        self.set_widgets()
        self.set_figure()
        self.update_data_plot(None)

    def set_widgets(self):
        """Create the controls, display them and register the redraw callback."""
        # define
        self.sku_w  = widgets.Dropdown(options=SKUs,description="SKU")
        self.dis_center_w=widgets.SelectMultiple(options=distribution_centers,value=[distribution_centers[0],],description="Center")
        self.show_header_w = widgets.Checkbox(description="Show header")
        self.y_scale_w =widgets.RadioButtons(options=["log","linear"],description="$y$-scale")
        self.output_fig_w = widgets.Output()
        self.display_w = widgets.Output()
        # display
        display( widgets.VBox([ widgets.HBox([self.dis_center_w, self.sku_w,self.y_scale_w, self.show_header_w]),
                               widgets.HBox([self.output_fig_w, self.display_w])]))
        # activate: every control, including the y-scale selector, triggers a redraw
        for w_ in [self.sku_w, self.dis_center_w, self.y_scale_w, self.show_header_w]:
            w_.observe(self.update_data_plot,names="value")

    @staticmethod
    def _y_limits(values, y_scale):
        """
        Return ``(low, high)`` spanning ``values``, or None if nothing is plottable.

        For ``y_scale == "log"`` only strictly positive values are considered.
        A single distinct value is widened so both limits differ.
        """
        values = np.asarray(values, dtype=float)
        values = values[np.isfinite(values)]
        if y_scale == "log":
            values = values[values > 0]
        if values.size == 0:
            return None
        low, high = float(values.min()), float(values.max())
        if low == high:
            if y_scale == "log":
                low, high = low / 2, high * 2
            else:
                low, high = low - 0.5, high + 0.5
        return low, high

    @staticmethod
    def _histogram_bins(values, y_scale, n_edges=20):
        """
        Return ``n_edges`` bin edges covering ``values``, or None if there is no data.

        Edges are logarithmically spaced for ``y_scale == "log"`` and linearly
        spaced otherwise.
        """
        limits = IntelDataViewer._y_limits(values, y_scale)
        if limits is None:
            return None
        low, high = limits
        if y_scale == "log":
            return np.logspace(np.log10(low), np.log10(high), n_edges)
        return np.linspace(low, high, n_edges)

    def _draw_distribution(self, ax_, values, y_scale, color, label):
        """Replace the histogram shown on ``ax_`` with one of ``values``."""
        # Drop the bars of the previous selection; otherwise they pile up.
        for patch in list(ax_.patches):
            patch.remove()
        ax_.relim()

        values = np.asarray(values, dtype=float)
        if y_scale == "log":
            # Non-positive values cannot be placed on a logarithmic axis.
            values = values[values > 0]
        bins = self._histogram_bins(values, y_scale)
        if bins is None:
            return
        ax_.hist(values,bins,orientation="horizontal",label=label,alpha=.4,density=True,color=color)

    def update_data_plot(self,event=None):
        """
        Redraw the figure for the current widget values.

        Parameters
        ----------
        event : object, optional
                change notification passed by ``observe``; it is not used
        """
        y_scale = self.y_scale_w.value
        df_sku = df[(df.SKU==self.sku_w.value)&(df["Distribution Center"].isin(self.dis_center_w.value))]
        weeks = df_sku["Week"].to_numpy()
        forecast = df_sku["Forecasted Demand"].to_numpy()
        orders = df_sku["Customer Orders"].to_numpy()

        # data in time domain
        self.forecast_plot.set_data(weeks, forecast)
        self.orders_plot.set_data(weeks, orders)
        self.ax[0].set_yscale(y_scale)
        self.ax[0].set_title(f"Number of rows = {len(df_sku)}")

        # data histograms
        self._draw_distribution(self.ax[1], forecast, y_scale, color="C0", label="Forecast")
        self._draw_distribution(self.ax[2], orders, y_scale, color="C1", label="Orders")

        # The selection may be empty (SKU not present in the chosen centers);
        # in that case the previous limits are kept.
        if weeks.size:
            self.ax[0].set_xlim(0, weeks.max())
        limits = self._y_limits(np.concatenate([forecast, orders]), y_scale)
        if limits is not None:
            self.ax[0].set_ylim(*limits)

        self.fig.canvas.draw()

        # Refresh the header preview, clearing it when the box is unchecked.
        with self.display_w:
            clear_output()
            if self.show_header_w.value:
                display(df_sku.head(5))

    def set_figure(self):
        """Create the three-panel figure inside the figure output widget."""
        with self.output_fig_w:
            with plt.ioff():
                # Reuse a single named figure: close the previous one first.
                num="SKU view"
                if plt.fignum_exists(num):
                    plt.close(num)
                self.fig,self.ax = plt.subplots(num=num,ncols=3,sharey=True,figsize=(9,5),
                                      gridspec_kw=dict(width_ratios=[4,1,1],wspace=.05,
                                                      top=.95,right=.99))
                # data in time domain (empty lines, filled by update_data_plot)
                self.forecast_plot, = self.ax[0].plot([],[],label="Forecast",lw=0,marker="o",alpha=.4)
                self.orders_plot, = self.ax[0].plot([],[],label="Orders",lw=0,marker="o",alpha=.4)
                self.ax[0].legend(frameon=False)
                self.ax[0].set(xlabel="Time $t$ [Week]",ylabel="Forecast and Orders [Count]",yscale=self.y_scale_w.value)

                self.fig.canvas.header_visible=False
                self.fig.canvas.toolbar_visible=False
                self.fig.canvas.show()

# Build the viewer: the constructor displays the widgets and draws the initial plot.
intelViewer = IntelDataViewer()

VBox(children=(HBox(children=(SelectMultiple(description='Center', index=(0,), options=('ALPHA', 'BETA', 'GAMM…

In [None]:
# relim() only recomputes the data limits from the current artists; the view
# is left untouched while autoscaling is off, which is the state the viewer's
# explicit set_xlim/set_ylim calls leave the axes in. Re-enable autoscaling so
# the redraw actually fits the view to the data.
intelViewer.ax[0].relim()
intelViewer.ax[0].autoscale(enable=True, axis="both")
intelViewer.fig.canvas.draw()