# First look at Intel's dataset

In [48]:
import os

%matplotlib widget
import numpy as np
from scipy.stats import gaussian_kde as gkde
import matplotlib.pyplot as plt
import pandas as pd

from IPython.display import Markdown, clear_output
import ipywidgets as widgets
def matplotlib_theme_2_jupyter():
    """
    match the matplotlib style to the JupyterLab theme

    The theme name is looked up in the per-user JupyterLab settings file
    ``~/.jupyter/lab/user-settings/@jupyterlab/apputils-extension/themes.jupyterlab-settings``.
    When that file cannot be read, or no ``"theme"`` entry can be parsed from
    it, the light theme is assumed and a hint is printed.
    """
    # Build the path from the user's home directory: os.getlogin() relies on a
    # controlling terminal and home directories are not always /home/<login>.
    settings_path = os.path.join(
        os.path.expanduser("~"), ".jupyter", "lab", "user-settings",
        "@jupyterlab", "apputils-extension", "themes.jupyterlab-settings")
    try:
        with open(settings_path) as file:
            lines = file.readlines()
        # the settings file may contain comments, so it is scanned line by line
        # for the `"theme": "<name>"` entry instead of being parsed as JSON
        theme = [line.split(":")[1].split('"')[1] for line in lines if line.find('"theme"')>-1][0]
    except (OSError, IndexError, ValueError):
        # OSError: file missing/unreadable; IndexError: no parsable "theme" line;
        # ValueError: the file could not be decoded as text
        theme = "JupyterLab Light"
        print("Jupyter settings file was not found: modify the path above to your jupyter directory, or edit the colors manually")
    if theme == 'JupyterLab Dark':
        plt.style.use("dark_background")
        # same dark gray for the figure and the axes backgrounds
        background = .07*np.ones(3)
        plt.rcParams['figure.facecolor']=background
        plt.rcParams['figure.edgecolor']=background
        plt.rcParams['axes.facecolor']=background
        plt.rcParams['grid.linewidth']=.2
    else:
        plt.style.use("default")
matplotlib_theme_2_jupyter()

## Raw Data

In [49]:
# load the raw table and collect the distinct SKUs and distribution centers
df = pd.read_csv("IntelData.csv")
SKUs = df["SKU"].unique()
distribution_centers = df["Distribution Center"].unique()

# preview the first rows, then summarize the size of the dataset
display(df.head())
display(
    Markdown(
        f"The dataset consists of **{len(df):,}** rows, with **{len(SKUs)}** individual SKUs over **{len(distribution_centers)}** distribution centers"
    )
)

Unnamed: 0,Distribution Center,Product Offering,Generation,SKU,ASP Group,Week,Forecasted Demand,Customer Orders
0,ALPHA,A,2,SKU-A-2,1,1,8949,11146
1,ALPHA,B,2,SKU-B-2,1,1,11146,3503
2,ALPHA,C,1,SKU-C-1,3,1,1274,5892
3,ALPHA,C,2,SKU-C-2,4,1,20717,3185
4,ALPHA,F,1,SKU-F-1,6,1,24522,6529


The dataset consists of **26,114** rows, with **86** individual SKUs over **5** distribution centers

## Data viewer class

In [56]:
class IntelDataViewer():
    """
    simple draft of a class for viewing the data

    For the SKU and the distribution centers chosen in the widgets, the viewer
    plots forecasts and orders against the week (left axes) and histograms of
    both quantities (middle and right axes). The data and the widget options
    are taken from the module-level ``df``, ``SKUs`` and ``distribution_centers``.
    """
    def __init__(self):
        # artists created by show_distribution; remembered so that the next
        # update can remove them before drawing the new ones
        self.bars1, self.bars2 = [], []
        self.kde1, self.kde2 = [], []
        self.set_widgets()
        self.set_figure()
        self.update_data_plot(1)  # the argument only fills the unused `event` slot

    def set_widgets(self):
        """
        the method creates displays and activates widgets
        """
        # define
        self.sku_w = widgets.Dropdown(options=SKUs, description="SKU")
        self.dis_center_w = widgets.SelectMultiple(options=distribution_centers,
                                                   value=[distribution_centers[0],],
                                                   description="Center")
        self.show_header_w = widgets.Checkbox(description="Show header", value=True)
        self.y_scale_w = widgets.RadioButtons(options=["log","linear"], description="$y$-scale")
        self.output_fig_w = widgets.Output()
        self.display_w = widgets.Output()
        # display: controls in the first row, figure and data frame preview in the second
        display(widgets.VBox([widgets.HBox([self.dis_center_w, self.sku_w, self.y_scale_w, self.show_header_w]),
                              widgets.HBox([self.output_fig_w, self.display_w])]))
        # activate: any change of a control value redraws the plots
        for w_ in [self.sku_w, self.dis_center_w, self.show_header_w, self.y_scale_w]:
            w_.observe(self.update_data_plot, names="value")

    def set_figure(self):
        """
        create figure, axes and plot holders
        """
        with self.output_fig_w:
            with plt.ioff():
                num = "SKU view"
                # reuse the figure name: close a figure left over from a previous run
                if plt.fignum_exists(num):
                    plt.close(num)
                self.fig, self.ax = plt.subplots(num=num, ncols=3, sharey=True, figsize=(9,3.5),
                                                 gridspec_kw=dict(width_ratios=[4,1,1], wspace=.05,
                                                                  top=.99, right=.99, bottom=.14))
                # data in time domain: empty line holders, filled in update_data_plot
                self.forecast_plot, = self.ax[0].plot([], [], label="Forecast", lw=0, marker="o", alpha=.4)
                self.orders_plot, = self.ax[0].plot([], [], label="Orders", lw=0, marker="o", alpha=.4)
                self.ax[0].legend()
                self.ax[0].set(xlabel="Time $t$ [Week]",
                               ylabel="Forecast and orders [Count]",
                               yscale=self.y_scale_w.value)

                self.fig.canvas.header_visible = False
                self.fig.canvas.toolbar_visible = False
                self.fig.canvas.show()

    @staticmethod
    def _y_limits(values, scale):
        """
        return the (lower, upper) limits of the shared y-axis, or None if there is nothing to show

        linear scale: from 0 to the largest value;
        log scale: from half of the smallest positive value to twice the largest one
        (non-positive values cannot be placed on a log axis and are ignored)
        """
        values = np.asarray(values)
        if len(values) == 0:
            return None
        if scale == "linear":
            return 0, values.max()
        positive = values[values > 0]
        if len(positive) == 0:
            return None
        return positive.min()/2, positive.max()*2

    @staticmethod
    def _histogram_bins(x, scale, n_edges=20):
        """
        return `n_edges` bin edges spanning `x`: log-spaced for scale "log", evenly spaced otherwise
        """
        m, M = x.min(), x.max()
        if m == M:
            # all values are equal: widen the range so that the bins keep a non-zero width
            m, M = .9*m, 1.1*M
        if scale == "log":
            return np.logspace(np.log10(m), np.log10(M), n_edges)
        return np.linspace(m, M, n_edges)

    def update_data_plot(self, event):
        """
        the main method for updating plots

        Parameters
        ----------

        event : ipywidget event
                not used; the current values are read from the widgets directly
        """
        # Extract data
        df_sku = df[(df.SKU==self.sku_w.value)&(df["Distribution Center"].isin(self.dis_center_w.value))]
        weeks = df_sku.Week.values
        forecasts = df_sku["Forecasted Demand"].values
        orders = df_sku["Customer Orders"].values
        scale = self.y_scale_w.value

        # update time-domain plots
        self.forecast_plot.set_data(weeks, forecasts)
        self.orders_plot.set_data(weeks, orders)
        self.ax[0].set(yscale=scale)
        if len(weeks) > 0:
            # an empty selection has no maximum; keep the previous x-range in that case
            self.ax[0].set_xlim(0, weeks.max())
        ylim = self._y_limits(np.r_[forecasts, orders], scale)
        if ylim is not None:
            self.ax[0].set_ylim(ylim)

        # update distribution plots
        self.show_distribution(forecasts, self.ax[1], color="C0", label="Forecast")
        self.show_distribution(orders, self.ax[2], color="C1", label="Orders")
        self.fig.canvas.draw()

        # show the optional data frame header
        with self.display_w:
            clear_output()
            if self.show_header_w.value:
                display(Markdown(f"Number of rows = {len(df_sku)}"))
                display(df_sku.head(5))

    def show_distribution(self, x, ax_, color, label=None):
        """
        show histograms and (for linear scale) gaussian kde

        Artists drawn by the previous call with the same `color` are removed first.
        Nothing new is drawn when fewer than three positive values are available.

        Parameters
        ----------

        x : NumPy array of floats
            variable to histogram (orders or forecasts)

        ax_ : Matplotlib axes object
              axes where to put data

        color : a valid matplotlib color designation
                e.g. relative colors of the active theme "C0", "C1", etc.; or absolute color "r", "b", etc.
                "C0" uses the first set of stored artists, any other color the second one

        label : string, optional
                label of the histogram; defaults to "Forecast"
        """
        if label is None:
            label = "Forecast"
        bars_attr, kde_attr = ("bars1", "kde1") if color=="C0" else ("bars2", "kde2")

        # remove what the previous call drew and forget it, so that no artist is removed twice
        for artist in [*getattr(self, bars_attr), *getattr(self, kde_attr)]:
            artist.remove()
        setattr(self, bars_attr, [])
        setattr(self, kde_attr, [])

        x = np.asarray(x)
        x = x[x>0]
        if len(x) <= 2:
            return

        scale = self.y_scale_w.value
        log_scale = scale=="log"
        bins = self._histogram_bins(x, scale)
        # counts on the log scale, normalized density (comparable with the kde) otherwise
        c, _, bars = ax_.hist(x, bins, orientation="horizontal", label=label, alpha=.4,
                              density=not log_scale, color=color)
        setattr(self, bars_attr, bars)
        x_top = c.max()

        if not log_scale and x.min() < x.max():
            try:
                p_x = gkde(x)
            except np.linalg.LinAlgError:
                # degenerate data: show the histogram without the density estimate
                p_x = None
            if p_x is not None:
                x_ = np.linspace(x.min(), x.max(), 200)
                curve = p_x(x_)
                setattr(self, kde_attr, ax_.plot(curve, x_, color=color))
                x_top = max(x_top, curve.max())
        ax_.set(xlim=[0, 1.05*x_top])
# creating the viewer displays the widgets, builds the figure and draws the initial plots
intelViewer = IntelDataViewer()

VBox(children=(HBox(children=(SelectMultiple(description='Center', index=(0,), options=('ALPHA', 'BETA', 'GAMM…

## Simpler plotting function

In [None]:
def get_SKU(sku, distribution_center, y_scale="log",show_header=False, show_plot=True, return_subdf=False):
    """
    gets Forecasted Demand and Customer Orders per Week

    The rows of the module-level ``df`` are filtered by SKU and distribution
    center; optionally the selection is displayed, plotted and returned.

    Parameters
    ----------

    sku : string
          specific SKU from the dataset

    distribution_center : string or list-like of strings
          distribution center(s) to include; a single name is treated as a one-element list

    y_scale : string
          scale of the shared y-axis; "log" gives log-spaced histogram bins with counts,
          any other value gives evenly spaced bins with densities and a gaussian kde

    show_header : bool
          display the first five rows of the selection

    show_plot : bool
          draw the time-domain plot and the two histograms

    return_subdf : bool
          return the selected rows

    Returns
    -------

    pandas DataFrame with the selected rows if `return_subdf` is True, otherwise None
    """
    # `isin` needs a list-like argument; wrap a single center name
    if isinstance(distribution_center, str):
        distribution_center = [distribution_center]

    df_sku = df[(df.SKU==sku)&(df["Distribution Center"].isin(distribution_center))]
    if show_header:
        display(df_sku.head(5))
    if show_plot:
        with plt.ioff():
            num="SKU view"
            # reuse the figure name: close a figure left over from a previous call
            if plt.fignum_exists(num):
                plt.close(num)
            fig,ax = plt.subplots(num=num,ncols=3,sharey=True,figsize=(12,5),
                                  gridspec_kw=dict(width_ratios=[4,1,1],wspace=.05,
                                                  top=.95,right=.99))
            # data in time domain
            ax[0].plot(df_sku.Week, df_sku["Forecasted Demand"],label="Forecast",lw=0,marker="o",alpha=.4)
            ax[0].plot(df_sku.Week, df_sku["Customer Orders"],label="Orders",lw=0,marker="o",alpha=.4)
            ax[0].legend(frameon=False)
            ax[0].set(xlabel="Time $t$ [Week]",
                      ylabel="Forecast and orders [Count]",
                      title=f"Number of rows = {len(df_sku)}",
                      yscale=y_scale)

            # data histograms
            def show_distribution(x, ax_, color, label):
                """
                show histograms and (for linear scale) gaussian kde
                """
                x = x[x>0]
                if len(x) <= 2:
                    return
                m, M = x.min(),x.max()
                spread = M > m
                if not spread:
                    # all values are equal: widen the range so that the bins keep a non-zero width
                    m, M = .9*m, 1.1*M
                if y_scale=="log":
                    bins = np.logspace(np.log10(m),np.log10(M),20)
                    ax_.hist(x,bins,orientation="horizontal",label=label,alpha=.4,density=False,color=color)
                    return
                bins = np.linspace(m,M,20)
                ax_.hist(x,bins,orientation="horizontal",label=label,alpha=.4,density=True,color=color)
                if not spread:
                    # a kde of identical values is degenerate; the histogram alone is shown
                    return
                try:
                    # estimated only here: it is not drawn on the log scale
                    p_x = gkde(x)
                except np.linalg.LinAlgError:
                    return
                x_ = np.linspace(m,M,200)
                ax_.plot(p_x(x_),x_,color=color)

            show_distribution(df_sku["Forecasted Demand"].values, ax[1], "C0", "Forecast")
            show_distribution(df_sku["Customer Orders"].values, ax[2], "C1", "Orders")

            fig.canvas.header_visible=False
            fig.canvas.toolbar_visible=False
            fig.canvas.show()
    if return_subdf:
        return df_sku

# interactive front end for get_SKU: SKU, centers and y-scale are selectable,
# while show_plot and return_subdf are pinned to fixed values
widgets.interact(
    get_SKU,
    sku=widgets.Dropdown(options=SKUs),
    show_plot=widgets.fixed(True),
    return_subdf=widgets.fixed(False),
    distribution_center=widgets.SelectMultiple(
        options=distribution_centers,
        value=[distribution_centers[0]],
    ),
    y_scale=widgets.RadioButtons(
        options=["log", "linear"],
        description="$y$-scale",
    ),
)