In [1]:
import plotly.express as px
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from xgboost import plot_importance
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
from joblib import dump, load
import statistics


def plot_features(booster, figsize):
    fig, ax = plt.subplots(1, 1, figsize=figsize)
    return plot_importance(booster=booster, ax=ax)

In [9]:
class PredictionModel:
    def __init__(self):
        self.import_data()
        self.convert_to_string()
        self.down_cast()
        self.clean_lag()
        self.one_hot_encode_lag()

    def downcast_dtypes(self, df):
        float_cols = [c for c in df if df[c].dtype == "float64"]
        int_cols = [c for c in df if df[c].dtype in ["int64", "int32"]]
        df[float_cols] = df[float_cols].astype(np.float32)
        df[int_cols] = df[int_cols].astype(np.int16)
        return df

    def import_data(self):
        #self.train = pd.read_csv('sales_train.csv')
        self.items = pd.read_csv('items.csv')
        #self.item_cats = pd.read_csv('item_categories.csv')
        self.items_t = pd.read_csv('items_translated_text.csv')
        self.train_grouped_month = pd.read_csv('month_lag_grouped.csv')
        self.train_lag_new = pd.read_csv('new_month_group.csv')
        
    def convert_to_string(self):
        # nominal intergers can not be converted to binary encoding, convert to string
        #self.train_lag_new['date_block_num'] = [('month ' + str(i)) for i in month_group2['date_block_num']]
        self.train_lag_new['shop_id'] = [('shop ' + str(i)) for i in self.train_lag_new['shop_id']]
        self.train_lag_new['item_category_id'] = [('item_category ' + str(i)) for i in self.train_lag_new['item_category_id']]
        self.train_lag_new['item_id'] = [('item ' + str(i)) for i in self.train_lag_new['item_id']]

    def down_cast(self):
        #self.train = self.downcast_dtypes(self.train)
        self.items = self.downcast_dtypes(self.items)
        self.train_grouped_month = self.downcast_dtypes(self.train_grouped_month)
        #self.item_cats = self.downcast_dtypes(self.item_cats)
        self.train_lag_new = self.downcast_dtypes(self.train_lag_new)

    def clean_lag(self):
        self.train_lag_new = self.train_lag_new.dropna()
    
    def one_hot_encode_lag(self):
        self.ct = ColumnTransformer([('encoder', OneHotEncoder(), [0, 1, 2, 3])], remainder = 'passthrough')
        self.ct.fit(self.train_lag_new.iloc[:, 1:-1].values)
        
    def run_lag_model(self):
        self.regressor = load('ridge_regressor_model2.joblib')
        
    def get_z_list_lag(self, shop_id_num, item_id_num):
        # item_cat = items.loc[items['item_id'] == item_id_num, ['item_category_id']].values[0][0]
        self.item_cat = self.items[self.items['item_id'] == item_id_num]['item_category_id'].values[0]
        self.prices = self.train_grouped_month.loc[self.train_grouped_month['item_id'] == item_id_num, ['item_price']].values
        self.price = statistics.mode(self.prices[0])
        date_num = 34
        new_pd = self.train_grouped_month.loc[self.train_grouped_month['date_block_num'] == date_num - 1].loc[
            self.train_grouped_month['shop_id'] == shop_id_num].loc[self.train_grouped_month['item_id'] == item_id_num]
        new_pd2 = self.train_grouped_month.loc[self.train_grouped_month['date_block_num'] == date_num - 2].loc[
            self.train_grouped_month['shop_id'] == shop_id_num].loc[self.train_grouped_month['item_id'] == item_id_num]
        new_pd3 = self.train_grouped_month.loc[self.train_grouped_month['date_block_num'] == date_num - 3].loc[
            self.train_grouped_month['shop_id'] == shop_id_num].loc[self.train_grouped_month['item_id'] == item_id_num]
        new_pd4 = self.train_grouped_month.loc[self.train_grouped_month['date_block_num'] == date_num - 4].loc[
            self.train_grouped_month['shop_id'] == shop_id_num].loc[self.train_grouped_month['item_id'] == item_id_num]
        new_pd5 = self.train_grouped_month.loc[self.train_grouped_month['date_block_num'] == date_num - 5].loc[
            self.train_grouped_month['shop_id'] == shop_id_num].loc[self.train_grouped_month['item_id'] == item_id_num]
        # print(len(new_pd['date_block_num']))
        if len(new_pd['shop_id']) > 0:
            mon1 = self.train_grouped_month['item_cnt_day'][new_pd.index[0]]
        else:
            mon1 = 0.0

        if len(new_pd2['shop_id']) > 0:
            mon2 = self.train_grouped_month['item_cnt_day'][new_pd2.index[0]]
        else:
            mon2 = 0.0

        if len(new_pd3['shop_id']) > 0:
            mon3 = self.train_grouped_month['item_cnt_day'][new_pd3.index[0]]
        else:
            mon3 = 0.0

        if len(new_pd4['shop_id']) > 0:
            mon4 = self.train_grouped_month['item_cnt_day'][new_pd4.index[0]]
        else:
            mon4 = 0.0

        if len(new_pd5['shop_id']) > 0:
            mon5 = self.train_grouped_month['item_cnt_day'][new_pd5.index[0]]
        else:
            mon5 = 0.0

        return ['november', 'shop ' + str(shop_id_num), 'item_category ' + str(self.item_cat), 'item ' + str(item_id_num),
             self.price, mon1, mon2, mon3, mon4, mon5]
        
    def predict_month(self, shop_id_num, item_id_num):
        z = self.get_z_list_lag(shop_id_num, item_id_num)
        z = np.array(z, dtype=object).reshape(1, -1)
        z = self.ct.transform(z)
        z_pred = self.regressor.predict(z)

        return round(z_pred[0], 3)
    
    def create_one_shop_one_item_df(self, itemid, shopid):
        self.one_shop_df = self.train_grouped_month[self.train_grouped_month['shop_id'] == shopid]
        self.one_shop_one_item_df = self.one_shop_df[self.one_shop_df['item_id'] == itemid]


    def create_one_shop_df(self, shopid):
        self.one_shop_df = self.train_grouped_month[self.train_grouped_month['shop_id'] == shopid]

    def create_3d_scatter_fig(self):
        self.fig = px.scatter_3d(self.one_shop_one_item_df, x='date_block_num', y='item_price', z='item_cnt_day', color='item_price')

    def get_translated_name(self, itemid):
        self.t_name = self.items_t[self.items_t['item_id'] == itemid]['english_name']

    def get_valid_item_list(self, shopid):
        self.one_shop = self.train_grouped_month[self.train_grouped_month['shop_id'] == shopid]
        self.valid_items = list(self.one_shop.item_id.unique())

    def get_valid_shops_list(self):
        self.valid_shops = list(self.train_grouped_month.shop_id.unique())

    def convert_list_to_options_dict_items(self):
        self.list_of_dicts_items = []

        for item in self.valid_items:
            temp_dict = {'label': item, 'value': item}
            self.list_of_dicts_items.append(temp_dict)

    def convert_list_to_options_dict_shops(self):
        self.list_of_dicts_shops = []

        for item in self.valid_shops:
            temp_dict = {'label': item, 'value': item}
            self.list_of_dicts_shops.append(temp_dict)

In [10]:
sample_model = PredictionModel()

sample_model.run_lag_model()



In [13]:
sample_shop_id = 55
sample_item_id = 492

sample_model.create_one_shop_df(sample_shop_id)
sample_model.create_one_shop_one_item_df(sample_shop_id, sample_item_id)
sample_model.create_3d_scatter_fig()

# gets the list of items that are sold in a particular shop to put into the drop down menu
sample_model.get_valid_item_list(sample_shop_id)
sample_model.convert_list_to_options_dict_items()

# gets a list of the valid shop_id's to display in the drop down menu
sample_model.get_valid_shops_list()
sample_model.convert_list_to_options_dict_shops()


# basic stylesheet
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
styles = {
    'pre': {
        'border': 'thin lightgrey solid',
        'overflowX': 'scroll'
    }
}


# App layout
app.layout = html.Div([

    html.H1("Sales Forecasting", style={'text-align': 'center'}),
    html.H2("Enter Shop ID and Item ID to see next months predicted sales count for that item.",
            style={'text-align': 'center'}),

    # contains the graph
    html.Div([
        dcc.Graph(
            id='3d-scatter',
            figure={})]),
    html.Br(),

    html.H2([
        'Predicted Sales for Next Month: ',
        html.H2(
            id='prediction_count')], style={'text-align': 'center'}),

    # contains the shop id drop down menu
    html.Div(['Shop ID: ',
              dcc.Dropdown(
                  id='shop-dropdown',
                  options=sample_model.list_of_dicts_shops,
                  placeholder="Select a Shop ID (0-60)"
              )
              ]),

    # contains the item id drop down menu
    html.Div(['Item ID: ',
              dcc.Dropdown(
                  id='item-dropdown',
                  options=sample_model.list_of_dicts_items,
                  placeholder="Select an Item ID"
              )
              ]),

    # displays the item name
    html.Div([
        'Item Name: ',
        html.Div(
            id='item_name')]),

    # contains the submit button
    html.Div(html.Button(id='submit-button-state', n_clicks=0, children='Show Sales Graph')),
    html.Br(),

    html.Div(html.Button(id='predict-button-state', n_clicks=0, children='Predict Next Month')),
    html.Br(),

])

# Connects the selected shop_id to the item_id drop down with valid item_id's
@app.callback(
    Output(component_id='item-dropdown', component_property='options'),
    Input('shop-dropdown', 'value')
)
def update_dropdown_option(shop_id_from_dropdown):
    sample_model.get_valid_item_list(shop_id_from_dropdown)
    sample_model.convert_list_to_options_dict_items()
    return sample_model.list_of_dicts_items


# Connect the Plotly graphs with Dash drop down Components
@app.callback(
    [Output(component_id='3d-scatter', component_property='figure'),
     Output(component_id='item_name', component_property='children')],
    [Input('submit-button-state', 'n_clicks')],
    [State("shop-dropdown", "value"),
     State("item-dropdown", "value")]
)
def update_graph(n_clicks, input_shop_id, input_item_id):
    sample_model.create_one_shop_one_item_df(input_item_id, input_shop_id)
    sample_model.create_3d_scatter_fig()
    sample_model.get_translated_name(input_item_id)
    return sample_model.fig, 'Item Name: '.join(sample_model.t_name)


@app.callback(
    Output(component_id='prediction_count', component_property='children'),
    [Input('predict-button-state', 'n_clicks')],
    [State("shop-dropdown", "value"),
     State("item-dropdown", "value")]
)
def predict(n_clicks, input_shop_id, input_item_id):
    return sample_model.predict_month(input_shop_id, input_item_id)


# runs the whole thing
if __name__ == '__main__':
    app.run_server(debug=False)

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


 * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
127.0.0.1 - - [23/Sep/2020 21:20:52] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:20:54] "[37mGET /_dash-component-suites/dash_renderer/prop-types@15.v1_8_0m1600103338.7.2.min.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:20:54] "[37mGET /_dash-component-suites/dash_renderer/react@16.v1_8_0m1600103338.13.0.min.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:20:54] "[37mGET /_dash-component-suites/dash_renderer/polyfill@7.v1_8_0m1600103338.8.7.min.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:20:54] "[37mGET /_dash-component-suites/dash_renderer/dash_renderer.v1_8_0m1600103338.min.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:20:54] "[37mGET /_dash-component-suites/dash_core_components/dash_core_components.v1_12_0m1600103340.min.js HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:20:54] "[37mGET /_dash-component-suites/dash_renderer/react-dom@16.v1_8_0m1600103338.13.0.min.js HTT

Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Users\kishe\anaconda3\lib\site-packages\dash\dash.py", line 1059, in dispatch
    response.set_d

127.0.0.1 - - [23/Sep/2020 21:20:59] "[35m[1mPOST /_dash-update-component HTTP/1.1[0m" 500 -
127.0.0.1 - - [23/Sep/2020 21:21:08] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:21:12] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:21:21] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:21:30] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:21:35] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:21:38] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:25:18] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:25:23] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:25:26] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [23/Sep/2020 21:26:10] "[37mPOST /_dash-update-component HTTP

Exception on /_dash-update-component [POST]
Traceback (most recent call last):
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 2447, in wsgi_app
    response = self.full_dispatch_request()
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 1952, in full_dispatch_request
    rv = self.handle_user_exception(e)
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 1821, in handle_user_exception
    reraise(exc_type, exc_value, tb)
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\_compat.py", line 39, in reraise
    raise value
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 1950, in full_dispatch_request
    rv = self.dispatch_request()
  File "C:\Users\kishe\anaconda3\lib\site-packages\flask\app.py", line 1936, in dispatch_request
    return self.view_functions[rule.endpoint](**req.view_args)
  File "C:\Users\kishe\anaconda3\lib\site-packages\dash\dash.py", line 1059, in dispatch
    response.set_d

127.0.0.1 - - [24/Sep/2020 00:30:59] "[35m[1mPOST /_dash-update-component HTTP/1.1[0m" 500 -
127.0.0.1 - - [24/Sep/2020 00:31:05] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [24/Sep/2020 00:31:23] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [24/Sep/2020 00:31:27] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [24/Sep/2020 00:31:57] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [24/Sep/2020 00:32:11] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [24/Sep/2020 00:32:13] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [24/Sep/2020 00:32:31] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [24/Sep/2020 00:32:41] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
127.0.0.1 - - [24/Sep/2020 00:32:46] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
