In [31]:
# Basic operations
import numpy as np
import pandas as pd

# Dashboard
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

# Plotly
import plotly.express as px
import plotly.graph_objects as go

# Forecast
from fbprophet import Prophet

# SQL
%run -i "Query_Function.py"

# Disable warnings
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import logging
logger = logging.getLogger()
logger.setLevel(logging.CRITICAL)

# Query data from the remote database

## Step 1:
* Query a list of all the countries (SQL)
* Display the list for user selection (Dash)

## Step 2:
* Query and save as a dataframe - date, royalties, and movie ID by country (SQL)
* Query a table mapping movie titles to movie ID (SQL)
* Obtain a list of movies in that particular country (Pandas)
* Display the list for user selection (Dash)

## Step 3:
* Filter out the dataframe by movie id (Pandas)
* Pass the dataframe to FP prophet

In [2]:
# Query a list of all the countries (SQL)
query_1 = '''
SELECT DISTINCT country_name AS Country
FROM dim_user
'''
df_countries = data(query_1)
df_countries.dropna(axis=0, inplace=True)

In [3]:
# Query a table mapping movie titles to movie ID (SQL)
query_2 = '''
SELECT original_title AS Title, 
       movie_metadata_id AS Movie_id
FROM dim_movie_metadata
'''
df_titles_to_id = data(query_2)
df_titles_to_id.dropna(axis=0, inplace=True)

In [18]:
# Query and save as a dataframe - date, royalties, and movie ID by country (SQL)
query_3 = '''
SELECT d.date_value AS Date,
       s.royalties_paid AS Royalties,
       s.movie_metadata_id AS Movie_id

FROM fact_sales AS s
INNER JOIN dim_user as u
ON s.user_id = u.user_id
AND u.country_name = "{}"
INNER JOIN dim_date d
ON s.download_date_pst_id = d.date_id;
'''
#df_big = data(query_3.format('France'))

In [38]:
# Pass the dataframe to FP prophet

# A function to wrap up the forecast
def forecast_wrap(data = df_big, movie = 'Requiem for a Dream', h = 3):

    # Filter out the dataframe by movie id (Pandas)
    movie_id = df_titles_to_id[df_titles_to_id['Title']==movie].iloc[0,1]
    df = data[data['Movie_id'] == movie_id]

    # Reset the index
    df.set_index('Date', inplace=True)
    df.sort_index(inplace=True)
    df.index = pd.to_datetime(df.index)

    # Remove incomplete data from the current month
    end_of_last_month = pd.to_datetime('today') + pd.tseries.offsets.MonthEnd(-1)
    df = df[:end_of_last_month]

    # Make a dataframe for FP prophet
    df = df.resample('1M')['Royalties'].sum()
    df = df.reset_index()
    df.columns = ['ds', 'y']

    # Fit the model
    # Log transformation to avoid negative predictions
    df['y'] = np.log(df['y'] + 1) #Plus 1 to avoid running into 0-revenue months
    model = Prophet(seasonality_mode='multiplicative')
    model.add_seasonality(name='monthly', period=30.5, fourier_order=5)
    model.fit(df)
    future = model.make_future_dataframe(periods = h, freq = 'M')
    forecast = model.predict(future)
    df['y'] = np.exp(df['y']) - 1
    forecast['yhat'] = np.exp(forecast['yhat'])
    forecast['yhat_upper'] = np.exp(forecast['yhat_upper'])
    forecast['yhat_lower'] = np.exp(forecast['yhat_lower'])

    # Extract confidence intervals
    conf = forecast[['ds','yhat_upper', 'yhat_lower']]
    conf = conf.iloc[-h:]
    conf = conf.append({'ds':df.iloc[-1,0], 
                        'yhat_upper':df.iloc[-1,1], 
                        'yhat_lower':df.iloc[-1,1]}, 
                        ignore_index=True)
    conf = conf.sort_values('ds')

    # Put the forecast and factual data into the same dataframe
    df['type'] = 'past'

    forecast = forecast.iloc[-h:]
    forecast = forecast[['ds', 'yhat']]
    forecast.columns = ['ds','y']
    forecast['type'] = 'forecast'
    df = pd.concat([df, forecast],axis=0)

    # Round the number down to 2 digits after the decimal points
    df['y'] = np.round(df['y'], 2)

    # Plot the result
    m_past = df['type'] == 'past'
    m_fore = df['type'] == 'forecast'

    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df['ds'][m_past], y=df['y'][m_past], mode='lines', name='past'))
    fig.add_trace(go.Scatter(x=df['ds'][m_fore], y=df['y'][m_fore], mode='lines+markers', name='forecast'))
    fig.add_trace(go.Scatter(x=[df['ds'][m_past].iloc[-1], df['ds'][m_fore].iloc[0]], 
                            y=[df['y'][m_past].iloc[-1], df['y'][m_fore].iloc[0]], 
                            mode='lines', name='forecast', 
                            hoverinfo='skip', line_color='#ff7f0e', showlegend=False))

    # Confidence interval
    fig.add_trace(go.Scatter(x=conf['ds'], y=conf['yhat_lower'], 
                             mode='lines', name='possible range', 
                             fill=None, line_color='#ff7f0e', opacity=0.1))
    fig.add_trace(go.Scatter(x=conf['ds'], y=conf['yhat_upper'], 
                             mode='lines', name='possible range', 
                             fill='tonexty', line_color='#ff7f0e', opacity=0.1))

    #The line connecting past and forecasted points

    fig.update_layout(title='Royalties Forecasting for ' + movie,
                    xaxis_title='Year',
                    yaxis_title='Royalities (in euros)')
    return fig

In [39]:
# Build a dashboard
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output

external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

app.layout = html.Div([
    
    html.Div([
    dcc.Dropdown(
                id='country',
                options=[{'label': i, 'value': i} for i in df_countries['Country'].to_list()],
                value='Belgium'
            ),
    dcc.Dropdown(
                id='movie'
            ),        
    ]),
    
    dcc.Graph(id='forecast'),

])

@app.callback(
    Output('movie', 'options'),
    [Input('country', 'value')])
def update_movie_list(country):
    # Declare a global variable
    global df_big
    
    df_big = data(query_3.format(country))
    df_movie = df_titles_to_id[df_titles_to_id['Movie_id'].isin(df_big['Movie_id'].unique())]
    return [{'label': i, 'value': i} for i in df_movie['Title'].to_list()]

@app.callback(
    Output('forecast', 'figure'),
    [Input('movie', 'value')])
def update_figure(movie):
    # Declare a global variable
    global df_big
    
    return forecast_wrap(df_big, movie)

if __name__ == '__main__':
    app.run_server(debug=False)

 * Serving Flask app "Query_Function" (lazy loading)
 * Environment: production
   Use a production WSGI server instead.
 * Debug mode: off


INFO:werkzeug: * Running on http://127.0.0.1:8050/ (Press CTRL+C to quit)
INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:31:57] "[37mGET / HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:31:57] "[37mGET /_dash-layout HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:31:57] "[37mGET /_dash-dependencies HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:31:57] "[1m[35mPOST /_dash-update-component HTTP/1.1[0m" 500 -


None


INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:32:05] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:32:08] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


L'affaire Tournesol


INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:32:33] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


El esfuerzo y el ánimo


INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:32:39] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


Gekijô ban Naruto: Shippûden - Kizuna


INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:32:46] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


Kill Speed


INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:32:52] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


La leyenda


INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:33:05] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


Dune


INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:33:12] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:34:31] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -


Dune


INFO:werkzeug:127.0.0.1 - - [01/Feb/2020 20:34:45] "[37mPOST /_dash-update-component HTTP/1.1[0m" 200 -
