<a href="https://colab.research.google.com/github/Pac1226/mercado-libre-prophet-forecasting/blob/main/mercadolibre_forecasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install the required libraries
!pip install pystan
!pip install prophet
!pip install prophet.plot
!pip install plotly
!pip3 install --quiet datapane

In [2]:
# Import the required libraries and dependencies
import datetime as dt
import os
from getpass import getpass

import datapane as dp
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import pystan
from google.colab import files
from plotly.subplots import make_subplots
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
%matplotlib inline

In [3]:
# Datapane credentials.
# The API token is a secret and must not be stored in the notebook: it is taken
# from the DATAPANE_API_TOKEN environment variable when set, otherwise the user
# is prompted for it (input is not echoed).
# Any token that was previously committed in this cell should be revoked.
datapane_api_token = os.environ.get("DATAPANE_API_TOKEN") or getpass("Datapane API token: ")
datapane_server_url = "https://datapane.com" #@param {type:"string"}

In [4]:
# Authenticate this session with Datapane using the configured API token.
# Only the token is supplied; `datapane_server_url` is not passed to the call.
login_options = {"token": datapane_api_token}
dp.login(**login_options)

[32mConnected successfully to https://datapane.com as peterlieberman212@gmail.com[0m


'peterlieberman212@gmail.com'

In [None]:
# Attach the user's Google Drive to the Colab filesystem
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

## Part 1: Data Aggregation: Historical Sales & Search Activity

In [None]:
# Uploads daily sales data from a CSV file (Colab opens a file picker)
uploaded = files.upload()
sales_df = pd.read_csv("mercado_daily_revenue.csv", index_col='Date', parse_dates=True, infer_datetime_format=True)
sales_df.columns = ["Revenue"]

# Mean revenue per weekday (0 = Monday), per ISO week number and per month (1-12)
daily_sales_df = sales_df.groupby(by=sales_df.index.dayofweek).mean()
# `DatetimeIndex.week` is deprecated and was removed in pandas 2.0;
# `isocalendar().week` returns the same ISO week numbers and matches how the
# search data is grouped.
weekly_sales_df = sales_df.groupby(by=sales_df.index.isocalendar().week).mean()
monthly_sales_df = sales_df.groupby(by=sales_df.index.month).mean()

In [None]:
# Load the hourly search-trend CSV picked in the Colab upload dialog and
# discard rows that contain missing values
uploaded = files.upload()
hourly_search_df = pd.read_csv(
    "google_hourly_search_trends.csv",
    index_col='Date',
    parse_dates=True,
    infer_datetime_format=True,
).dropna()

# Mean search interest per weekday, per ISO week number and per calendar month
hourly_timestamps = hourly_search_df.index
daily_search_df = hourly_search_df.groupby(hourly_timestamps.dayofweek).mean()
weekly_search_df = hourly_search_df.groupby(hourly_timestamps.isocalendar().week).mean()
monthly_search_df = hourly_search_df.groupby(hourly_timestamps.month).mean()

In [7]:
# Multiply the hourly search values by 100. The column is overwritten in place,
# so running this cell again multiplies the values a second time.
hourly_search_df["Search Trends"] = hourly_search_df["Search Trends"].mul(100)

In [8]:
# Pair every revenue aggregate with its search aggregate side by side, keeping
# only the calendar keys present in both (inner join), rounded to two decimals
concat_options = {"axis": "columns", "join": "inner"}
daily_df = pd.concat([daily_sales_df, daily_search_df], **concat_options).round(2)
weekly_df = pd.concat([weekly_sales_df, weekly_search_df], **concat_options).round(2)
monthly_df = pd.concat([monthly_sales_df, monthly_search_df], **concat_options).round(2)

# Swap the numeric weekday and month keys for readable row labels
weekday_labels = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]
month_labels = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
daily_df.index = weekday_labels
monthly_df.index = month_labels

## Part 2: Seasonality: Revenue & Search Activity

When is consumer interest in the company highest? Are financial and operating strategies aligned?

In [None]:
# Monthly view: revenue as bars on the primary axis, search as a line on the
# secondary axis
monthly_fig = make_subplots(specs=[[{"secondary_y": True}]])

monthly_search_trace = go.Scatter(
    name='Search',
    x=monthly_df.index,
    y=monthly_df["Search Trends"],
    line_color="gold",
)
monthly_revenue_trace = go.Bar(
    name='Revenue',
    x=monthly_df.index,
    y=monthly_df["Revenue"],
    marker_color="midnightblue",
)
monthly_fig.add_trace(monthly_search_trace, secondary_y=True)
monthly_fig.add_trace(monthly_revenue_trace, secondary_y=False)

monthly_fig.update_yaxes(title_text="Search Activity", secondary_y=True)
monthly_fig.update_yaxes(title_text="Revenue (Millions)", tickformat='$', secondary_y=False)

# All layout settings applied in a single call
monthly_fig.update_layout(
    barmode='group',
    template="simple_white",
    width=1000,
    height=500,
    title="Monthly Revenue & Search Activity",
    legend=dict(
        orientation="h",
        yanchor="top",
        y=1.1,
        xanchor="left",
        x=0,
        font=dict(size=10, color="black"),
    ),
)
monthly_fig.show()

In [None]:
# Weekly view: revenue as bars on the primary axis, search as a line on the
# secondary axis
weekly_fig = make_subplots(specs=[[{"secondary_y": True}]])

weekly_search_trace = go.Scatter(
    name='Search',
    x=weekly_df.index,
    y=weekly_df["Search Trends"],
    line_color="gold",
)
weekly_revenue_trace = go.Bar(
    name='Revenue',
    x=weekly_df.index,
    y=weekly_df["Revenue"],
    marker_color="midnightblue",
)
weekly_fig.add_trace(weekly_search_trace, secondary_y=True)
weekly_fig.add_trace(weekly_revenue_trace, secondary_y=False)

weekly_fig.update_yaxes(title_text="Search Activity", secondary_y=True)
weekly_fig.update_yaxes(title_text="Revenue (Millions)", tickformat='$', secondary_y=False)

# Label the x axis by quarter: ticks sit at weeks 1, 14, 27 and 40
quarter_axis = dict(
    tickmode='array',
    tickvals=[1, 14, 27, 40],
    ticktext=['Q1', 'Q2', 'Q3', 'Q4'],
)
weekly_fig.update_layout(
    xaxis=quarter_axis,
    barmode='group',
    template="simple_white",
    width=1000,
    height=500,
    title="Weekly Revenue & Search Activity",
    legend=dict(
        orientation="h",
        yanchor="top",
        y=1.1,
        xanchor="left",
        x=0,
        font=dict(size=10, color="black"),
    ),
)
weekly_fig.show()

In [None]:
# Day-of-week view: search and revenue as two lines on separate y axes
daily_fig = make_subplots(specs=[[{"secondary_y": True}]])

daily_search_trace = go.Scatter(
    name='Search',
    x=daily_df.index,
    y=daily_df["Search Trends"],
    line_color="gold",
)
daily_revenue_trace = go.Scatter(
    name='Revenue',
    x=daily_df.index,
    y=daily_df["Revenue"],
    line_color="midnightblue",
)
daily_fig.add_trace(daily_search_trace, secondary_y=True)
daily_fig.add_trace(daily_revenue_trace, secondary_y=False)

daily_fig.update_yaxes(title_text="Search Activity", secondary_y=True)
daily_fig.update_yaxes(title_text="Sales (Millions)", tickformat='$', secondary_y=False)

# All layout settings applied in a single call
daily_fig.update_layout(
    barmode='group',
    template="simple_white",
    width=1000,
    height=500,
    title="Daily Revenue & Search Activity",
    legend=dict(
        orientation="h",
        yanchor="top",
        y=1.1,
        xanchor="left",
        x=0,
        font=dict(size=10, color="black"),
    ),
)
daily_fig.show()

In [None]:
# Hourly Plot (Search Only)
# Average the search values for every (weekday, hour) slot before plotting.
# Feeding the raw hourly series to go.Heatmap supplies many competing z values
# for each cell (one per week of history), so the cells would not show the
# typical level for that slot.
heatmap_timestamps = hourly_search_df.index
hourly_heatmap_df = (
    hourly_search_df["Search Trends"]
    .groupby([
        heatmap_timestamps.dayofweek.rename("Day of Week"),
        heatmap_timestamps.hour.rename("Hour of Day"),
    ])
    .mean()
    .unstack("Hour of Day")  # rows: weekday (0 = Monday), columns: hour of day
)
heatmap_weekday_labels = ["Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday", "Sunday"]

hourly_fig = go.Figure()
hourly_fig.add_trace(
    go.Heatmap(
        x=hourly_heatmap_df.columns,
        y=hourly_heatmap_df.index,
        z=hourly_heatmap_df.to_numpy(),
        coloraxis="coloraxis",
    )
)
# The y axis carries the weekday; search activity is encoded by the colour scale
hourly_fig.update_yaxes(
    title_text="Day of Week",
    tickmode="array",
    tickvals=list(range(7)),
    ticktext=heatmap_weekday_labels,
)
hourly_fig.update_xaxes(title_text="Hour of Day")
# Final expression returns the figure so the cell renders it
hourly_fig.update_layout(
    coloraxis={'colorscale': 'ylgnbu', 'colorbar': {'title': {'text': 'Search Activity'}}},
    width=1000,
    height=500,
    title="Hourly Search Activity",
)

In [None]:
# Collect the four seasonality figures in one dp.Select block and save them as
# a standalone HTML report
seasonality_plots = [
    dp.Plot(daily_fig, label='Day of Week'),
    dp.Plot(monthly_fig, label='Month of Year'),
    dp.Plot(weekly_fig, label='Week of Year'),
    dp.Plot(hourly_fig, label='Hour of Day'),
]
seasonality_report = dp.Report(dp.Select(blocks=seasonality_plots))
seasonality_report.save(path='seasonality_trends.html', open=True)

## Q3 Historical Trends

- How does search traffic Jul-Sep compare to the rest of the year?
- Is there a relationship between search activity and revenue?
- When are sales and interest highest? Lowest?

In [None]:
# Load the daily search-trend CSV picked in the Colab upload dialog, drop rows
# with missing values, scale the series by 100 and round to whole numbers
uploaded = files.upload()
search_trends_daily_df = (
    pd.read_csv(
        "google_daily_search_trends.csv",
        index_col='Date',
        parse_dates=True,
        infer_datetime_format=True,
    )
    .dropna()
    .assign(**{"Search Trends": lambda frame: frame["Search Trends"] * 100})
    .round(0)
)

In [15]:
# Median monthly search traffic and average search traffic by month
hourly_stamps = hourly_search_df.index
median_monthly_traffic = hourly_search_df.groupby(by=[hourly_stamps.year, hourly_stamps.month]).sum().median()
monthly_traffic_df = hourly_search_df.groupby(by=[hourly_stamps.month, hourly_stamps.year]).sum()
# Select the column before averaging so each result is a plain number; averaging
# the whole frame yields a one-element Series that prints with a dtype footer.
monthly_search_totals = monthly_traffic_df["Search Trends"]
df_jul_traffic = monthly_search_totals.loc[7].mean()
df_aug_traffic = monthly_search_totals.loc[8].mean()
df_sep_traffic = monthly_search_totals.loc[9].mean()

# Average daily traffic over entire timeperiod and in Q3
# Group by calendar month/day (written as a 2019 date) instead of day-of-year:
# in leap years every day after Feb 28 has a day-of-year one higher than in
# other years, so day-of-year averages mix neighbouring calendar days.
calendar_day_2019 = search_trends_daily_df.index.strftime("2019-%m-%d")
daily_traffic_df = search_trends_daily_df.groupby(by=calendar_day_2019).mean()
# Feb 29 does not exist in 2019, so its label is dropped when present
daily_traffic_df = daily_traffic_df.drop(index="2019-02-29", errors="ignore")
daily_traffic_df.index.name = "Date"
q3_traffic = daily_traffic_df.loc["2019-07-01": "2019-09-30"]

# Compare the average monthly search traffic in Q3 to the overall monthly median value
print("Median Monthly Search Traffic =", median_monthly_traffic["Search Trends"])
print("Jul Search Traffic =", df_jul_traffic)
print("Aug Search Traffic =", df_aug_traffic)
print("Sep Search Traffic =", df_sep_traffic)

Median Monthly Search Traffic = 3517250.0
Jul Search Traffic = Search Trends    3550440.0
dtype: float64
Aug Search Traffic = Search Trends    3573080.0
dtype: float64
Sep Search Traffic = Search Trends    2868660.0
dtype: float64


In [16]:
# Daily revenue restricted to Q3 2019 (1 July through 30 September, inclusive)
q3_2019_window = slice("2019-07-01", "2019-09-30")
q3_daily_sales = sales_df.loc[q3_2019_window]

In [None]:
# Plot comparing search activity to revenue in Q3
q3_fig = make_subplots(specs=[[{"secondary_y": True}]])
q3_fig.add_trace(
    go.Scatter(x=q3_traffic.index, y=q3_traffic["Search Trends"], name="Search Trends", mode="lines+text", line_color="gold"),
    secondary_y=False,
)
# Revenue is plotted against its own dates. Borrowing the x values from
# `q3_traffic` pairs the two frames by position only, which shifts or truncates
# the revenue line whenever the frames differ in length.
q3_fig.add_trace(
    go.Scatter(x=q3_daily_sales.index, y=q3_daily_sales["Revenue"], name="Daily Revenue", line_color="midnightblue"),
    secondary_y=True,
)

q3_fig.update_xaxes(title_text="Q3", nticks=5, tickformat="%m/%d")
q3_fig.update_yaxes(title_text="Search Activity (Average of 2016-2019)", secondary_y=False)
q3_fig.update_yaxes(range=[90000, 125000], secondary_y=False)
q3_fig.update_yaxes(title_text="2019 Daily Revenue (Millions)", tickformat='$', secondary_y=True)

# Event labels positioned on the search axis; heights are numbers, not strings
q3_event_labels = [
    ("2019-09-16", 122000, "Mexican Independence Day"),
    ("2019-07-16", 120000, "Brazilian Public Holiday"),
    ("2019-08-07", 120000, "Q2 Earnings Release"),
]
for event_date, label_height, event_text in q3_event_labels:
    q3_fig.add_annotation(x=event_date, y=label_height, text=event_text, secondary_y=False)

# Final expression returns the figure so the cell renders it
q3_fig.update_layout(
    template="simple_white",
    legend=dict(orientation="h", yanchor="top", y=1.1, xanchor="left", x=0, font=dict(size=10, color="black")),
    width=1000,
    height=500,
    title="Q3 Search Activity (2016-19) vs Revenue (2019)",
)

In [67]:
# Export the Q3 comparison figure as a standalone HTML file
q3_html_path = "q3_fig.html"
q3_fig.write_html(q3_html_path)

## Part 3: Time Series Forecasting with Prophet

Question: What is the sales forecast for Q3 2020?

In [18]:
# Rename the revenue frame's two columns to `ds` and `y` for the Prophet model
mercado_sales_prophet_df = sales_df.reset_index()
mercado_sales_prophet_df.columns = ["ds", "y"]

# Build a default Prophet model and fit it to the revenue history
# (`fit` returns the fitted model itself)
mercado_sales_prophet_model = Prophet().fit(mercado_sales_prophet_df)

# Extend the timeline by 90 daily periods, predict over it and plot the result
mercado_sales_prophet_future = mercado_sales_prophet_model.make_future_dataframe(
    periods=90,
    freq="D",
)
mercado_sales_prophet_forecast = mercado_sales_prophet_model.predict(
    mercado_sales_prophet_future
)
revenue_prophet_plot = plot_plotly(
    mercado_sales_prophet_model,
    mercado_sales_prophet_forecast,
)

In [19]:
# Write the revenue forecast figure to an HTML file that can be embedded elsewhere
revenue_html_path = "revenue_prophet_plot.html"
revenue_prophet_plot.write_html(revenue_html_path)

In [None]:
# Render the revenue forecast figure inline as this cell's output
revenue_prophet_plot

In [21]:
# Base, Bear, and Bull cases
# Index a separate copy by date instead of overwriting
# `mercado_sales_prophet_forecast`: once the original loses its `ds` column,
# re-running this cell raises a KeyError and the frame can no longer be used
# where a `ds` column is expected.
sales_forecast_by_date = mercado_sales_prophet_forecast.set_index('ds')
mercado_sales_forecast_quarter = sales_forecast_by_date.loc[
    '2020-07-01':'2020-09-30',
    ['yhat_upper', 'yhat_lower', 'yhat'],
]
mercado_sales_forecast_quarter = mercado_sales_forecast_quarter.rename(
    columns={
        'yhat_upper': 'Bull Case',
        'yhat_lower': 'Bear Case',
        'yhat': 'Base Case',
    }
)
# NOTE(review): the forecast extends 90 days past the last observed date;
# confirm that it reaches 2020-09-30, otherwise these totals cover only part of
# the quarter.
mercado_sales_forecast_quarter.sum()

Bull Case    1052.058892
Bear Case     887.683426
Base Case     969.567200
dtype: float64

In [None]:
# Rename the daily search frame's two columns to `ds` and `y` for the Prophet
# model and drop incomplete rows
mercado_prophet_df = search_trends_daily_df.reset_index()
mercado_prophet_df.columns = ['ds', 'y']
mercado_prophet_df = mercado_prophet_df.dropna()

# Build a default Prophet model and fit it (`fit` returns the fitted model itself)
model_mercado_trends = Prophet().fit(mercado_prophet_df)

# Extend the timeline by 90 daily periods and predict over the full range
future_mercado_trends = model_mercado_trends.make_future_dataframe(
    periods=90,
    freq='D',
)
forecast_mercado_trends = model_mercado_trends.predict(future_mercado_trends)

# Interactive Plotly figure of the search-trend predictions
search_prophet_plot = plot_plotly(
    model_mercado_trends,
    forecast_mercado_trends,
)

In [23]:
# Render the search-activity forecast figure inline as this cell's output
search_prophet_plot

In [29]:
# Export the search-activity forecast figure as a standalone HTML file
search_html_path = "search_prophet_plot.html"
search_prophet_plot.write_html(search_html_path)

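`yhat` represents the most likely (average) forecast, whereas `yhat_lower` and `yhat_upper` represent the lower and upper bounds of the forecast's uncertainty interval (80% by default, since the models above use Prophet's default `interval_width`); they are used here as the worst- and best-case predictions.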

In [30]:
# Assemble the full report: the seasonality selector followed by the Q3
# comparison and the two Prophet forecasts, then upload it to Datapane
seasonality_selector = dp.Select(blocks=[
    dp.Plot(daily_fig, label='Day of Week'),
    dp.Plot(monthly_fig, label='Month of Year'),
    dp.Plot(weekly_fig, label='Week of Year'),
    dp.Plot(hourly_fig, label='Hour of Day'),
])
report_blocks = [
    seasonality_selector,
    dp.Plot(q3_fig, caption="Q3 Historical Trends"),
    dp.Plot(revenue_prophet_plot, caption="Revenue Forecast"),
    dp.Plot(search_prophet_plot, caption="Search Activity Forecast"),
]
mercado_libre_forecasting = dp.Report(*report_blocks)
mercado_libre_forecasting.upload(name='Sales Forecasting for Mercado Libre', open=True)

Uploading report and associated data - *please wait...*
Report successfully uploaded. View and share your report at https://datapane.com/reports/0AEvM03/mercado-libre-forecastinghtml/, or edit your report at https://datapane.com/reports/0AEvM03/mercado-libre-forecastinghtml/edit/.


In [None]:
# Optional code to create a different visualization for Q3 historical data
# (revenue as bars on the primary axis, search as a line on the secondary axis).
# Running this cell replaces the `q3_fig` built earlier in the notebook.
q3_fig = make_subplots(specs=[[{"secondary_y": True}]])
q3_fig.add_trace(
    go.Scatter(x=q3_traffic.index, y=q3_traffic["Search Trends"], name="Search Trends", mode="lines+text", line_color="gold"),
    secondary_y=True,
)
# Revenue is plotted against its own dates. Borrowing the x values from
# `q3_traffic` pairs the two frames by position only, which shifts or truncates
# the bars whenever the frames differ in length.
q3_fig.add_trace(
    go.Bar(x=q3_daily_sales.index, y=q3_daily_sales["Revenue"], name="Revenue", marker_color="midnightblue"),
    secondary_y=False,
)

q3_fig.update_layout(
    template="simple_white",
    legend=dict(orientation="h", yanchor="bottom", y=.05, xanchor="center", x=.5, font=dict(size=10, color="black")),
    width=1000,
    height=500,
)
q3_fig.update_xaxes(title_text="Average of 2016-2019", nticks=5, tickformat="%m/%d")
q3_fig.update_yaxes(title_text="Daily Search Activity", nticks=4, secondary_y=True)
q3_fig.update_yaxes(range=[0, 15], secondary_y=False)
q3_fig.update_yaxes(range=[90000, 125000], secondary_y=True)
q3_fig.update_yaxes(title_text="Daily Revenue (Millions)", tickformat='$', secondary_y=False)

# Event labels positioned on the revenue axis; heights are numbers, not strings
q3_fig.add_annotation(x="2019-09-16", y=13, text="Mexican Independence Day", secondary_y=False)
# Final expression returns the figure so the cell renders it
q3_fig.add_annotation(x="2019-07-16", y=13, text="Brazilian Public Holiday", secondary_y=False)
