In [24]:
# for presentation purposes: silence every warning so cell output stays clean
# (note: this also hides any warnings raised by the libraries imported below)
import warnings
warnings.filterwarnings("ignore")

# wrangle
# `prepare` (aliased `p`) is presumably a project-local helper module;
# its `time_split` function is used for the train/validate/test split
import prepare as p
import os

# transform
import numpy as np
import pandas as pd

# visualize 
import matplotlib.pyplot as plt
import seaborn as sns

# working with dates
from datetime import datetime

# modeling
import statsmodels.api as sm
from statsmodels.tsa.api import Holt, ExponentialSmoothing
# seed NumPy's global random number generator so random draws are repeatable
np.random.seed(0)

# evaluate
from sklearn.metrics import mean_squared_error
from math import sqrt 

## Exercises

The end result of this exercise should be a Jupyter notebook named `model`.

Using [saas.csv](https://ds.codeup.com/saas.csv) or log data from API usage:

1. Split data (train/test) and resample by any period, except daily, and aggregate using the sum. 
2. Forecast, plot, and evaluate using at least 4 of the methods we discussed:
    - Last Observed Value
    - Simple Average
    - Moving Average
    - Holt's Linear Trend 
    - Holt's Seasonal Trend
    - Based on previous year/month/etc., this is up to you.

Bonus: 
1. Using the store item demand data, create a forecast of `sales_total` and `quantity` for 2018 using the `Previous Cycle` approach.
2. Predict 2018 total **monthly** sales for a single store and/or item by creating a model using prophet.
3. Return a dataframe with the month, store_id, y-hat, and the confidence intervals (y-hat lower, y-hat upper).
4. Plot the 2018 monthly sales predictions.

In [25]:
# Load the raw SaaS invoice records from the CSV file in the working directory.
df = pd.read_csv("saas.csv")

In [26]:
# Preview the first five rows to see which columns the file provides.
df.head(n=5)

Unnamed: 0,Month_Invoiced,Customer_Id,Invoice_Id,Subscription_Type,Amount
0,2014-01-31,1000000,5000000,0.0,0.0
1,2014-01-31,1000001,5000001,2.0,10.0
2,2014-01-31,1000002,5000002,0.0,0.0
3,2014-01-31,1000003,5000003,0.0,0.0
4,2014-01-31,1000004,5000004,1.0,5.0


In [27]:
# Inspect column dtypes; Month_Invoiced comes back as `object` (plain strings),
# so it still has to be converted before it can serve as a datetime index.
df.dtypes

Month_Invoiced        object
Customer_Id            int64
Invoice_Id             int64
Subscription_Type    float64
Amount               float64
dtype: object

In [28]:
# Keep only the invoice date and the billed amount, renamed to snake_case.
#
# The frame is rebuilt by reassignment rather than mutated with inplace=True,
# and drop() uses errors='ignore', so this cell is safe to re-run: a second
# execution finds the columns already dropped/renamed and is a no-op instead
# of raising KeyError. (rename() already ignores labels that are absent.)
df = (
    df
    .drop(columns=['Customer_Id', 'Invoice_Id', 'Subscription_Type'], errors='ignore')
    .rename(columns={'Month_Invoiced': 'date_invoiced', 'Amount': 'amount'})
)

# Show a single row to confirm the two remaining column names.
df.head(1)

Unnamed: 0,date_invoiced,amount
0,2014-01-31,0.0


In [29]:
# Convert the invoice-date strings into pandas timestamps, then re-check the
# dtypes to confirm the column is now datetime64.
df = df.assign(date_invoiced=pd.to_datetime(df["date_invoiced"]))
df.dtypes

date_invoiced    datetime64[ns]
amount                  float64
dtype: object

In [32]:
# Move the invoice date out of the columns and into the index so the frame
# can be treated as a time series; `amount` is the only column left.
df = df.set_index(keys="date_invoiced")
df.head(n=1)

Unnamed: 0_level_0,amount
date_invoiced,Unnamed: 1_level_1
2014-01-31,0.0


In [33]:
# Split the date-indexed frame into train / validate / test using the
# `time_split` helper from the `prepare` module (its implementation is not
# shown here). The helper prints each split's shape; the printed row counts
# work out to roughly 60% / 20% / 20% of the rows.
# NOTE(review): the split runs on un-aggregated invoice rows, before any
# resampling. If time_split cuts by row position, one invoice month may
# straddle two splits -- confirm against prepare.time_split.
train, val, test= p.time_split(df)

Train shape: (474274, 1)

Validate shape: (158092, 1)

Test shape: (158092, 1)

