In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pmdarima as pm
import statsmodels
# Bind `sm` exactly once, and after the bare package import: aliasing the
# top-level package as `sm` as well would clobber this name and break calls
# such as `sm.tsa.seasonal_decompose`, which live on `statsmodels.api`.
import statsmodels.api as sm

# Silence all warnings (including convergence/interpolation warnings raised by
# statsmodels) so the workshop output stays readable.
import warnings
warnings.filterwarnings('ignore')

from statsmodels.tools.sm_exceptions import ConvergenceWarning, InterpolationWarning
warnings.simplefilter('ignore', ConvergenceWarning)
warnings.simplefilter('ignore', InterpolationWarning)


# Intro to Time Series

## Additive and multiplicative seasonality – can you identify them correctly?


<!-- <img src="https://github.com/MKB-Datalab/workshop_ts_forecasting/images/image_01.png" /> -->

<img src="../images/image_01.png" />

Additive or multiplicative?

**Answer:**

<!-- <img src="https://github.com/MKB-Datalab/workshop_ts_forecasting/images/image_02.png" /> -->

<img src="../images/image_02.png" />

Additive or multiplicative?

**Answer:**

Do you want to test how good you are at identifying additive and multiplicative seasonality, compared with statistical tests and other people?

Go to this [link](https://kourentzes.com/forecasting/2014/11/09/additive-and-multiplicative-seasonality)

## Time Series Decomposition

Based on the time series decomposition, classify the trend and the seasonality, and decide whether the model is additive or multiplicative.

### Google Trends - search for 'diet'

In [None]:
# Load the US data (diet_USA_2016_2023.csv).
# The first column is used as the index and parsed as dates.

diet_USA = pd.read_csv(
    'YOUR CODE HERE',
    index_col=[0],
    parse_dates=[0],
)

In [None]:
# Plot the entire diet time series and show gridlines
diet_USA.plot(
    grid=True,
    figsize=(17, 5),
    title="Number of searches for the word 'diet' in the USA",
)
plt.show()

Before running next cell:
- Do you see any pattern here?
- Do you think this time series has seasonality?
- Is the trend additive or multiplicative?

Let's run the next cell and observe the components trend, seasonality, and noise.

* You can also try the code below with `model = 'additive'`

In [None]:
# Make sure `sm` is statsmodels.api (which exposes `sm.tsa`) in this cell.
import statsmodels.api as sm

# Use a taller default figure so the stacked decomposition panels stay readable.
# `plt.rcParams` is the same settings object the pylab interface exposes,
# without pulling in the discouraged pylab namespace.
plt.rcParams['figure.figsize'] = 11, 9

decomposition = sm.tsa.seasonal_decompose(
    diet_USA['diet: (United States)'],
    model='multiplicative',
    period=53,  # 52 to 53 weeks in a year
)
fig = decomposition.plot()
plt.show()

Now, repeat the steps above for the Dutch data.

What do you observe there?

How would you compare searches for the word `diet` in the two countries?

In [None]:
# PUT YOUR CODE HERE

### [Air Passenger Data](https://www.kaggle.com/datasets/ashfakyeafi/air-passenger-data-for-time-series-analysis)

Download the data and apply the same steps as above.

What can you conclude now?

In [None]:
# PUT YOUR CODE HERE

Extra: You can use some of the datasets shipped with `statsmodels` to play a bit:

* [Mauna Loa Weekly Atmospheric CO2 Data](https://www.statsmodels.org/dev/datasets/generated/co2.html)
* [Grunfeld (1950) Investment Data](https://www.statsmodels.org/dev/datasets/generated/grunfeld.html)
* [Nile River flows at Ashwan 1871-1970](https://www.statsmodels.org/dev/datasets/generated/nile.html)
* [Yearly sunspots data 1700-2008](https://www.statsmodels.org/dev/datasets/generated/sunspots.html)

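To load any of these datasets, use the following code as a base (replace `co2` with `grunfeld`, `nile` or `sunspots` as needed):

```python
from statsmodels.datasets import co2

data = co2.load_pandas().data
```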

For some of the above data you might need to apply the following:

## Stationarity Tests

![](../images/ADF_KPSS.jpg)

<!-- ![](https://github.com/MKB-Datalab/workshop_ts_forecasting/images/Box-Jenkins-Method.PNG) -->

The functions below can be found [here](https://www.statsmodels.org/stable/examples/notebooks/generated/stationarity_detrending_adf_kpss.html).


In [None]:
from statsmodels.tsa.stattools import adfuller, kpss

def adf_test(timeseries):
    """Run the Augmented Dickey-Fuller test and print a labelled summary.

    Calls ``adfuller`` with ``autolag='AIC'`` and prints a header line followed
    by a pandas Series holding the test statistic, the p-value, the number of
    lags used, the number of observations used and one
    ``'Critical Value (<level>)'`` entry per critical value returned.

    Args:
        timeseries: Data handed unchanged to ``adfuller``.

    Returns:
        None. The results are only printed.
    """
    print('Results of Dickey-Fuller Test:')
    outcome = adfuller(timeseries, autolag='AIC')
    labels = ['Test Statistic', 'p-value', '#Lags Used', 'Number of Observations Used']
    # The first four entries are scalars; the fifth is the critical-value mapping.
    report = dict(zip(labels, outcome[:4]))
    for level, threshold in outcome[4].items():
        report['Critical Value (%s)' % level] = threshold
    print(pd.Series(report))
    
def kpss_test(timeseries):
    """Run the KPSS test and print a labelled summary.

    Calls ``kpss`` with ``regression='c'`` and ``nlags="auto"`` and prints a
    header line followed by a pandas Series holding the test statistic, the
    p-value, the number of lags used and one ``'Critical Value (<level>)'``
    entry per critical value returned.

    Args:
        timeseries: Data handed unchanged to ``kpss``.

    Returns:
        None. The results are only printed.
    """
    print('Results of KPSS Test:')
    outcome = kpss(timeseries, regression='c', nlags="auto")
    report = {
        'Test Statistic': outcome[0],
        'p-value': outcome[1],
        'Lags Used': outcome[2],
    }
    # The fourth entry maps significance levels to critical values.
    report.update(
        ('Critical Value (%s)' % level, threshold)
        for level, threshold in outcome[3].items()
    )
    print(pd.Series(report))

Apply both tests to Google Trends Diet USA data. What can you conclude?

In [None]:
# YOUR CODE HERE

The function below puts it all together and gives you the answer.

In [None]:
def obtain_adf_kpss_results(timeseries, max_d, alpha=0.05):
    """Tabulate ADF and KPSS stationarity verdicts for successive differences.

    The series is tested as given (d = 0), then after one round of
    differencing (d = 1), and so on. ``max_d`` rows are produced in total,
    i.e. d = 0, 1, ..., max_d - 1.

    Args:
        timeseries (pd.Series or pd.DataFrame): Univariate time series. It
            must support ``.diff()`` and ``.dropna()``; the object passed in
            is not modified.
        max_d (int): Number of differencing orders to evaluate, starting at 0.
        alpha (float): Significance level both p-values are compared against.
            Defaults to 0.05.

    Returns:
        pd.DataFrame: One row per differencing order with the columns
            ``d`` (differencing order),
            ``adf_stats`` (p-value of the ADF test, despite the name),
            ``kpss_stats`` (p-value of the KPSS test, despite the name),
            ``is_adf_stationary`` (True when the ADF p-value is <= alpha),
            ``is_kpss_stationary`` (True unless the KPSS p-value is <= alpha),
            ``is_stationary`` (True only when both flags above are True).
    """
    columns = ['d', 'adf_stats', 'kpss_stats',
               'is_adf_stationary', 'is_kpss_stationary', 'is_stationary']
    results = []
    current = timeseries

    for d in range(max_d):
        # Difference only when another order is actually going to be tested.
        if d > 0:
            current = current.diff().dropna()

        adf_pvalue = adfuller(current, autolag='AIC')[1]
        kpss_pvalue = kpss(current, regression='c', nlags="auto")[1]

        # The two tests have opposite null hypotheses: a small ADF p-value
        # points to stationarity, a small KPSS p-value points away from it.
        adf_stationary = bool(adf_pvalue <= alpha)
        kpss_stationary = not (kpss_pvalue <= alpha)
        stationary = adf_stationary and kpss_stationary

        results.append((d, adf_pvalue, kpss_pvalue,
                        adf_stationary, kpss_stationary, stationary))

    return pd.DataFrame(results, columns=columns)


In [None]:
# YOUR CODE HERE

## Making Time Series Stationary through Differencing

In [None]:
# Plot diet_USA again

# YOUR CODE HERE

After differencing....

In [None]:
# Apply differencing on diet_USA the necessary number of times and plot

# YOUR CODE HERE

Apply tests again...

In [None]:
# YOUR CODE HERE