In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sktime.utils.plotting import plot_correlations, plot_acf, plot_pacf, plot_series


## What is White Noise?

A time series is white noise if the variables are independent and identically distributed with a mean of zero.

## Why Does it Matter?
White noise is an important concept in time series analysis and forecasting.

It is important for two main reasons:

* `Predictability`: If your time series is white noise, then, by definition, it is random. You cannot reasonably model it and make predictions.
* `Model Diagnostics` : The series of errors from a time series forecast model should ideally be white noise.
Model Diagnostics is an important area of time series forecasting.

Time series data are expected to contain some white noise component on top of the signal generated by the underlying process.


## Is your Time Series White Noise?
Your time series is probably NOT white noise if the answer to one or more of the following questions is yes:

* Is the mean/level non-zero?
* Does the mean/level change over time?
* Does the variance change over time?
* Do values correlate with lag values?

Some tools that you can use to check if your time series is white noise are:

* Create a lag plot
* Calculate summary statistics. Check the mean and variance of the whole series against the mean and variance of meaningful contiguous blocks of values in the series (e.g. days, months, or years).
* Create an autocorrelation plot. Check for gross correlation between lagged variables.

In [2]:
def ar_process(y_intercept, alpha, lag, noise):
    """Return one step of the autoregressive recursion.

    Parameters
    ----------
    y_intercept : float
        Constant term added at every step.
    alpha : float
        Coefficient applied to the lagged value.
    lag : float
        Previously generated value of the series that this step builds on.
    noise : float
        Random shock added at this step.

    Returns
    -------
    float
        ``y_intercept + alpha * lag + noise``.
    """
    return y_intercept + alpha*lag + noise


def build_ar_process(num_timesteps=300, intercept=0, lag_coef=0.9, p=1, seed=42):
    """Simulate a series that regresses on its own value ``p`` steps back.

    Parameters
    ----------
    num_timesteps : int
        Length of the generated series and of the noise array.
    intercept : float
        Constant term passed to ``ar_process`` at every step.
    lag_coef : float
        Coefficient applied to the lagged value.
    p : int
        Distance of the single lag used: ``y[i]`` is built from ``y[i-p]``
        only, not from all of the last ``p`` values.
    seed : int
        Seed for the random number generator. The default of 42 reproduces
        the values this function produced before the parameter existed.

    Returns
    -------
    y : numpy.ndarray
        The simulated series; its first ``p`` entries are zero.
    noise : numpy.ndarray
        The standard-normal draws the series was built from.

    Notes
    -----
    The shock added at step ``i`` is ``noise[i-p]``, so ``y`` is offset by
    ``p`` positions relative to the returned ``noise`` array.
    """
    # RandomState is kept (rather than default_rng) so the generated numbers
    # stay identical for a given seed.
    rng = np.random.RandomState(seed=seed)
    noise = rng.normal(size=num_timesteps)

    y = np.zeros(num_timesteps)

    # The first p positions have no lagged value to build on and stay at zero.
    for i in range(p, num_timesteps):
        y[i] = ar_process(intercept, lag_coef, y[i-p], noise[i-p])

    return y, noise





ar_1, white_noise = build_ar_process(p=1)

# Size the date index from the simulated series rather than repeating the
# generator's default length, so the two cannot drift apart.
index = pd.date_range(start='2000-01-01', periods=len(ar_1))
df = pd.DataFrame(data={'noise':white_noise, 'AR-1':ar_1}, index=index)

# Preview the first rows of the noise and AR-1 columns.
df.head()

Unnamed: 0,noise,AR-1
2000-01-01,0.496714,0.0
2000-01-02,-0.138264,0.496714
2000-01-03,0.647689,0.308778
2000-01-04,1.52303,0.925589
2000-01-05,-0.234153,2.35606


### White noise has no predictive information in past values, and there is no correlation between values at any two points in time.

In [None]:
# Line plot of the white-noise column; the title and axis labels let the
# figure stand on its own, and the trailing ';' hides the return value's repr.
noise_ax = df.plot(y='noise', figsize=(15,4))
noise_ax.set(title='White noise', xlabel='Date', ylabel='Value');

<img src='./plots/whitenoise.png'>

## Create a lag plot to check if your time series is white noise

In [None]:
# Let's create a lag plot: each noise value against the value one step later.
lag_ax = pd.plotting.lag_plot(series=df['noise'], lag=1)
lag_ax.set_title('Lag plot of whitenoise');

<img src='./plots/whitenoise-lag-plot.png'>

In [None]:
# 2x3 grid of panels, flattened so each panel can take one lag (1 through 6).
fig, ax = plt.subplots(nrows=2, ncols=3, figsize=(16, 8))
ax = ax.flatten()

for lag, panel in enumerate(ax, start=1):
    pd.plotting.lag_plot(series=df['noise'], lag=lag, ax=panel)

fig.suptitle('White-noise has no predictive information in past values');

<img src='./plots/whitenoise-lag-plot-4x3.png'>

### We can create a correlogram and check for any autocorrelation with lagged values.

In [3]:
# Draw the correlogram for the noise column.
ax = plot_correlations(df['noise'])

# The first element of the returned value is used as the Figure here
# (presumably plot_correlations returns (figure, axes) -- confirm against sktime).
correlogram_fig = ax[0]
correlogram_fig.set_constrained_layout(False)
correlogram_fig.autofmt_xdate()

<img src='./plots/correlogram-whitenoise.png'>