https://iq.opengenus.org/time-series-prediction/

In [None]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow import keras
import pandas as pd

# Add SourceMLFile

In [None]:
# Load the source data; index_col=0 makes the first CSV column the DataFrame index.
df = pd.read_csv('_data/MLSourceFileFromCSV.csv', index_col=0)
# Print the loaded frame for a quick visual check.
print(df)

In [None]:
def plot_series(time, series, format="-", start=0, end=None):
    """Draw ``series`` against ``time`` on the current matplotlib figure.

    Only the samples in the half-open index range ``[start, end)`` are
    drawn; ``end=None`` runs through the last sample. ``format`` is handed
    to ``plt.plot`` as its format string. The axes are labelled "Time" and
    "Value" and the grid is switched on. This function neither creates nor
    shows a figure; callers do that.
    """
    window = slice(start, end)
    plt.plot(time[window], series[window], format)
    plt.xlabel("Time")
    plt.ylabel("Value")
    plt.grid(True)

def trend(time, slope=0):
    """Return a linear trend: ``time`` multiplied by ``slope``.

    With the default ``slope`` of 0 the input is multiplied by zero.
    """
    scaled = slope * time
    return scaled

def seasonal_pattern(season_time):
    """Arbitrary within-season shape; swap in any other pattern you like.

    Where ``season_time`` is below 0.4 the value is
    ``cos(season_time * 2 * pi)``; elsewhere it is
    ``1 / exp(3 * season_time)``.
    """
    early_part = np.cos(season_time * 2 * np.pi)
    late_part = 1 / np.exp(3 * season_time)
    return np.where(season_time < 0.4, early_part, late_part)

def seasonality(time, period, amplitude=1, phase=0):
    """Repeat the seasonal pattern once every ``period`` time steps.

    ``time`` is shifted by ``phase``, wrapped modulo ``period`` and divided
    by ``period`` to give a position within the season. That position is
    passed to ``seasonal_pattern`` and the result is scaled by
    ``amplitude``.
    """
    shifted = time + phase
    position_in_season = (shifted % period) / period
    return amplitude * seasonal_pattern(position_in_season)

def noise(time, noise_level=1, seed=None):
    """Return one normally distributed noise sample per element of ``time``.

    Samples come from a ``np.random.RandomState`` created from ``seed``
    (so a fixed seed gives a repeatable sequence) and are multiplied by
    ``noise_level``. Only the length of ``time`` is used.
    """
    generator = np.random.RandomState(seed)
    sample_count = len(time)
    return generator.randn(sample_count) * noise_level

# Time axis: 4 * 365 + 1 = 1461 consecutive steps (0 .. 1460) as float32.
time = np.arange(4 * 365 + 1, dtype="float32")

# Parameters of the synthetic demo series.
baseline = 10
amplitude = 40
slope = 0.05
noise_level = 5

# Create the series
series = baseline + trend(time, slope) + seasonality(time, period=365, amplitude=amplitude)
# Update with noise
series += noise(time, noise_level, seed=42)

# The synthetic series above is replaced here by a column of the loaded CSV,
# truncated to at most as many samples as the time axis has.
# NOTE(review): assumes the column holds at least 4 * 365 + 1 rows so that
# `series` and `time` have equal length -- confirm against the data file.
series = np.array(df['11th Ave[1-0]'])[:4*365+1]

plt.figure(figsize=(10, 6))
plot_series(time, series)
plt.show()


In [None]:
# Index at which the series is cut: samples before it form the training
# period, samples from it onward form the validation period.
split_time = 1000
time_train = time[:split_time]
x_train = series[:split_time]
time_valid = time[split_time:]
x_valid = series[split_time:]
# Plot the training period on its own figure.
plt.figure(figsize=(10, 6))
plot_series(time_train, x_train)
plt.show()

# Plot the validation period on a second figure.
plt.figure(figsize=(10, 6))
plot_series(time_valid, x_valid)
plt.show()


# Naive Forecast

In [None]:
# Naive forecast: the prediction for each validation step is the value one
# step earlier, so the slice starts at split_time - 1 and drops the last sample.
naive_forecast = series[split_time - 1:-1]
plt.figure(figsize=(10, 6))
# Overlay the forecast on the actual validation values.
plot_series(time_valid, x_valid)
plot_series(time_valid, naive_forecast)


# Zoom in

In [None]:
plt.figure(figsize=(10, 6))
# Actual values: validation samples 0 through 149.
plot_series(time_valid, x_valid, start=0, end=150)
# Forecast: samples 1 through 150, i.e. a window offset by one sample
# relative to the actual values plotted above.
plot_series(time_valid, naive_forecast, start=1, end=151)

# Mean Squared Error / Mean Absolute Error

In [None]:
# Error of the naive forecast against the validation values.
print("Mean Squared Error: ", keras.metrics.mean_squared_error(x_valid,naive_forecast).numpy())
print("Mean Absolute Error", keras.metrics.mean_absolute_error(x_valid, naive_forecast).numpy())


### Try moving average (MSE > 5)

In [None]:
def moving_average_forecast(series, window_size):
    """Forecast each step as the mean of the ``window_size`` values before it.

    With ``window_size=1`` this is the same as the naive forecast.

    Element ``i`` of the result is the mean of
    ``series[i:i + window_size]``, so the result holds
    ``len(series) - window_size`` values.
    """
    last_start = len(series) - window_size
    window_means = [
        series[begin:begin + window_size].mean()
        for begin in range(last_start)
    ]
    return np.array(window_means)

# 10-step moving average over the whole series. Entry i of the result is the
# forecast for step i + 10, so dropping the first split_time - 10 entries
# leaves exactly the forecasts for the validation period.
moving_avg = moving_average_forecast(series, 10)[split_time - 10:]
plt.figure(figsize=(10, 6))
# Overlay the moving-average forecast on the actual validation values.
plot_series(time_valid, x_valid)
plot_series(time_valid, moving_avg) 

Above: Forecast line after applying moving average.
Check MSE and MAE again.

In [None]:
# Error of the moving-average forecast against the validation values.
print("Mean Squared Error: ", keras.metrics.mean_squared_error(x_valid, moving_avg).numpy())
print("Mean Absolute Error", keras.metrics.mean_absolute_error(x_valid, moving_avg).numpy())

We got a mean absolute error of 7.14, which is worse than the naive forecast. This happens because the moving average does not anticipate trend or seasonality. To solve this issue we'll use differencing: since the seasonality period is 365 days, we subtract the value at time t – 365 from the value at time t.

In [None]:
# Seasonal differencing: entry i is series[i + 365] - series[i], i.e. the
# change relative to the value 365 steps earlier. Entry i belongs to step i + 365.
diff_series = (series[365:] - series[:-365])
# Time axis matching diff_series (not used again within this cell).
diff_time = time[365:]

# 50-step moving average of the differenced series. Entry j of the averaged
# result forecasts differenced index j + 50, which is step j + 50 + 365, so
# starting at split_time - 365 - 50 aligns the result with the validation period.
diff_moving_avg = moving_average_forecast(diff_series, 50)[split_time - 365 - 50:]

plt.figure(figsize=(10, 6))
# Differenced values for the validation period, with their moving-average forecast.
plot_series(time_valid, diff_series[split_time - 365:])
plot_series(time_valid, diff_moving_avg)
plt.show()


Now let's bring back the trend and seasonality by adding the past values from t – 365:

In [None]:
# Undo the differencing: for each validation step t, add the actual value at
# t - 365 (the slice covers steps split_time - 365 up to 365 before the end)
# to the forecast difference.
diff_moving_avg_plus_past = series[split_time - 365:-365] + diff_moving_avg

plt.figure(figsize=(10, 6))
# Overlay the reconstructed forecast on the actual validation values.
plot_series(time_valid, x_valid)
plot_series(time_valid, diff_moving_avg_plus_past)
plt.show()


In [None]:
# Error of the differencing-based forecast against the validation values.
print("Mean Squared Error: ", keras.metrics.mean_squared_error(x_valid, diff_moving_avg_plus_past).numpy())
print("Mean Absolute Error: ", keras.metrics.mean_absolute_error(x_valid, diff_moving_avg_plus_past).numpy())


Better than the naive forecast, good. However, the forecasts look a bit too random because we're just adding past values, which were noisy. Let's use a moving average on past values to remove some of the noise:

In [None]:
# Same reconstruction, but with a smoothed past value instead of the raw one:
# for each validation step t, the 10-step average covers steps t - 370
# through t - 361 (a window around t - 365), and the forecast difference is
# added to it. The slice bounds are widened by the window size so the
# averaged result still has one entry per validation step.
diff_moving_avg_plus_smooth_past = moving_average_forecast(series[split_time - 370:-360], 10) + diff_moving_avg

plt.figure(figsize=(10, 6))
# Overlay the smoothed forecast on the actual validation values.
plot_series(time_valid, x_valid)
plot_series(time_valid, diff_moving_avg_plus_smooth_past)
plt.show()


In [None]:
# Error of the smoothed forecast against the validation values:
# mean squared error first, then mean absolute error.
print(keras.metrics.mean_squared_error(x_valid, diff_moving_avg_plus_smooth_past).numpy())
print(keras.metrics.mean_absolute_error(x_valid, diff_moving_avg_plus_smooth_past).numpy())