In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from statsmodels.graphics.tsaplots import plot_pacf, plot_acf
from sklearn.metrics import mean_absolute_error,mean_squared_error

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Read the Starbucks data with the parsed 'Date' column as index,
# then plot its 'Close' column.
starbucks_stocks = pd.read_csv(
    'starbucks.csv',
    index_col='Date',
    parse_dates=True,
)
starbucks_stocks.Close.plot(figsize=(20, 8));

In [None]:
# Autocorrelation plot of the Starbucks 'Close' column
plot_acf(starbucks_stocks.Close);

In [None]:
# Partial autocorrelation plot of the Starbucks 'Close' column
plot_pacf(starbucks_stocks.Close);

In [None]:
# Read the airline passengers data with the parsed 'Month' column as index,
# then plot its 'Passengers' column.
airline_passengers = pd.read_csv(
    'airline_passengers.csv',
    index_col='Month',
    parse_dates=True,
)
airline_passengers.Passengers.plot(figsize=(20, 8));

In [None]:
# Autocorrelation plot of the airline 'Passengers' column
plot_acf(airline_passengers.Passengers);

In [None]:
# Partial autocorrelation plot of the airline 'Passengers' column
plot_pacf(airline_passengers.Passengers);

In [None]:
# Simulate a white noise series: independent draws from a normal distribution
# with the given mean and standard deviation.
mean = 0
std = 1
num_observations = 1000

# A seeded generator makes the simulated series identical on every
# Restart & Run All, so the plot and anything derived from it is reproducible.
whitenoise_rng = np.random.default_rng(42)
whitenoise = pd.Series(whitenoise_rng.normal(mean, std, size=num_observations))

whitenoise.plot(figsize=(20, 8));

In [None]:
# Simulate a random walk series: every value equals the previous value plus a
# fresh normally distributed error, i.e. the running sum of the error terms.
# `mean` and `std` are the distribution parameters defined earlier in the notebook.
num_steps = 5000

# Seeded generator so the simulated path is reproducible across kernel restarts
walk_rng = np.random.default_rng(43)
errors = walk_rng.normal(mean, std, size=num_steps)

# The cumulative sum replaces the element-by-element append loop;
# the result is kept as a plain list of floats.
random_walk = np.cumsum(errors).tolist()
pd.Series(random_walk).plot(figsize=(20, 8));

In [None]:
### Forecasting with simple forecasting methods
# Display the index of the Starbucks frame as the cell output
starbucks_stocks.index

In [None]:
# Split series into training and test set:
# the first 700 rows are used for training, all remaining rows for testing.
train = starbucks_stocks.iloc[:700]
test = starbucks_stocks.iloc[700:]

# Draw both parts on the same axes
ax = train['Close'].plot(figsize=(20, 8))
test['Close'].plot(ax=ax, figsize=(20, 8))
plt.show()

In [None]:
# Naive forecast: every test observation is predicted by the
# last observed 'Close' value of the training set.
last_train_close = train['Close'].iloc[-1]
y_hat = test.copy()
y_hat['naive'] = last_train_close

plt.figure(figsize=(20, 8))
for frame, column, caption in (
    (train, 'Close', 'Train'),
    (test, 'Close', 'Test'),
    (y_hat, 'naive', 'Naive Forecast'),
):
    plt.plot(frame.index, frame[column], label=caption)
plt.title("Naive Forecast")
plt.legend(loc='best')
plt.show()

In [None]:
# Forecast with average method: every test observation is predicted by the
# mean of the 'Close' values in the training set.
train_close_mean = train['Close'].mean()
y_hat = test.copy()
y_hat['average'] = train_close_mean

plt.figure(figsize=(20, 8))
for frame, column, caption in (
    (train, 'Close', 'Train'),
    (test, 'Close', 'Test'),
    (y_hat, 'average', ' Forecast with average method'),
):
    plt.plot(frame.index, frame[column], label=caption)
plt.title("Forecast with average method")
plt.legend(loc='best')
plt.show()

In [None]:
# Forecast with linear extrapolation: continue the straight line that runs
# through the first and the last training observation.
first_close = train['Close'].iloc[0]
last_close = train['Close'].iloc[-1]

# The first and last observation are separated by len(train) - 1 intervals,
# so this is the average change per step over the training period.
slope = (last_close - first_close) / (len(train) - 1)

y_hat = test.copy()
# Forecast horizons h = 1, 2, ..., len(test). The whole column is computed in a
# single vectorised assignment: element-wise chained assignment
# (y_hat[col].iloc[i] = ...) may write to a temporary copy and never reach
# y_hat, and it always does so when pandas copy-on-write is active.
horizons = np.arange(1, len(test) + 1)
y_hat['extrapol'] = last_close + horizons * slope

plt.figure(figsize=(20,8))
plt.plot(train.index, train['Close'], label='Train')
plt.plot(test.index,test['Close'], label='Test')
plt.plot(y_hat.index,y_hat['extrapol'], label=' Forecast with linear extrapolation')
plt.legend(loc='best')
plt.title("Forecast with linear extrapolation")
plt.show()


In [None]:
# Seasonal Naive Forecast
# Reload the airline passengers data (parsed 'Month' column as index)
# and display its index as the cell output.
airline_passengers = pd.read_csv(
    'airline_passengers.csv',
    index_col='Month',
    parse_dates=True,
)
airline_passengers.index

In [None]:
# Split series into training and test set:
# the first 120 rows are used for training, all remaining rows for testing.
train = airline_passengers.iloc[:120]
test = airline_passengers.iloc[120:]

# Draw both parts on the same axes
ax = train['Passengers'].plot(figsize=(20, 8))
test['Passengers'].plot(ax=ax, figsize=(20, 8))
plt.show()

In [None]:
# Forecast with seasonal naive forecasts: every test observation is predicted
# by the value observed at the same position in the last 12 training rows.
last12months = train[-12:]

# np.resize repeats the last 12 values cyclically until the whole test period
# is covered, so the forecast has the right length for any test-set size.
# The forecast is indexed directly by the test index, which avoids having to
# rebuild a matching date range.
snaive_forecasts = pd.DataFrame(
    {'Passengers': np.resize(last12months['Passengers'].to_numpy(), len(test))},
    index=test.index,
)

y_hat = test.copy()
# Assign the single forecast column (a Series), not the whole DataFrame
y_hat['snaive_forecasts'] = snaive_forecasts['Passengers']
plt.figure(figsize=(20,8))
plt.plot(train.index, train['Passengers'], label='Train')
plt.plot(test.index,test['Passengers'], label='Test')
plt.plot(y_hat.index,y_hat['snaive_forecasts'], label=' Forecast with seasonal naive forecasts')
plt.legend(loc='best')
plt.title("Forecast with seasonal naive forecasts")
plt.show()



In [None]:
######## Cross-validation
# Reload the data so this cell starts from the columns in the CSV file
starbucks_stocks = pd.read_csv('starbucks.csv', index_col='Date', parse_dates=True)

# Average Method
# The forecast for row i is the mean of all 'Close' values before row i.
# expanding().mean() gives the mean of rows 0..i; shifting it by one row turns
# that into the mean of rows 0..i-1 (NaN for the first row, which has no history).
# This replaces a row-by-row loop with chained assignment, which recomputed
# every prefix mean from scratch and does not write back under copy-on-write.
starbucks_stocks['Close_AverageMethPrediction'] = (
    starbucks_stocks['Close'].expanding().mean().shift(1)
)
# 1-step-ahead forecasting error
starbucks_stocks['forecasting_error_avgmeth'] = (
    starbucks_stocks['Close'] - starbucks_stocks['Close_AverageMethPrediction']
)

# Naive forecast: the forecast for row i is the 'Close' value of row i-1
starbucks_stocks['Close_NaivePrediction'] = starbucks_stocks['Close'].shift(1)
# 1-step-ahead forecasting error
starbucks_stocks['forecasting_error_naive'] = (
    starbucks_stocks['Close'] - starbucks_stocks['Close_NaivePrediction']
)
starbucks_stocks.head()

In [None]:
# Mean absolute & mean squared 1-step-ahead forecasting errors.
# The first row has no forecast (NaN) for either method, so it is skipped
# by position with .iloc.
actual = starbucks_stocks['Close'].iloc[1:]

for method, column in (
    ('Naive', 'Close_NaivePrediction'),
    ('Average method', 'Close_AverageMethPrediction'),
):
    predicted = starbucks_stocks[column].iloc[1:]
    # sklearn metrics take (y_true, y_pred) in that order
    print(f'{method} MAE: {mean_absolute_error(actual, predicted)}')
    print(f'{method} MSE: {mean_squared_error(actual, predicted)}')

In [None]:
# Dealing with missing data
# This data set has some missing observations
airline_passengers2 = pd.read_csv('airline_passengers_withNaN.csv', index_col='Month', parse_dates=True)

# Fill missings with forward fill: each missing value is replaced by the last
# preceding non-missing value. Series.ffill() is used because the
# fillna(method=...) form is deprecated in recent pandas releases.
airline_passengers2 = airline_passengers2.assign(
    FillMissing_forwardfill=airline_passengers2['Passengers'].ffill()
)
# The legend distinguishes the filled series from the original one
airline_passengers2['FillMissing_forwardfill'].plot(legend=True)
airline_passengers2['Passengers'].plot(legend=True);

In [None]:
# Fill missings with rolling mean: a missing value is replaced by the mean of a
# 10-row rolling window, computed as soon as the window holds at least one
# non-missing value.
passengers_rolling_mean = airline_passengers2["Passengers"].rolling(10, min_periods=1).mean()
passengers_filled = airline_passengers2['Passengers'].fillna(passengers_rolling_mean)
airline_passengers2 = airline_passengers2.assign(FillMissing_movingavg=passengers_filled)

airline_passengers2['FillMissing_movingavg'].plot()
airline_passengers2['Passengers'].plot();

