In [None]:
from numpy import asarray
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.metrics import mean_absolute_error
from matplotlib import pyplot

In [None]:
import numpy as np
from sklearn.tree import DecisionTreeRegressor

class MyRandomForestRegressor:
    """Bagged ensemble of ``DecisionTreeRegressor`` models.

    Every tree is trained on a bootstrap sample (drawn with replacement, same
    size as the training set) and predictions are the plain average of the
    individual tree predictions.

    Parameters
    ----------
    n_estimators : int
        Number of trees to train.
    max_depth, min_samples_split, min_samples_leaf
        Forwarded unchanged to each ``DecisionTreeRegressor``.
    random_state : int or None
        Seed for the bootstrap draws and for each tree. ``None`` (the default)
        keeps the previous behaviour of drawing from NumPy's global random
        state and leaving the trees unseeded.
    """

    def __init__(self, n_estimators=100, max_depth=None, min_samples_split=2,
                 min_samples_leaf=1, random_state=None):
        self.n_estimators = n_estimators
        self.max_depth = max_depth
        self.min_samples_split = min_samples_split
        self.min_samples_leaf = min_samples_leaf
        self.random_state = random_state
        self.trees = []

    def fit(self, X, y):
        """Train ``n_estimators`` trees on bootstrap resamples of ``(X, y)``.

        Any previously fitted trees are discarded, so calling ``fit`` twice
        yields an ensemble of ``n_estimators`` trees, not twice as many.

        Raises
        ------
        ValueError
            If ``n_estimators`` is below 1, the inputs are empty, or ``X`` and
            ``y`` have different lengths.

        Returns
        -------
        self
        """
        # Accept lists / DataFrames as well as arrays: fancy indexing with an
        # index array below only works on ndarrays.
        X = np.asarray(X)
        y = np.asarray(y)
        if self.n_estimators < 1:
            raise ValueError("n_estimators must be at least 1")
        if len(X) == 0:
            raise ValueError("cannot fit on an empty training set")
        if len(X) != len(y):
            raise ValueError(
                "X and y have inconsistent lengths: %d != %d" % (len(X), len(y))
            )

        seeded = self.random_state is not None
        # Unseeded: keep drawing from the global NumPy state, as before.
        rng = np.random.RandomState(self.random_state) if seeded else np.random
        max_seed = np.iinfo(np.int32).max

        n_samples = len(X)
        trees = []
        for _ in range(self.n_estimators):
            bootstrap_indices = rng.choice(n_samples, n_samples, replace=True)
            tree = DecisionTreeRegressor(
                max_depth=self.max_depth,
                min_samples_split=self.min_samples_split,
                min_samples_leaf=self.min_samples_leaf,
                random_state=int(rng.randint(max_seed)) if seeded else None,
            )
            tree.fit(X[bootstrap_indices], y[bootstrap_indices])
            trees.append(tree)

        # Swap in the new ensemble only once every tree trained successfully.
        self.trees = trees
        return self

    def predict(self, X):
        """Return the mean of the per-tree predictions for each row of ``X``.

        Raises
        ------
        ValueError
            If the model has not been fitted (no trees to average).
        """
        if not self.trees:
            raise ValueError("This MyRandomForestRegressor is not fitted yet; call fit() first")
        X = np.asarray(X)
        return np.mean([tree.predict(X) for tree in self.trees], axis=0)




Training

# forecast daily receipt counts (data_daily.csv) with a random forest
from numpy import asarray
from pandas import read_csv
from pandas import DataFrame
from pandas import concat
from sklearn.metrics import mean_absolute_error
from matplotlib import pyplot

# transform a time series dataset into a supervised learning dataset
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    """Frame a time series as a supervised-learning table of lagged columns.

    Each output row holds ``n_in`` past observations followed by ``n_out``
    current/future observations, i.e. ``(t-n_in, ..., t-1, t, ..., t+n_out-1)``.

    Parameters
    ----------
    data : list, 1-D or 2-D array-like
        The series; anything ``DataFrame(data)`` accepts.
    n_in : int
        Number of lag steps used as inputs.
    n_out : int
        Number of steps (starting at ``t``) used as outputs.
    dropnan : bool
        Drop the rows that contain NaN introduced by shifting.

    Returns
    -------
    numpy.ndarray
        The framed table as a 2-D array.
    """
    df = DataFrame(data)
    # input sequence (t-n_in, ..., t-1), oldest lag first
    lagged = [df.shift(lag) for lag in range(n_in, 0, -1)]
    # forecast sequence (t, t+1, ..., t+n_out-1)
    leads = [df.shift(-step) for step in range(n_out)]
    # put it all together
    agg = concat(lagged + leads, axis=1)
    # rows at either end are incomplete after shifting
    if dropnan:
        agg = agg.dropna()
    return agg.values

# split a univariate dataset into train/test sets
def train_test_split(data, n_test):
    """Split a 2-D array chronologically into ``(train, test)``.

    The last ``n_test`` rows become the test set and everything before them
    the training set. ``n_test=0`` returns all rows as training data and an
    empty test set; an ``n_test`` larger than ``len(data)`` puts every row in
    the test set.

    Raises
    ------
    ValueError
        If ``n_test`` is negative.
    """
    if n_test < 0:
        raise ValueError("n_test must be non-negative, got %r" % (n_test,))
    # Use an explicit split index: slicing with ``-n_test`` breaks for 0
    # because ``data[:-0]`` is empty and ``data[-0:]`` is the whole array.
    split = max(len(data) - n_test, 0)
    return data[:split, :], data[split:, :]

# fit a random forest model and make a one-step prediction
def random_forest_forecast(train, testX, n_estimators=1000):
    """Fit a fresh forest on ``train`` and predict one step for ``testX``.

    Parameters
    ----------
    train : sequence of rows or 2-D array
        Supervised rows; the last column is the target, the rest are inputs.
    testX : 1-D array-like
        Input features for the single step to predict.
    n_estimators : int
        Number of trees in the forest (default 1000, the value previously
        hard-coded here).

    Returns
    -------
    The scalar prediction for ``testX``.
    """
    # transform list of rows into a 2-D array
    train = asarray(train)
    # split into input and output columns
    trainX, trainy = train[:, :-1], train[:, -1]
    # fit model
    model = MyRandomForestRegressor(n_estimators=n_estimators)
    model.fit(trainX, trainy)
    # predict expects a 2-D (rows, features) input, so wrap the single row
    yhat = model.predict(asarray([testX]))
    return yhat[0]

# walk-forward validation for univariate data
def walk_forward_validation(data, n_test):
    """Walk-forward evaluation over the last ``n_test`` rows of ``data``.

    For every test row a model is refitted on all rows seen so far, a
    one-step prediction is made, and the true row is then added to the
    history before moving on.

    Returns
    -------
    tuple
        ``(mae, actual, predictions)`` where ``actual`` is the last column of
        the test rows and ``predictions`` is a list of one-step forecasts.
    """
    train, test = train_test_split(data, n_test)
    # the history starts as the training rows and grows by one row per step
    history = list(train)
    predictions = []
    for row in test:
        # last column is the target, the rest are the inputs
        testX, testy = row[:-1], row[-1]
        yhat = random_forest_forecast(history, testX)
        predictions.append(yhat)
        # reveal the true observation before the next step
        history.append(row)
        print('>expected=%.1f, predicted=%.1f' % (testy, yhat))
    actual = test[:, -1]
    error = mean_absolute_error(actual, predictions)
    return error, actual, predictions

# load the dataset
# Daily series: first CSV column is the index, first row is the header.
series = read_csv('data_daily.csv', header=0, index_col=0)
n_in = 17
values = series.values
# Frame the series as supervised rows: n_in lagged inputs plus the target.
data = series_to_supervised(values, n_in=n_in)
# Walk-forward evaluation on the last 12 supervised rows.
mae, y, yhat = walk_forward_validation(data, n_test=12)
print('MAE: %.3f' % mae)
# Expected vs. predicted on one set of axes.
for _curve, _label in ((y, 'Expected'), (yhat, 'Predicted')):
    pyplot.plot(_curve, label=_label)
pyplot.legend()
pyplot.show()

In [None]:
import numpy as np
import pickle
from pandas import read_csv
n_in=17
series = read_csv('data_daily.csv', header=0, index_col=0)
values = series.values
# transform the time series data into supervised learning
train = series_to_supervised(values, n_in)
# split into input and output columns (last column is the target)
trainX, trainy = train[:, :-1], train[:, -1]
# fit the final model on every available row
model = MyRandomForestRegressor(n_estimators=1000)
model.fit(trainX, trainy)
# Use a context manager so the file is flushed and closed deterministically;
# passing a bare open() to pickle.dump leaves the handle open.
with open('rfmodel.pkl', 'wb') as fh:
    pickle.dump(model, fh)



In [None]:
# Recursive multi-step forecast: predict one step from the last n_in values,
# append the prediction, and repeat.
# Start from the observed series every time so that re-running this cell
# always yields observed data + 366 forecasts instead of growing `values`
# by another 366 steps on each execution.
history = series.values.flatten().tolist()
for step in range(366):
    # the most recent n_in values (observed or already forecast) are the input
    row = asarray(history[-n_in:])
    # make a one-step prediction
    yhat = model.predict(asarray([row]))
    print('Input: %s, Predicted: %.3f' % (row[-1], yhat[0]))
    # a list append is O(1); np.append would copy the whole array each step
    history.append(yhat[0])
# column vector, the same layout the later cells read from `values`
values = asarray(history, dtype=float).reshape(-1, 1)

In [None]:

# Draw the full `values` series on explicitly created axes.
fig, ax = pyplot.subplots()
ax.plot(values, label='Predicted')
ax.legend()
pyplot.show()


In [None]:
import numpy as np
import pandas as pd
from arch import arch_model
from scipy.stats import norm

# Walk-forward evaluation over the last 40 supervised rows; the one-step
# errors are the residual series the GARCH model is fitted to.
series = read_csv('data_daily.csv', header=0, index_col=0)
truevalues = series.values
n_in=17
data = series_to_supervised(truevalues, n_in)
# evaluate
mae, y, yhat = walk_forward_validation(data, 40)
# `y` is a NumPy array and `yhat` a Python list; convert explicitly rather
# than relying on implicit list-to-array coercion.
residuals = y - np.asarray(yhat)

# GARCH(1, 1) on the residuals (a NumPy array, not a pandas Series)
garch_model = arch_model(residuals, vol='Garch', p=1, q=1)

# Fit the GARCH model
garch_model_fit = garch_model.fit()

# Display model summary
print(garch_model_fit.summary())

# Multi-step variance forecast; not used by the interval computed below
forecast_horizon = 5  # Adjust as needed
forecast = garch_model_fit.forecast(horizon=forecast_horizon)

# Set the desired confidence level (e.g., 95%)
confidence_level = 0.95

# Two-sided z-score for the desired confidence level
z_score = norm.ppf(1 - (1 - confidence_level) / 2)

# One-step-ahead conditional *variance* from the fitted model. The variable
# name says "volatility" but the value is read from `.variance`, hence the
# square root below.
last_forecasted_volatility = garch_model_fit.forecast(horizon=1).variance.iloc[-1, :].values[0]

# Calculate the margin of error
margin_of_error = z_score * np.sqrt(last_forecasted_volatility)

# The same constant margin is applied to every entry of `values`
lower_bound = values - margin_of_error
upper_bound = values + margin_of_error

print("Prediction Interval for {}% Confidence:".format(int(confidence_level * 100)))
# `values` is a column vector, so `values[i]` is a 1-element array; formatting
# that with %f relies on array-to-scalar conversion, deprecated since
# NumPy 1.25. Index flat views to get real scalars.
lower_flat, values_flat, upper_flat = (np.ravel(a) for a in (lower_bound, values, upper_bound))
for i in range(365,len(values_flat)):
    print('>Lower=%.1f,Prediction=%.1f, Upper=%.1f' % (lower_flat[i],values_flat[i],upper_flat[i]))



In [None]:
# One-step forecast errors (actual minus predicted) from the walk-forward run.
residuals = np.subtract(y, yhat)

In [None]:
import pandas as pd
from arch import arch_model

# `residuals` is computed as `y - yhat` in an earlier cell; with `y` taken from
# a NumPy array that makes it a NumPy array rather than a pandas Series.
# GARCH order: p=1, q=1.
garch_model = arch_model(residuals, vol='Garch', p=1, q=1)

# Fit the GARCH model
garch_model_fit = garch_model.fit()

# Display model summary
print(garch_model_fit.summary())

# Multi-step variance forecast from the fitted model.
# NOTE(review): in the cells visible here nothing reads `forecast` before a
# later cell rebinds the name - confirm whether this is still needed.
forecast_horizon = 5  # Adjust as needed
forecast = garch_model_fit.forecast(horizon=forecast_horizon)

In [None]:
import numpy as np

# Build a symmetric prediction band around `values` from the fitted GARCH
# model (`garch_model_fit`) and a normal z-score.
from scipy.stats import norm

# Set the desired confidence level (e.g., 95%)
confidence_level = 0.95

# Two-sided z-score for the desired confidence level
z_score = norm.ppf(1 - (1 - confidence_level) / 2)

# One-step-ahead conditional *variance* from the fitted model. The variable
# name says "volatility" but the value is read from `.variance`, hence the
# square root below.
last_forecasted_volatility = garch_model_fit.forecast(horizon=1).variance.iloc[-1, :].values[0]

# Calculate the margin of error
margin_of_error = z_score * np.sqrt(last_forecasted_volatility)

# The same constant margin is applied to every entry of `values`
lower_bound = values - margin_of_error
upper_bound = values + margin_of_error

print("Prediction Interval for {}% Confidence:".format(int(confidence_level * 100)))
# `values` is a column vector, so `values[i]` is a 1-element array; formatting
# that with %f relies on array-to-scalar conversion, deprecated since
# NumPy 1.25. Index flat views to get real scalars.
lower_flat, values_flat, upper_flat = (np.ravel(a) for a in (lower_bound, values, upper_bound))
for i in range(365,len(values_flat)):
    print('>Lower=%.1f,Prediction=%.1f, Upper=%.1f' % (lower_flat[i],values_flat[i],upper_flat[i]))



In [None]:
# Series and its lower/upper band on the same axes.
for _curve, _label in (
    (values, 'Predicted'),
    (lower_bound, 'Lower'),
    (upper_bound, 'Upper'),
):
    pyplot.plot(_curve, label=_label)
pyplot.legend()
pyplot.show()


In [None]:
import pandas as pd
# Re-read the daily file, this time keeping the date as an ordinary column.
df = read_csv('data_daily.csv')

# Parse '# Date' into a datetime 'Date' column and promote it to the index,
# which resample() requires.
df = df.assign(Date=pd.to_datetime(df['# Date'])).set_index('Date')

# Monthly totals of 'Receipt_Count'.
monthly_data = df['Receipt_Count'].resample('M').sum()

# Create a new DataFrame with the monthly totals



In [None]:
import numpy as np
import pickle
from pandas import read_csv
# NOTE: `values` is rebound here from the daily series to the monthly totals.
values = monthly_data.values
values=values.reshape(-1,1)
# transform the time series data into supervised learning (6 monthly lags)
train = series_to_supervised(values, n_in=6)
# split into input and output columns (last column is the target)
trainX, trainy = train[:, :-1], train[:, -1]
# fit model
modelMonth = MyRandomForestRegressor(n_estimators=1000)
modelMonth.fit(trainX, trainy)
# Persist the *monthly* model. The previous code pickled `model` (the daily
# model) into modelMonth.pkl and left the file handle open.
with open('modelMonth.pkl', 'wb') as fh:
    pickle.dump(modelMonth, fh)


In [None]:
# construct an input for a new prediction
for i in range(12):
    row = values[-6:].flatten()
    # make a one-step prediction
    yhat = modelMonth.predict(asarray([row]))
    print('Input: %s, Predicted: %.3f' % (row, yhat[0]))
    values=np.append(values,[yhat[0]]).reshape(-1,1)


In [None]:
# Draw the monthly `values` series on the current axes.
axes = pyplot.gca()
axes.plot(values, label='Predicted')
axes.legend()
pyplot.show()

In [None]:
from statsmodels.tsa.api import ExponentialSmoothing

# Number of steps to forecast beyond the end of the series.
forecast_period = 365
series = read_csv('data_daily.csv', header=0, index_col=0)
train_data = series.values

# Fit an additive-trend, additive-seasonal exponential smoothing model with a
# Box-Cox transform.
# NOTE: this rebinds `model`, which earlier cells use for the random-forest
# regressor; re-running those cells after this one will pick up this object.
# NOTE(review): seasonal_periods=4 on a daily series - confirm the intended
# season length.
model = ExponentialSmoothing(
    train_data,
    seasonal_periods=4,
    trend="add",
    seasonal="add",
    use_boxcox=True,
    initialization_method="estimated",
).fit()

# Forecast the next `forecast_period` steps (365, not 5)
forecast = model.forecast(forecast_period)

In [None]:
# Observed series and the exponential-smoothing forecast on the same axes.
# NOTE(review): both curves are plotted against their own positional index, so
# the forecast presumably overlays the start of the observed series instead
# of continuing after it - confirm whether an x-offset of len(series) is
# intended.
pyplot.plot(series.values, label='Expected')
pyplot.plot(forecast,label='Predicted')