# Libraries

In [1]:
import numpy as np
import pandas as pd
import yfinance as yf
import plotly.express as px
import plotly.graph_objects as go

from math import sqrt

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_percentage_error, mean_squared_error, mean_absolute_error

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense
from tensorflow.keras.layers import LSTM, Dense

from statsmodels.tsa.arima_process import arma_generate_sample

from prophet import Prophet

from statsmodels.tsa.arima_process import arma_generate_sample

# White noise in two types

In [5]:
def generate_synthetic_data(length, noise_std, break_point=None, break_value=None, post_break_noise_std=None):
    time = np.arange(length)
    noise = np.random.normal(scale=noise_std, size=length)
    data = np.cumsum(noise)

    if break_point is not None and break_value is not None:
        if post_break_noise_std is not None:
            post_break_noise = np.random.normal(scale=post_break_noise_std, size=length - break_point)
            data[break_point:] += break_value + np.cumsum(post_break_noise)
        else:
            data[break_point:] += break_value

    index_numeric = pd.RangeIndex(start=0, stop=length, name='Index')
    index_date = pd.date_range(start='2012-01-01', periods=length, name='ds')

    df_numeric = pd.DataFrame({'Value': data}, index=index_numeric)
    df_date = pd.DataFrame({'ds': index_date, 'y': data}, index=index_date)

    return df_numeric, df_date


length = 2842
noise_std = 1.0
break_point = None
break_value = 25
post_break_noise_std = 2.0

df_numeric, df_date = generate_synthetic_data(length, noise_std, break_point, break_value, post_break_noise_std)


fig_numeric = px.line(df_numeric, y='Value', title='Synthetic time series without structural break (numerical indices)')


fig_numeric.add_shape(type="line",
                      x0=break_point, y0=df_numeric['Value'].min(), x1=break_point, y1=df_numeric['Value'].max(),
                      line=dict(color="red", width=1, dash="dash"))

fig_numeric.show()


fig_date = px.line(df_date, x='ds', y='y', title='Synthetic time series without structural break (indexes in date format)')


fig_date.add_shape(type="line",
                   x0=df_date.index[break_point], y0=df_date['y'].min(), x1=df_date.index[break_point], y1=df_date['y'].max(),
                   line=dict(color="red", width=1, dash="dash"))

fig_date.show()



Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



# GRU (White)

In [10]:

data = df_numeric.copy()
scaler = MinMaxScaler()
data_scaled = scaler.fit_transform(data)


train_size = int(len(data_scaled) * 0.8)
GRU_W_train_data = data_scaled[:train_size]
GRU_W_test_data = data_scaled[train_size:]

# GRU model
GRU_model = tf.keras.Sequential([
    tf.keras.layers.GRU(units=64, activation='tanh', input_shape=(1, 1)),
    tf.keras.layers.Dense(units=1)
])

GRU_model.compile(optimizer='adam', loss='mean_squared_error')
GRU_model.fit(GRU_W_train_data[:-1], GRU_W_train_data[1:], epochs=100, batch_size=32)


GRU_predictions = GRU_model.predict(GRU_W_test_data[:-1])


GRU_predictions = scaler.inverse_transform(GRU_predictions)



Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [11]:

fig = px.line()
fig.add_scatter(x=data.index[train_size+1:], y=data['Value'][train_size+1:], name='Real Values')
fig.add_scatter(x=data.index[train_size+1:], y=GRU_predictions.flatten(), name='Predicted Values')
fig.update_layout(title='GRU Model, Synth Data without structural break', xaxis_title='Date', yaxis_title='Value')
fig.show()


In [12]:

fig = px.line()
fig.add_scatter(x=data.index, y=data['Value'], name='All Data')
fig.add_scatter(x=data.index[train_size+1:], y=GRU_predictions.flatten(), name='Predicted Values')
fig.update_layout(title='GRU Model Synth Data without structural break', xaxis_title='Date', yaxis_title='Value')
fig.add_shape(type="line",
                      x0=break_point, y0=df_numeric['Value'].min(), x1=break_point, y1=df_numeric['Value'].max(),
                      line=dict(color="red", width=1, dash="dash"))
fig.show()

In [13]:



GRU_true_values = scaler.inverse_transform(GRU_W_test_data[1:])


In [14]:

GRU_mape = mean_absolute_percentage_error(GRU_true_values, GRU_predictions)
GRU_mse = mean_squared_error(GRU_true_values, GRU_predictions)
GRU_rmse = np.sqrt(GRU_mse)
GRU_mae = mean_absolute_error(GRU_true_values, GRU_predictions)

print('MAPE:', GRU_mape)
print('MSE:', GRU_mse)
print('RMSE:', GRU_rmse)
print('MAE:', GRU_mae)

MAPE: 0.028529358165310166
MSE: 0.8930939671279751
RMSE: 0.9450364898394004
MAE: 0.7445937655361489


In [15]:
def CS_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

mape = CS_mean_absolute_percentage_error(GRU_true_values, GRU_predictions)

print(f'MAPE: {mape:.2f}%')


MAPE: 2.85%


# LSTM (White)

In [16]:

train_size = int(len(df_numeric) * 0.8)
LSTM_W_train, LSTM_W_test = df_numeric[0:train_size], df_numeric[train_size:len(df_numeric)]


scaler = MinMaxScaler(feature_range=(0, 1))
LSTM_W_train_scaled = scaler.fit_transform(LSTM_W_train['Value'].values.reshape(-1, 1))


window_size = 30


LSTM_W_X_train, LSTM_W_y_train = [], []
for i in range(window_size, len(LSTM_W_train_scaled)):
    LSTM_W_X_train.append(LSTM_W_train_scaled[i-window_size:i, 0])
    LSTM_W_y_train.append(LSTM_W_train_scaled[i, 0])
LSTM_W_X_train, LSTM_W_y_train = np.array(LSTM_W_X_train), np.array(LSTM_W_y_train)


LSTM_W_X_train = np.reshape(LSTM_W_X_train, (LSTM_W_X_train.shape[0], LSTM_W_X_train.shape[1], 1))


LSTM_W_model = Sequential()
LSTM_W_model.add(LSTM(64, input_shape=(window_size, 1)))
LSTM_W_model.add(Dense(1))
LSTM_W_model.compile(loss='mean_squared_error', optimizer='adam')
LSTM_W_model.summary()


LSTM_W_model.fit(LSTM_W_X_train, LSTM_W_y_train, epochs=100, batch_size=32, verbose=1)


test_scaled = scaler.transform(LSTM_W_test['Value'].values.reshape(-1, 1))
LSTM_W_X_test, LSTM_W_y_test = [], []
for i in range(window_size, len(test_scaled)):
    LSTM_W_X_test.append(test_scaled[i-window_size:i, 0])
    LSTM_W_y_test.append(test_scaled[i, 0])
LSTM_W_X_test, LSTM_W_y_test = np.array(LSTM_W_X_test), np.array(LSTM_W_y_test)
LSTM_W_X_test = np.reshape(LSTM_W_X_test, (LSTM_W_X_test.shape[0], LSTM_W_X_test.shape[1], 1))
LSTM_W_predicted = LSTM_W_model.predict(LSTM_W_X_test)
LSTM_W_predicted = scaler.inverse_transform(LSTM_W_predicted)



Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                16896     
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 16,961
Trainable params: 16,961
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100


In [17]:

fig = px.line(df_numeric, y='Value', title='LSTM Model Synth Data with structural break')
fig.add_scatter(x=LSTM_W_test.index[window_size:], y=LSTM_W_predicted.flatten(), mode='lines', name='Predicted')


fig.add_shape(type="line",
                      x0=break_point, y0=df_numeric['Value'].min(), x1=break_point, y1=df_numeric['Value'].max(),
                      line=dict(color="red", width=1, dash="dash"))
fig.show()

In [18]:



np.set_printoptions(suppress=True, precision=4)  
LSTM_W_pred = LSTM_W_predicted

LSTM_real = LSTM_W_test['Value'].values[window_size:].reshape(-1, 1)

In [19]:
New_mape = mean_absolute_percentage_error(LSTM_real, LSTM_W_pred )


In [20]:
New_mape

0.029047204134858062

In [21]:

LSTM_W_mae = np.mean(np.abs(LSTM_W_predicted - LSTM_W_test['Value'].values[window_size:].reshape(-1, 1)))
print(f'MAE: {LSTM_W_mae}')


LSTM_W_mse = np.mean((LSTM_W_predicted - LSTM_W_test['Value'].values[window_size:].reshape(-1, 1))**2)
print(f'MSE: {LSTM_W_mse:.2f}')


LSTM_W_rmse = np.sqrt(LSTM_W_mse)
print(f'RMSE: {LSTM_W_rmse:.2f}')


print(f'MAPE: {New_mape:.2f}')

MAE: 0.7642997712987698
MSE: 0.96
RMSE: 0.98
MAPE: 0.03


# CNN (White)

In [22]:
CNN_W_closing_prices = df_numeric['Value'].values

In [23]:

train_size = int(len(CNN_W_closing_prices) * 0.8)
CNN_W_train_data = CNN_W_closing_prices[:train_size]
CNN_W_test_data = CNN_W_closing_prices[train_size:]

In [24]:



def create_sequences(data, seq_length):
    X = []
    y = []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)


seq_length = 15
CNN_W_X_train, CNN_W_y_train = create_sequences(CNN_W_train_data, seq_length)
CNN_W_X_test, CNN_W_y_test = create_sequences(CNN_W_test_data, seq_length)


input_shape = (CNN_W_X_train.shape[1], 1)  


CNN_W_model = Sequential()
CNN_W_model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape))
CNN_W_model.add(MaxPooling1D(pool_size=2))
CNN_W_model.add(Flatten())
CNN_W_model.add(Dense(64, activation='relu'))
CNN_W_model.add(Dense(1))
CNN_W_model.compile(loss='mean_squared_error', optimizer='adam')
CNN_W_model.fit(CNN_W_X_train, CNN_W_y_train, epochs=100, batch_size=32)


CNN_W_y_pred = CNN_W_model.predict(CNN_W_X_test)


CNN_W_predictions = pd.DataFrame({'index': df_numeric.index[train_size+seq_length:], 'Actual': CNN_W_y_test.flatten(), 'Predicted': CNN_W_y_pred.flatten()})


CNN_W_historical_data = pd.DataFrame({'Index': df_numeric.index, 'Value': CNN_W_closing_prices})




Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [25]:

CNN_W_mape = mean_absolute_percentage_error(CNN_W_y_test, CNN_W_y_pred)
CNN_W_mse = mean_squared_error(CNN_W_y_test, CNN_W_y_pred)
CNN_W_rmse = np.sqrt(CNN_W_mse)
CNN_W_mae = mean_absolute_error(CNN_W_y_test, CNN_W_y_pred)


print('MAPE:', CNN_W_mape)
print('MSE:', CNN_W_mse)
print('RMSE:', CNN_W_rmse)
print('MAE:', CNN_W_mae)

MAPE: 0.04094942173723167
MSE: 1.8337853999308202
RMSE: 1.3541733271375642
MAE: 1.060398244891477


In [26]:



np.set_printoptions(suppress=True, precision=4)  

CNN_W_N_pred = CNN_W_predictions['Predicted'].values



In [27]:
def CNN_W_CS_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


CNN_W_C_mape = CNN_W_CS_mean_absolute_percentage_error(CNN_W_y_test, CNN_W_N_pred)

print(f'MAPE: {CNN_W_C_mape:.2f}%')


MAPE: 4.09%


In [28]:

fig = px.line(CNN_W_historical_data, x='Index', y='Value', title='CNN Model Synth Data without structural break')
fig.add_trace(px.line(CNN_W_predictions, x='index', y='Predicted', color_discrete_sequence=['red']).data[0])
fig.add_shape(type="line",
                      x0=break_point, y0=df_numeric['Value'].min(), x1=break_point, y1=df_numeric['Value'].max(),
                      line=dict(color="red", width=1, dash="dash"))
fig.show()


# Prophet (White)

In [10]:

train_size = int(len(df_date) * 0.8)
Prophet_W_train_data, Prophet_W_test_data = df_date[:train_size], df_date[train_size:]


Prophet_W_m = Prophet(changepoint_prior_scale=1.0,
            seasonality_prior_scale=10.0,
            daily_seasonality=True,
            weekly_seasonality=True,
            yearly_seasonality=True,
            seasonality_mode='multiplicative')


Prophet_W_m.fit(Prophet_W_train_data)


Prophet_W_future = pd.DataFrame({'ds': Prophet_W_test_data['ds']})


Prophet_W_forecast = Prophet_W_m.predict(Prophet_W_future)


fig = px.line(Prophet_W_forecast, x='ds', y='yhat', labels={'x':'Date', 'y':'Value'})
fig.add_scatter(x=df_date['ds'], y=df_date['y'], mode='lines', name='Actual')
fig.update_layout(title='Prophet model, Synth data without structural break')

fig.show()

DEBUG:cmdstanpy:input tempfile: /tmp/tmpqvdaf_ap/uidhw4mp.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpqvdaf_ap/bxkdfam0.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=47869', 'data', 'file=/tmp/tmpqvdaf_ap/uidhw4mp.json', 'init=/tmp/tmpqvdaf_ap/bxkdfam0.json', 'output', 'file=/tmp/tmpqvdaf_ap/prophet_modelfj9f96v7/prophet_model-20230527200024.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:00:24 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:00:29 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [30]:

Prophet_W_y_pred = Prophet_W_forecast['yhat'].values


Prophet_W_y_actual = Prophet_W_test_data['y'].values


Prophet_W_mape = mean_absolute_percentage_error(Prophet_W_y_actual, Prophet_W_y_pred)
Prophet_W_mse = mean_squared_error(Prophet_W_y_actual, Prophet_W_y_pred)
Prophet_W_rmse = np.sqrt(Prophet_W_mse)
Prophet_W_mae = mean_absolute_error(Prophet_W_y_actual, Prophet_W_y_pred)

print('MAPE:', Prophet_W_mape)
print('MSE:', Prophet_W_mse)
print('RMSE:', Prophet_W_rmse)
print('MAE:', Prophet_W_mae)

MAPE: 0.4053937828573874
MSE: 176.21459887001046
RMSE: 13.274584696705599
MAE: 11.427321250989381


In [33]:
def Prophet_W_CS_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


mape = Prophet_W_CS_mean_absolute_percentage_error(Prophet_W_y_actual, Prophet_W_y_pred)

print(f'MAPE: {mape:.2f}%')


MAPE: 28.26%


# Color noise in two types

In [11]:
def generate_colored_noise(length, order, ar_params, ma_params, break_point=None, break_value=None, ar_params_after=None, ma_params_after=None):
    np.random.seed(0)
    if break_point is None or ar_params_after is None or ma_params_after is None:
        ar_params_full = ar_params
        ma_params_full = ma_params
    else:
        ar_params_full = ar_params + [0] * (length - break_point) 
        ma_params_full = ma_params + [0] * (length - break_point)  
        ar_params_full[break_point:] = ar_params_after  
        ma_params_full[break_point:] = ma_params_after  
    
    arima_data = arma_generate_sample(ar=ar_params_full, ma=ma_params_full, nsample=length)
    
    if break_point is not None and break_value is not None:
        arima_data[break_point:] += break_value
    
    index_numeric = pd.RangeIndex(start=0, stop=length, name='Index')
    index_date = pd.date_range(start='2023-01-01', periods=length, name='ds')
    
    df_numeric = pd.DataFrame({'Value': arima_data}, index=index_numeric)
    df_date = pd.DataFrame({'ds': index_date, 'y': arima_data}, index=index_date)
    
    return df_numeric, df_date


length = 2842  # Length of the time series
order = (2, 1)  # ARIMA model order (p, d, q)
ar_params = [0.3, -0.2]  # Autoregressive (AR) parameters
ma_params = [0.1, 0.4]  # Moving average (MA) parameters
break_point = None  # Index at which structural break occurs
break_value = 3  # Value at which time series changes after structural break
ar_params_after = [0.1, -0.3]  # Autoregressive (AR) parameters after the break
ma_params_after = [0.2, 0.7]  # Moving average (MA) parameters after the break

# Generate colored noise with a structural break
df_C_numeric, df_C_date = generate_colored_noise(length, order, ar_params, ma_params, break_point, break_value, ar_params_after, ma_params_after)


fig_numeric = go.Figure()
fig_numeric.add_trace(go.Scatter(x=df_C_numeric.index, y=df_C_numeric['Value'], mode='lines', name='Numeric Index'))
fig_numeric.add_shape(type="line",
                      x0=break_point, y0=df_C_numeric['Value'].min(), x1=break_point, y1=df_C_numeric['Value'].max(),
                      line=dict(color="red", width=1, dash="dash"))
fig_numeric.update_layout(title='Time Series without  Structural Break (Numeric Index)',
                          legend=dict(x=0, y=1, traceorder="normal"))


fig_date = go.Figure()
fig_date.add_trace(go.Scatter(x=df_C_date['ds'], y=df_C_date['y'], mode='lines', name='Date Index'))
fig_date.add_shape(type="line",
                   x0=df_C_date.index[break_point], y0=df_C_date['y'].min(),
                   x1=df_C_date.index[break_point], y1=df_C_date['y'].max(),
                   line=dict(color="red", width=1, dash="dash"))
fig_date.update_layout(title='Time Series without Structural Break (Date Index)',
                      legend=dict(x=0, y=1, traceorder="normal"))

fig_numeric.show()
fig_date.show()



Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.


Support for multi-dimensional indexing (e.g. `obj[:, None]`) is deprecated and will be removed in a future version.  Convert to a numpy array before indexing instead.



# Prophet (Color)

In [4]:

train_size = int(len(df_C_date) * 0.8)
Prophet_C_train_data, Prophet_C_test_data = df_C_date[:train_size], df_C_date[train_size:]


Prophet_C_m = Prophet(changepoint_prior_scale=0.5,
            seasonality_prior_scale=14.0,
            daily_seasonality=True,
            weekly_seasonality=False,
            yearly_seasonality=True,
            seasonality_mode='multiplicative')


Prophet_C_m.fit(Prophet_C_train_data)


Prophet_C_future = pd.DataFrame({'ds': Prophet_C_test_data['ds']})


Prophet_C_forecast = Prophet_C_m.predict(Prophet_C_future)


fig = px.line(Prophet_C_forecast, x='ds', y='yhat', labels={'x':'Date', 'y':'Value'})
fig.add_scatter(x=df_C_date['ds'], y=df_C_date['y'], mode='lines', name='Actual')
fig.update_layout(title='Prophet model, Synth data(noise like) without structural break')

fig.show()

DEBUG:cmdstanpy:input tempfile: /tmp/tmpt9b_oxwz/st_wl9u5.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt9b_oxwz/qf57er2q.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=56528', 'data', 'file=/tmp/tmpt9b_oxwz/st_wl9u5.json', 'init=/tmp/tmpt9b_oxwz/qf57er2q.json', 'output', 'file=/tmp/tmpt9b_oxwz/prophet_modelw9i9_1q_/prophet_model-20230527203942.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
20:39:42 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
20:39:52 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [5]:

Prophet_C_y_pred = Prophet_C_forecast['yhat'].values


Prophet_C_y_actual = Prophet_C_test_data['y'].values


Prophet_C_mape = mean_absolute_percentage_error(Prophet_C_y_actual, Prophet_C_y_pred)
Prophet_C_mse = mean_squared_error(Prophet_C_y_actual, Prophet_C_y_pred)
Prophet_C_rmse = np.sqrt(Prophet_C_mse)
Prophet_C_mae = mean_absolute_error(Prophet_C_y_actual, Prophet_C_y_pred)

print('MAPE:', Prophet_C_mape)
print('MSE:', Prophet_C_mse)
print('RMSE:', Prophet_C_rmse)
print('MAE:', Prophet_C_mae)

MAPE: 4.34787777567732
MSE: 5.862532503751442
RMSE: 2.4212667147077047
MAE: 1.9137708485455451


In [28]:
def Prophet_C_CS_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


mape = Prophet_C_CS_mean_absolute_percentage_error(Prophet_C_y_actual, Prophet_C_y_pred)

print(f'MAPE: {mape:.2f}%')


MAPE: 287.67%


# CNN (Color)

In [6]:
CNN_C_closing_prices = df_C_numeric['Value'].values


train_size = int(len(CNN_C_closing_prices) * 0.8)
CNN_C_train_data = CNN_C_closing_prices[:train_size]
CNN_C_test_data = CNN_C_closing_prices[train_size:]


def create_sequences(data, seq_length):
    X = []
    y = []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)


seq_length = 15
CNN_C_X_train, CNN_C_y_train = create_sequences(CNN_C_train_data, seq_length)
CNN_C_X_test, CNN_C_y_test = create_sequences(CNN_C_test_data, seq_length)


input_shape = (CNN_C_X_train.shape[1], 1)  

CNN_C_model = Sequential()
CNN_C_model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape))
CNN_C_model.add(MaxPooling1D(pool_size=2))
CNN_C_model.add(Flatten())
CNN_C_model.add(Dense(64, activation='relu'))
CNN_C_model.add(Dense(1))
CNN_C_model.compile(loss='mean_squared_error', optimizer='adam')
CNN_C_model.fit(CNN_C_X_train, CNN_C_y_train, epochs=100, batch_size=32)


CNN_C_y_pred = CNN_C_model.predict(CNN_C_X_test)


CNN_C_predictions = pd.DataFrame({'index': df_C_numeric.index[train_size+seq_length:], 'Actual': CNN_C_y_test.flatten(), 'Predicted': CNN_C_y_pred.flatten()})


CNN_C_historical_data = pd.DataFrame({'Index': df_C_numeric.index, 'Value': CNN_C_closing_prices})


CNN_C_mape = mean_absolute_percentage_error(CNN_C_y_test, CNN_C_y_pred)
CNN_C_mse = mean_squared_error(CNN_C_y_test, CNN_C_y_pred)
CNN_C_rmse = np.sqrt(CNN_C_mse)
CNN_C_mae = mean_absolute_error(CNN_C_y_test, CNN_C_y_pred)


print('MAPE:', CNN_C_mape)
print('MSE:', CNN_C_mse)
print('RMSE:', CNN_C_rmse)
print('MAE:', CNN_C_mae)


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [7]:

np.set_printoptions(suppress=True, precision=4)  
CNN_C_y_pred = CNN_C_predictions['Predicted'].values


In [8]:
def CNN_C_CS_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


CNN_C_mape = CNN_C_CS_mean_absolute_percentage_error(CNN_C_y_test, CNN_C_y_pred)

print(f'MAPE: {CNN_C_mape:.2f}%')


MAPE: 588.94%


In [9]:

fig = px.line(CNN_C_historical_data, x='Index', y='Value', title='CNN on White noise')
fig.add_trace(px.line(CNN_C_predictions, x='index', y='Predicted', color_discrete_sequence=['red']).data[0])
fig.show()


# LSTM (Color)

In [10]:
# Split the data into training and testing sets
train_size = int(len(df_C_numeric) * 0.8)
LSTM_C_train, LSTM_C_test = df_C_numeric[0:train_size], df_C_numeric[train_size:len(df_C_numeric)]

# Normalize the data
scaler = MinMaxScaler(feature_range=(0, 1))
LSTM_C_train_scaled = scaler.fit_transform(LSTM_C_train['Value'].values.reshape(-1, 1))

# Define the time series window size
window_size = 30

# Create sequences of data
LSTM_C_X_train, LSTM_C_y_train = [], []
for i in range(window_size, len(LSTM_C_train_scaled)):
    LSTM_C_X_train.append(LSTM_C_train_scaled[i-window_size:i, 0])
    LSTM_C_y_train.append(LSTM_C_train_scaled[i, 0])
LSTM_C_X_train, LSTM_C_y_train = np.array(LSTM_C_X_train), np.array(LSTM_C_y_train)

# Reshape the data for the LSTM
LSTM_C_X_train = np.reshape(LSTM_C_X_train, (LSTM_C_X_train.shape[0], LSTM_C_X_train.shape[1], 1))

# Build the LSTM model
LSTM_C_model = Sequential()
LSTM_C_model.add(LSTM(64, input_shape=(window_size, 1)))
LSTM_C_model.add(Dense(1))
LSTM_C_model.compile(loss='mean_squared_error', optimizer='adam')
LSTM_C_model.summary()

# Fit the model on the training data
LSTM_C_model.fit(LSTM_C_X_train, LSTM_C_y_train, epochs=100, batch_size=32, verbose=1)

# Generate predictions for the test data
LSTM_C_test_scaled = scaler.transform(LSTM_C_test['Value'].values.reshape(-1, 1))
LSTM_C_X_test, LSTM_C_y_test = [], []
for i in range(window_size, len(LSTM_C_test_scaled)):
    LSTM_C_X_test.append(LSTM_C_test_scaled[i-window_size:i, 0])
    LSTM_C_y_test.append(LSTM_C_test_scaled[i, 0])
LSTM_C_X_test, LSTM_C_y_test = np.array(LSTM_C_X_test), np.array(LSTM_C_y_test)
LSTM_C_X_test = np.reshape(LSTM_C_X_test, (LSTM_C_X_test.shape[0], LSTM_C_X_test.shape[1], 1))
LSTM_C_predicted = LSTM_C_model.predict(LSTM_C_X_test)
LSTM_C_predicted = scaler.inverse_transform(LSTM_C_predicted)

# Plot the actual and predicted values 
fig = px.line(df_C_numeric, y='Value', title='LSTM Model, Synth Data(noise) without structural break')
fig.add_scatter(x=LSTM_C_test.index[window_size:], y=LSTM_C_predicted.flatten(), mode='lines', name='Predicted')

# break indicator
fig.add_shape(type="line",
                      x0=break_point, y0=df_C_numeric['Value'].min(), x1=break_point, y1=df_C_numeric['Value'].max(),
                      line=dict(color="red", width=1, dash="dash"))
fig.show()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 64)                16896     
                                                                 
 dense_2 (Dense)             (None, 1)                 65        
                                                                 
Total params: 16,961
Trainable params: 16,961
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100


In [11]:

np.set_printoptions(suppress=True, precision=4)

LSTM_C_pred = LSTM_C_predicted

LSTM_C_real = LSTM_C_test['Value'].values[window_size:].reshape(-1, 1)

In [12]:
final_LSTM_C_mape = mean_absolute_percentage_error(LSTM_C_real, LSTM_C_pred )
final_LSTM_C_mape

5.240653162947915

In [13]:

LSTM_C_mae = np.mean(np.abs(LSTM_C_predicted - LSTM_C_test['Value'].values[window_size:].reshape(-1, 1)))
print(f'MAE: {LSTM_C_mae}')


LSTM_C_mse = np.mean((LSTM_C_predicted - LSTM_C_test['Value'].values[window_size:].reshape(-1, 1))**2)
print(f'MSE: {LSTM_C_mse:.2f}')


LSTM_C_rmse = np.sqrt(LSTM_C_mse)
print(f'RMSE: {LSTM_C_rmse:.2f}')


print(f'MAPE: {final_LSTM_C_mape:.2f}')

MAE: 1.0414660980588888
MSE: 1.65
RMSE: 1.29
MAPE: 5.24


# GRU (Color)

In [14]:

GRU_C_data = df_C_numeric.copy()
scaler = MinMaxScaler()
GRU_C_data_scaled = scaler.fit_transform(GRU_C_data)


train_size = int(len(GRU_C_data_scaled) * 0.8)
GRU_C_train_data = GRU_C_data_scaled[:train_size]
GRU_C_test_data = GRU_C_data_scaled[train_size:]


GRU_C_model = tf.keras.Sequential([
    tf.keras.layers.GRU(units=64, activation='tanh', input_shape=(1, 1)),
    tf.keras.layers.Dense(units=1)
])

GRU_C_model.compile(optimizer='adam', loss='mean_squared_error')
GRU_C_model.fit(GRU_C_train_data[:-1], GRU_C_train_data[1:], epochs=100, batch_size=32)


GRU_C_predictions = GRU_C_model.predict(GRU_C_test_data[:-1])


GRU_C_predictions = scaler.inverse_transform(GRU_C_predictions)


fig = px.line()
fig.add_scatter(x=GRU_C_data.index[train_size+1:], y=GRU_C_data['Value'][train_size+1:], name='Real Values')
fig.add_scatter(x=GRU_C_data.index[train_size+1:], y=GRU_C_predictions.flatten(), name='Predicted Values')
fig.update_layout(title='GRU Model - Real vs Predicted Values', xaxis_title='Date', yaxis_title='Value')
fig.show()


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [15]:

fig = px.line()
fig.add_scatter(x=GRU_C_data.index, y=GRU_C_data['Value'], name='All Data')
fig.add_scatter(x=GRU_C_data.index[train_size+1:], y=GRU_C_predictions.flatten(), name='Predicted Values')
fig.update_layout(title='GRU Model, synth data(noise) without break', xaxis_title='Date', yaxis_title='Value')
fig.show()

In [16]:



GRU_C_true_values = scaler.inverse_transform(GRU_C_test_data[1:])


GRU_C_mse = mean_squared_error(GRU_C_true_values, GRU_C_predictions)
GRU_C_rmse = np.sqrt(GRU_C_mse)
GRU_C_mae = mean_absolute_error(GRU_C_true_values, GRU_C_predictions)
def GRU_C_CS_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

GRU_C_mape = GRU_C_CS_mean_absolute_percentage_error(GRU_C_true_values, GRU_C_predictions)

print(f'MAPE: {GRU_C_mape:.2f}%')


print('MSE:', GRU_C_mse)
print('RMSE:', GRU_C_rmse)
print('MAE:', GRU_C_mae)

MAPE: 510.93%
MSE: 1.711601827868087
RMSE: 1.3082820138900049
MAE: 1.055625099985307


In [17]:
GRU_C_N_MAPE = mean_absolute_percentage_error(GRU_C_true_values, GRU_C_predictions)

In [18]:
GRU_C_N_MAPE

5.109321680594677

# Real World Data

# LSTM (Real Data)

In [7]:

LSTM_R_tsla_df = yf.download("DUK", start="2012-01-01", end="2023-04-20")


LSTM_R_df = pd.DataFrame({'ds': LSTM_R_tsla_df.index, 'y': LSTM_R_tsla_df['Adj Close']})


train_size = int(len(LSTM_R_df) * 0.8)
LSTM_R_train, LSTM_R_test = LSTM_R_df[0:train_size], LSTM_R_df[train_size:len(LSTM_R_df)]


scaler = MinMaxScaler(feature_range=(0, 1))
LSTM_R_train_scaled = scaler.fit_transform(LSTM_R_train['y'].values.reshape(-1, 1))


window_size = 30


LSTM_R_X_train, LSTM_R_y_train = [], []
for i in range(window_size, len(LSTM_R_train_scaled)):
    LSTM_R_X_train.append(LSTM_R_train_scaled[i-window_size:i, 0])
    LSTM_R_y_train.append(LSTM_R_train_scaled[i, 0])
LSTM_R_X_train, LSTM_R_y_train = np.array(LSTM_R_X_train), np.array(LSTM_R_y_train)


LSTM_R_X_train = np.reshape(LSTM_R_X_train, (LSTM_R_X_train.shape[0], LSTM_R_X_train.shape[1], 1))


LSTM_R_model = Sequential()
LSTM_R_model.add(LSTM(64, input_shape=(window_size, 1)))
LSTM_R_model.add(Dense(1))
LSTM_R_model.compile(loss='mean_squared_error', optimizer='adam')
LSTM_R_model.summary()


LSTM_R_model.fit(LSTM_R_X_train, LSTM_R_y_train, epochs=100, batch_size=32, verbose=1)


LSTM_R_test_scaled = scaler.transform(LSTM_R_test['y'].values.reshape(-1, 1))
LSTM_R_X_test, LSTM_R_y_test = [], []
for i in range(window_size, len(LSTM_R_test_scaled)):
    LSTM_R_X_test.append(LSTM_R_test_scaled[i-window_size:i, 0])
    LSTM_R_y_test.append(LSTM_R_test_scaled[i, 0])
LSTM_R_X_test, LSTM_R_y_test = np.array(LSTM_R_X_test), np.array(LSTM_R_y_test)
LSTM_R_X_test = np.reshape(LSTM_R_X_test, (LSTM_R_X_test.shape[0], LSTM_R_X_test.shape[1], 1))
LSTM_R_predicted = LSTM_R_model.predict(LSTM_R_X_test)
LSTM_R_predicted = scaler.inverse_transform(LSTM_R_predicted)


fig = px.line(LSTM_R_df, x='ds', y='y', title='LSTM, Duke data')
fig.add_scatter(x=LSTM_R_test.index[window_size:], y=LSTM_R_predicted.flatten(), mode='lines', name='Predicted')
fig.show()




[*********************100%***********************]  1 of 1 completed
Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_1 (LSTM)               (None, 64)                16896     
                                                                 
 dense_3 (Dense)             (None, 1)                 65        
                                                                 
Total params: 16,961
Trainable params: 16,961
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30

In [11]:

np.set_printoptions(suppress=True, precision=4)  

LSTM_R_pred = LSTM_R_predicted

LSTM_R_real = LSTM_R_test['y'].values[window_size:].reshape(-1, 1)

In [12]:
final_LSTM_R_mape = mean_absolute_percentage_error(LSTM_R_real, LSTM_R_pred)
final_LSTM_R_mape

0.020053839684721235

In [13]:

LSTM_R_mae = np.mean(np.abs(LSTM_R_predicted - LSTM_R_test['y'].values[window_size:].reshape(-1, 1)))


LSTM_R_mse = np.mean((LSTM_R_predicted - LSTM_R_test['y'].values[window_size:].reshape(-1, 1))**2)
print(f'MSE: {LSTM_R_mse:.2f}')


LSTM_R_rmse = np.sqrt(LSTM_R_mse)
print(f'RMSE: {LSTM_R_rmse:.2f}')


print(f'MAPE: {final_LSTM_R_mape:.2f}')
print(f'MAE: {LSTM_R_mae}')

MSE: 1.95
RMSE: 1.40
MAPE: 0.02
MAE: 1.036916545238035


# CNN (Real Data)

In [23]:

start_date = '2012-01-01'
end_date = '2023-04-20'
CNN_R_data = yf.download('TSLA', start=start_date, end=end_date, progress=False)


CNN_R_closing_prices = CNN_R_data['Close'].values


train_size = int(len(CNN_R_closing_prices) * 0.8)
CNN_R_train_data = CNN_R_closing_prices[:train_size]
CNN_R_test_data = CNN_R_closing_prices[train_size:]


def create_sequences(data, seq_length):
    X = []
    y = []
    for i in range(len(data) - seq_length):
        X.append(data[i:i+seq_length])
        y.append(data[i+seq_length])
    return np.array(X), np.array(y)


seq_length = 10
CNN_R_X_train, CNN_R_y_train = create_sequences(CNN_R_train_data, seq_length)
CNN_R_X_test, CNN_R_y_test = create_sequences(CNN_R_test_data, seq_length)


input_shape = (CNN_R_X_train.shape[1], 1) 


CNN_R_model = Sequential()
CNN_R_model.add(Conv1D(filters=32, kernel_size=3, activation='relu', input_shape=input_shape))
CNN_R_model.add(MaxPooling1D(pool_size=2))
CNN_R_model.add(Flatten())
CNN_R_model.add(Dense(64, activation='relu'))
CNN_R_model.add(Dense(1))
CNN_R_model.compile(loss='mean_squared_error', optimizer='adam')
CNN_R_model.fit(CNN_R_X_train, CNN_R_y_train, epochs=100, batch_size=32)


CNN_R_y_pred = CNN_R_model.predict(CNN_R_X_test)


CNN_R_predictions = pd.DataFrame({'Date': CNN_R_data.index[train_size+seq_length:], 'Actual': CNN_R_y_test.flatten(), 'Predicted': CNN_R_y_pred.flatten()})






Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

In [24]:

CNN_R_mape = mean_absolute_percentage_error(CNN_R_y_test, CNN_R_y_pred)
CNN_R_mse = mean_squared_error(CNN_R_y_test, CNN_R_y_pred)
CNN_R_rmse = np.sqrt(CNN_R_mse)
CNN_R_mae = mean_absolute_error(CNN_R_y_test, CNN_R_y_pred)


print('MAPE:', CNN_R_mape)
print('MSE:', CNN_R_mse)
print('RMSE:', CNN_R_rmse)
print('MAE:', CNN_R_mae)

MAPE: 0.05274041697746398
MSE: 257.37031755716686
RMSE: 16.042765271522452
MAE: 12.849167416048818


In [25]:

np.set_printoptions(suppress=True, precision=4)  
CNN_R_y_pred = CNN_R_predictions['Predicted'].values


In [26]:
def CNN_R_CS_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


CNN_R_N_mape = CNN_R_CS_mean_absolute_percentage_error(CNN_R_y_test, CNN_R_y_pred)

print(f'MAPE: {CNN_R_N_mape:.2f}%')

MAPE: 5.27%


In [27]:

fig = px.line(CNN_R_predictions, x='Date', y=['Actual', 'Predicted'], title='Прогнозы и фактические значения TSLA')
fig.show()

# Prophet (Real Data)

In [31]:

Prophet_R_tesla = yf.download('TSLA', start='2012-01-01', end='2023-04-20')


Prophet_R_df = Prophet_R_tesla.reset_index()[['Date', 'Close']]
Prophet_R_df = Prophet_R_df.rename(columns={'Date': 'ds', 'Close': 'y'})


train_size = int(len(Prophet_R_df) * 0.8)
Prophet_R_train_data, Prophet_R_test_data = Prophet_R_df[:train_size], Prophet_R_df[train_size:]


Prophet_R_m = Prophet(changepoint_prior_scale=1.0,
            seasonality_prior_scale=10.0,
            daily_seasonality=False,
            weekly_seasonality=True,
            yearly_seasonality=True,
            seasonality_mode='multiplicative')


Prophet_R_m.fit(Prophet_R_train_data)


Prophet_R_future = pd.DataFrame({'ds': Prophet_R_test_data['ds']})


Prophet_R_forecast = Prophet_R_m.predict(Prophet_R_future)


fig = px.line(Prophet_R_forecast, x='ds', y='yhat', labels={'x':'Date', 'y':'Value'})
fig.add_scatter(x=Prophet_R_df['ds'], y=Prophet_R_df['y'], mode='lines', name='Actual')
fig.update_layout(title='Synthetic Time Series Forecast')

fig.show()

[*********************100%***********************]  1 of 1 completed


DEBUG:cmdstanpy:input tempfile: /tmp/tmpt9b_oxwz/qtit59a0.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmpt9b_oxwz/lxenrm52.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=7670', 'data', 'file=/tmp/tmpt9b_oxwz/qtit59a0.json', 'init=/tmp/tmpt9b_oxwz/lxenrm52.json', 'output', 'file=/tmp/tmpt9b_oxwz/prophet_modelxnttz4sz/prophet_model-20230527210953.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
21:09:53 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
21:09:58 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


In [32]:

Prophet_R_y_pred = Prophet_R_forecast['yhat'].values


Prophet_R_y_actual = Prophet_R_test_data['y'].values


Prophet_R_mape = mean_absolute_percentage_error(Prophet_R_y_actual, Prophet_R_y_pred)
Prophet_R_mse = mean_squared_error(Prophet_R_y_actual, Prophet_R_y_pred)
Prophet_R_rmse = np.sqrt(Prophet_R_mse)
Prophet_R_mae = mean_absolute_error(Prophet_R_y_actual, Prophet_R_y_pred)



print('MAPE:', Prophet_R_mape)
print('MSE:', Prophet_R_mse)
print('RMSE:', Prophet_R_rmse)
print('MAE:', Prophet_R_mae)

MAPE: 0.47880969823212866
MSE: 17771.19775541382
RMSE: 133.30865596582174
MAE: 100.02309192980015


In [33]:
def Prophet_R_CS_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


Prophet_R_N_mape = Prophet_R_CS_mean_absolute_percentage_error(Prophet_R_y_actual, Prophet_R_y_pred)

print(f'MAPE: {Prophet_R_N_mape:.2f}%')


MAPE: 47.88%


# GRU (Real Data)

In [5]:

ticker = "DUK"
GRU_R_data = yf.download(ticker, start="2012-01-01", end="2023-04-20")
GRU_R_data = GRU_R_data[['Close']]  


scaler = MinMaxScaler()
GRU_R_data_scaled = scaler.fit_transform(GRU_R_data)


train_size = int(len(GRU_R_data_scaled) * 0.8)
GRU_R_train_data = GRU_R_data_scaled[:train_size]
GRU_R_test_data = GRU_R_data_scaled[train_size:]


GRU_R_model = tf.keras.Sequential([
    tf.keras.layers.GRU(units=64, activation='tanh', input_shape=(1, 1)),
    tf.keras.layers.Dense(units=1)
])

GRU_R_model.compile(optimizer='adam', loss='mean_squared_error')
GRU_R_model.fit(GRU_R_train_data[:-1], GRU_R_train_data[1:], epochs=100, batch_size=32)


GRU_R_predictions = GRU_R_model.predict(GRU_R_test_data[:-1])


GRU_R_predictions = scaler.inverse_transform(GRU_R_predictions)


fig = px.line()
fig.add_scatter(x=GRU_R_data.index[train_size+1:], y=GRU_R_data['Close'][train_size+1:], name='Real Values')
fig.add_scatter(x=GRU_R_data.index[train_size+1:], y=GRU_R_predictions.flatten(), name='Predicted Values')
fig.update_layout(title='GRU, Apple data', xaxis_title='Date', yaxis_title='Value')
fig.show()

[*********************100%***********************]  1 of 1 completed
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epo

In [6]:

fig = px.line()
fig.add_scatter(x=GRU_R_data.index, y=GRU_R_data['Close'], name='All Data')
fig.add_scatter(x=GRU_R_data.index[train_size+1:], y=GRU_R_predictions.flatten(), name='Predicted Values')
fig.update_layout(title='GRU, Duke data', xaxis_title='Date', yaxis_title='Value')
fig.show()

In [16]:

GRU_R_true_values = scaler.inverse_transform(GRU_R_test_data[1:])


GRU_R_mse = mean_squared_error(GRU_R_true_values, GRU_R_predictions)
GRU_R_rmse = np.sqrt(GRU_R_mse)
GRU_R_mae = mean_absolute_error(GRU_R_true_values, GRU_R_predictions)
def GRU_R_CS_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100


GRU_R_mape = GRU_R_CS_mean_absolute_percentage_error(GRU_R_true_values, GRU_R_predictions)

print(f'MAPE: {GRU_R_mape:.2f}%')


print('MSE:', GRU_R_mse)
print('RMSE:', GRU_R_rmse)
print('MAE:', GRU_R_mae)

MAPE: 1.91%
MSE: 1.8934639885847306
RMSE: 1.3760319722247483
MAE: 1.009229737268367


In [17]:
Biba = mean_absolute_percentage_error(GRU_R_true_values, GRU_R_predictions)
Biba

0.019060962315039

# Indicators

LSTM indicators

In [None]:
price_diff = np.diff(LSTM_W_predicted.flatten())
buy_signals = []
sell_signals = []
threshold = 2.0  # Adjust this threshold based on your requirements

for i in range(len(price_diff)):
    if price_diff[i] > threshold:
        buy_signals.append(i)
    elif price_diff[i] < -threshold:
        sell_signals.append(i)


fig.add_trace(go.Scatter(x=LSTM_W_test.index[window_size:], y=LSTM_W_predicted.flatten(),
                         mode='lines', name='Predicted'))

for buy_signal in buy_signals:
    fig.add_trace(go.Scatter(x=[LSTM_W_test.index[window_size:][buy_signal]], 
                             y=[LSTM_W_predicted.flatten()[buy_signal]],
                             mode='markers', marker=dict(color='green'), name='Buy Signal'))

for sell_signal in sell_signals:
    fig.add_trace(go.Scatter(x=[LSTM_W_test.index[window_size:][sell_signal]], 
                             y=[LSTM_W_predicted.flatten()[sell_signal]],
                             mode='markers', marker=dict(color='red'), name='Sell Signal'))

fig.show()


In [None]:
balance = 300
shares = 0
transaction_history = []  

all_signals = sorted(buy_signals + sell_signals) 

for signal in all_signals:
    index = LSTM_W_test.index[window_size + signal]
    price = LSTM_W_test.loc[index, 'Value']

    if signal in buy_signals:
        if balance >= 3 * price:
            shares += 3
            balance -= 3 * price
            transaction_history.append(('Buy', index, price, shares))
        elif balance >= price:
            num_shares = balance // price
            shares += num_shares
            balance -= num_shares * price
            transaction_history.append(('Buy', index, price, shares))
        print("Current Balance after Buy:", balance)

    elif signal in sell_signals:
        if shares > 0:
            balance += shares * price
            transaction_history.append(('Sell', index, price, shares))
            shares = 0
            print("Current Balance after Sell:", balance)

print("Final Balance:", balance)


print("Transaction History:")
for transaction in transaction_history:
    action, index, price, shares = transaction
    print(f"{action} at index {index}, price: {price:.2f}, shares: {shares}")



GRU indicators

In [None]:
price_diff = np.diff(GRU_predictions.flatten())
buy_signals = []
sell_signals = []
threshold = 2.0  

for i in range(len(price_diff)):
    if price_diff[i] > threshold:
        buy_signals.append(i)
    elif price_diff[i] < -threshold:
        sell_signals.append(i)

fig.add_trace(go.Scatter(x=data.index[train_size+1:], y=GRU_predictions.flatten(),
                         mode='lines', name='Predicted'))

for buy_signal in buy_signals:
    fig.add_trace(go.Scatter(x=[data.index[train_size+1:][buy_signal]], 
                             y=[GRU_predictions.flatten()[buy_signal]],
                             mode='markers', marker=dict(color='green'), name='Buy Signal'))

for sell_signal in sell_signals:
    fig.add_trace(go.Scatter(x=[data.index[train_size+1:][sell_signal]], 
                             y=[GRU_predictions.flatten()[sell_signal]],
                             mode='markers', marker=dict(color='red'), name='Sell Signal'))

fig.show()


In [None]:
train_size = int(len(df_numeric) * 0.8)
GRU_DF_test_data = df_numeric[train_size:len(df_numeric)]

In [None]:
balance = 300
shares = 0
transaction_history = []  

all_signals = sorted(buy_signals + sell_signals) 

for signal in all_signals:
    index = GRU_DF_test_data.index[1 + signal]
    price = GRU_DF_test_data.loc[index, 'Value']

    if signal in buy_signals:
        if balance >= 3 * price:
            shares += 3
            balance -= 3 * price
            transaction_history.append(('Buy', index, price, shares))
        elif balance >= price:
            num_shares = balance // price
            shares += num_shares
            balance -= num_shares * price
            transaction_history.append(('Buy', index, price, shares))
        print("Current Balance after Buy:", balance)

    elif signal in sell_signals:
        if shares > 0:
            balance += shares * price
            transaction_history.append(('Sell', index, price, shares))
            shares = 0
            print("Current Balance after Sell:", balance)

print("Final Balance:", balance)


print("Transaction History:")
for transaction in transaction_history:
    action, index, price, shares = transaction
    print(f"{action} at index {index}, price: {price:.2f}, shares: {shares}")
