In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import Sequential, layers, models
from tensorflow.keras.layers.experimental.preprocessing import Normalization
from tensorflow.keras.utils import normalize

from tensorflow.keras.optimizers import RMSprop, Adam
from tensorflow.keras.metrics import MAPE, MAE
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras import regularizers

#---Import moduls from stock-prediction package
from stock_prediction.data_prep import Data_Prep
from stock_prediction.features_exo import exo_selection

#---API related
import os 
from math import sqrt 
import yfinance as yf
from datetime import date, timedelta, datetime
from stock_prediction.data_prep_api import Data_Prep_Api
from stock_prediction.features_exo_api import exo_selection_api

In [2]:
from stock_prediction.data_prep import Data_Prep
from stock_prediction.features_exo import exo_selection
from stock_prediction.garch import garch
from stock_prediction.arima import arima_multi_day

import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pandas.plotting import lag_plot
from pandas import datetime
from statsmodels.tsa.arima_model import ARIMA
from sklearn.metrics import mean_squared_error
from arch import arch_model
from arch.__future__ import reindexing

  from pandas import datetime
  from pandas import datetime


In [3]:
from statsmodels.tsa.arima_model import ARIMA
from sklearn.model_selection import TimeSeriesSplit
from sklearn.metrics import mean_absolute_percentage_error as MAPE
import pdb

In [4]:
stock = Data_Prep('vinci', 20)

In [5]:
def load_preproc_data(company='vinci'):
    """This function allows to load the data for chosen company,
    to select features make changes directly inside this function."""
    #---(1) Instantiate the Class 
    prep_class = Data_Prep(company, 20)



    #---(2) Load the first dataframe ready to use if no more modification
    df = prep_class.data_prep()



    #---(3) Function that allows us to delete some of the features we don't want to use for modelling 
    # Log Return is to False by default
    # *******************
    # if period < 252 , don't use Annual_vol
    #********************
    prep_class.select_features(df, Return = True, Log_Return=False, High_Low=True, High_Close=True, Low_Close=True,
                            Volume_Change=False, Period_Volum=True, Annual_Vol=False,
                            Period_Vol=True, Return_Index=True, Volum_Index=True, Relative_Return=True)

    #---(4) Function that add the exogenous features that you need to select 
    # returns the dataframe ready to modelling 
    # and the list of the columns to rebase it later - no VIX because no rebase for VIX - kept unchanged 
    df = exo_selection(df, sp500=True, gold=True, eurusd=True, nasdaq=True, crude=True, vix=True)

    #---(5) Sort values to make sure they are ordered *in case and drop data column
    df = df.sort_values('Date')
#     df = df.drop(columns='Date')
    
    df = df.set_index('Date')
    
    #---Company code on stock market
    idx = df.columns[0][7:]
    
    return df, idx

In [6]:
data_vinci, idx_vinci = load_preproc_data()

In [7]:
data_vinci.head()

Unnamed: 0_level_0,Return_DG.PA,High-Low_DG.PA,High-Close_DG.PA,Low-Close_DG.PA,Period_Volum_DG.PA,Period_Vol_DG.PA,Return_stoxx_50,Period_Volum_stoxx_50,DG.PA_relatif,Return_S&P500,Return_Gold,Return_Usd,Return_Nasdaq,Return_Crude,Vix_No_Rebase
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2016-07-05,-0.014003,0.014423,0.010053,0.004327,-0.383607,0.380593,-0.017235,-0.097841,0.003232,-0.005324,0.014738,0.001349,-0.008158,-0.048786,0.1558
2016-07-06,-0.00734,0.016859,0.008359,0.008429,-0.228361,0.37973,-0.018312,-0.030392,0.010972,0.000872,0.006267,-0.00727,0.007518,0.017811,0.1496
2016-07-07,0.014789,0.014715,0.00491,0.009757,-0.291577,0.384615,0.006783,-0.305155,0.008006,-0.015024,-0.003517,0.003075,0.003632,-0.048282,0.1476
2016-07-08,0.007445,0.020584,0.00566,0.01484,-0.400676,0.381545,0.02083,-0.100105,-0.013385,-0.003397,-0.002573,-0.002999,0.016394,0.005981,0.132
2016-07-11,0.029088,0.024245,0.000458,0.023776,-0.385321,0.390928,0.01667,-0.276847,0.012418,-0.006961,-0.001179,-0.001966,0.006432,-0.014314,0.1354


In [8]:
data_vinci.shape

(1259, 15)

In [9]:
model = models.load_model('/home/khody/code/anastasia-77/Jaouad-AHRAOUI/stock_prediction/stock_prediction/saved_models_cnn/saved_5y_cnn')

In [10]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
normalization (Normalization (None, 20, 15)            31        
_________________________________________________________________
conv1d (Conv1D)              (None, 19, 128)           3968      
_________________________________________________________________
dropout (Dropout)            (None, 19, 128)           0         
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 9, 128)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 8, 64)             16448     
_________________________________________________________________
dropout_1 (Dropout)          (None, 8, 64)             0         
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 4, 64)             0

In [12]:
from stock_prediction.trainer_CNN import prediction_return_CNN

prediction_cnn = prediction_return_CNN(start='2016-07-05', stop = '2021-06-11')
print(prediction_cnn)

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%********

In [18]:
data_vinci.loc['2020-06-11':'2021-06-11']

Unnamed: 0_level_0,Return_DG.PA,High-Low_DG.PA,High-Close_DG.PA,Low-Close_DG.PA,Period_Volum_DG.PA,Period_Vol_DG.PA,Return_stoxx_50,Period_Volum_stoxx_50,DG.PA_relatif,Return_S&P500,Return_Gold,Return_Usd,Return_Nasdaq,Return_Crude,Vix_No_Rebase
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-06-11,-0.041958,0.033902,0.031387,0.002439,0.216571,0.492029,-0.045280,0.217205,0.003322,-0.012892,0.010915,0.002931,-0.052655,-0.082323,0.4079
2020-06-12,0.009489,0.048894,0.028923,0.019410,-0.059046,0.491922,0.002916,-0.098983,0.006573,-0.008244,-0.001559,-0.006807,0.010121,-0.002201,0.3609
2020-06-15,0.006267,0.057731,0.009341,0.047942,-0.094307,0.386201,-0.005498,-0.158819,0.011765,-0.018610,-0.005204,-0.003195,0.014309,0.023718,0.3440
2020-06-16,0.028503,0.030896,0.017932,0.012736,0.074599,0.388218,0.033876,0.045021,-0.005373,0.003613,0.005406,0.007619,0.017464,0.033944,0.3367
2020-06-17,0.000466,0.028077,0.022812,0.005147,0.042237,0.388142,0.007590,-0.190489,-0.007124,-0.000594,-0.000231,-0.006354,0.001481,-0.010943,0.3347
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-31,-0.005139,0.016498,0.014529,0.001941,-0.317047,0.183277,-0.007640,-0.394903,0.002501,0.000000,0.000000,-0.000256,0.000000,0.000000,0.1676
2021-06-01,0.011192,0.010714,0.004044,0.006642,-0.098013,0.180349,0.007994,-0.134432,0.003199,-0.001445,0.000210,0.003511,-0.000892,0.021110,0.1790
2021-06-02,-0.000958,0.011579,0.005113,0.006433,-0.243402,0.178260,0.004114,-0.181536,-0.005072,0.003642,0.002417,-0.001234,0.001445,0.016391,0.1748
2021-06-03,-0.004474,0.016505,0.008347,0.008091,-0.248169,0.176500,-0.002265,-0.188683,-0.002209,-0.008757,-0.019030,-0.000647,-0.010309,-0.000291,0.1804


In [18]:
data_vinci.loc['2020-06-11':'2021-06-11'].isna().sum()

Return_DG.PA             0
High-Low_DG.PA           0
High-Close_DG.PA         0
Low-Close_DG.PA          0
Period_Volum_DG.PA       0
Period_Vol_DG.PA         0
Return_stoxx_50          0
Period_Volum_stoxx_50    0
DG.PA_relatif            0
Return_S&P500            0
Return_Gold              0
Return_Usd               0
Return_Nasdaq            0
Return_Crude             0
Vix_No_Rebase            0
dtype: int64

In [23]:
len_ = 20
dates_vinci = prediction_cnn_last_year['vinci'][1][len_+1:]
df_pred_vinci = pd.DataFrame({'date':dates_vinci, 'prediction': prediction_cnn_last_year['vinci'][0]})

In [24]:
df_pred_vinci.head()

Unnamed: 0,date,prediction
0,2020-06-23,0.001192
1,2020-06-24,0.001192
2,2020-06-25,0.001192
3,2020-06-26,0.001192
4,2020-06-29,0.001192


In [28]:
pred_garsh = garch('vinci', 1209)

In [25]:
pred_arima = arima_multi_day('vinci', 1209, 0.05)

statsmodels.tsa.arima_model.ARMA and statsmodels.tsa.arima_model.ARIMA have
been deprecated in favor of statsmodels.tsa.arima.model.ARIMA (note the .
between arima and model) and
statsmodels.tsa.SARIMAX. These will be removed after the 0.12 release.

statsmodels.tsa.arima.model.ARIMA makes use of the statespace framework and
is both well tested and maintained.

removed, use:












































































































In [29]:
from stock_prediction.trainer_CNN import prediction_return_CNN
prediction_cnn_last_year_vinci = prediction_return_CNN(start='2016-07-05', stop = '2021-06-11')

[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [37]:
len_ = 20
dates_vinci = prediction_cnn_last_year_vinci['vinci'][1][len_+1:]
df_pred_vinci = pd.DataFrame({'Date':dates_vinci, 'prediction': prediction_cnn_last_year_vinci['vinci'][0]})

In [38]:
df_pred_vinci.head()

Unnamed: 0,Date,prediction
0,2017-07-18,0.001192
1,2017-07-19,0.001192
2,2017-07-20,0.001192
3,2017-07-21,0.001192
4,2017-07-24,0.001192


In [33]:
pred_garsh.head()

Unnamed: 0,garch_pred,Date,vol_variation
51,20.409851,2016-09-14,0.182527
52,20.349999,2016-09-15,-0.002933
53,20.582436,2016-09-16,0.011422
54,20.418031,2016-09-19,-0.007988
55,21.500385,2016-09-20,0.05301


In [34]:
pred_arima.head()

Unnamed: 0,yesterday,prediction,conf_low,conf_high,true,conf_std,Date,perf_pred,perf_true,perf_low,perf_high
41,101.311661,100.229275,98.853678,101.604872,100.998656,0.701848,2016-09-29,-0.010684,-0.00309,-0.024262,0.002894
42,100.998656,102.686427,101.292483,104.08037,101.535259,0.711209,2016-09-30,0.016711,0.005313,0.002909,0.030512
43,101.535259,101.803834,100.37707,103.230599,101.445808,0.727954,2016-10-03,0.002645,-0.000881,-0.011407,0.016697
44,101.445808,101.608547,100.183373,103.033721,101.997306,0.727143,2016-10-04,0.001604,0.005436,-0.012444,0.015653
45,101.997306,101.715455,100.285934,103.144976,100.298096,0.729361,2016-10-05,-0.002763,-0.016659,-0.016779,0.011252


In [35]:
pred_final = pred_arima.merge(pred_garsh, how='left', on='Date')

In [53]:
pred_final['Date'] = pd.to_datetime(pred_final['Date'])

In [56]:
type(pred_final['Date'][0])

pandas._libs.tslibs.timestamps.Timestamp

In [57]:
pred_final = pred_final.merge(df_pred_vinci, how='left', on='Date')

In [60]:
pred_final.dropna(inplace=True)

In [61]:
pred_final

Unnamed: 0,yesterday,prediction_x,conf_low,conf_high,true,conf_std,Date,perf_pred,perf_true,perf_low,perf_high,garch_pred,vol_variation,prediction_y
204,119.296089,118.103244,116.595877,119.610611,118.360242,0.769079,2017-07-18,-0.009999,-0.007845,-0.022635,0.002636,13.625373,-0.034395,0.001192
205,118.360242,120.281207,118.774586,121.787827,120.247250,0.768698,2017-07-19,0.016230,0.015943,0.003501,0.028959,13.040944,-0.042893,0.001192
206,120.247250,118.188746,116.685026,119.692466,119.587587,0.767218,2017-07-20,-0.017119,-0.005486,-0.029624,-0.004614,12.585191,-0.034948,0.001192
207,119.587587,117.554050,116.021688,119.086413,116.780070,0.781832,2017-07-21,-0.017005,-0.023477,-0.029818,-0.004191,11.978393,-0.048215,0.001192
208,116.780070,117.034602,115.535168,118.534036,117.807949,0.765031,2017-07-24,0.002180,0.008802,-0.010660,0.015019,11.762091,-0.018058,0.001192
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1191,162.465135,161.858150,159.280880,164.435419,161.875872,1.314958,2021-05-28,-0.003736,-0.003627,-0.019600,0.012127,14.462025,0.015707,0.001192
1193,161.043955,161.722623,159.388035,164.057211,162.846429,1.191138,2021-06-01,0.004214,0.011192,-0.010282,0.018711,13.084130,-0.068074,0.001192
1194,162.846429,163.408191,161.066086,165.750296,162.690453,1.194974,2021-06-02,0.003450,-0.000958,-0.010933,0.017832,13.287345,0.015531,0.001192
1195,162.690453,162.749535,160.421126,165.077943,161.962521,1.187985,2021-06-03,0.000363,-0.004474,-0.013949,0.014675,13.311260,0.001800,0.001192


In [62]:
pred_final.to_csv('/home/khody/code/anastasia-77/Jaouad-AHRAOUI/stock_prediction/stock_prediction/data/test_stacking')