In [None]:
import numpy as np
import pandas as pd
import time
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.callbacks import ModelCheckpoint
import datetime as dt
import pandas_datareader.data as web

# Download window: from a fixed start date up to the moment this cell runs.
start = dt.datetime(year=2019, month=6, day=15)
end = dt.datetime.now()

## Acquiring data from MOEX, Finam, and Yahoo Finance

In [None]:
# Load the previously saved dataset and index it by its 'Date' column.
df_old = pd.read_excel('ML_DF_Regression.xlsx').set_index('Date')


In [None]:
def _load_moex(ticker, drop_cols, rename_map, board=None):
    """Download one MOEX instrument and return a tidy, date-indexed frame.

    Parameters
    ----------
    ticker : str
        Security id passed to ``web.get_data_moex``.
    drop_cols : list of str
        Columns removed from the downloaded frame (``KeyError`` if one is
        missing, which surfaces upstream schema changes early).
    rename_map : dict
        Old-name -> new-name mapping applied to the remaining columns.
    board : str, optional
        When given, only rows whose ``BOARDID`` equals this value are kept.

    Returns
    -------
    pandas.DataFrame
        A new frame whose index is named ``Date``, converted with
        ``pd.to_datetime`` and sorted ascending.
    """
    quotes = web.get_data_moex(ticker, start, end)
    if board is not None:
        # Filter before dropping: BOARDID is one of the columns removed below.
        quotes = quotes.loc[quotes.BOARDID == board]
    # `columns=` instead of a positional axis (keyword-only in pandas >= 2.0);
    # the non-inplace form also avoids mutating a filtered slice.
    quotes = quotes.drop(columns=drop_cols).rename(columns=rename_map)
    # Every frame gets the same index treatment so the column-wise concat
    # below aligns on identical keys.
    quotes.index = pd.to_datetime(quotes.index)
    quotes.index.name = 'Date'
    return quotes.sort_index()


# NOTE(review): 'BRQ9' looks like a single dated futures contract -- confirm it
# still returns data for the requested window.
Brent = _load_moex(
    'BRQ9',
    ['BOARDID', 'SECID', 'OPENPOSITIONVALUE', 'OPEN', 'LOW', 'HIGH', 'VALUE', 'OPENPOSITION', 'VOLUME'],
    {'CLOSE': 'Brent'},
)

EUR_USD = _load_moex(
    'EURUSD000TOM',
    ['BOARDID', 'SECID', 'SHORTNAME', 'VOLRUR', 'OPEN', 'LOW', 'HIGH', 'NUMTRADES', 'WAPRICE'],
    {'CLOSE': 'EUR_USD'},
    board='CETS',
)

USD_RUB_New = _load_moex(
    'USD000UTSTOM',
    ['BOARDID', 'SECID', 'SHORTNAME', 'OPEN', 'WAPRICE'],
    {'CLOSE': 'USD_RUB', 'VOLRUR': 'VOL_USD', 'LOW': 'USD_LOW', 'HIGH': 'USD_HIGH', 'NUMTRADES': 'USD_NUMTRADES'},
    board='CETS',
)

RGBITR_New = _load_moex(
    'RGBITR',
    ['BOARDID', 'SECID', 'SHORTNAME', 'NAME', 'OPEN', 'LOW', 'HIGH', 'CLOSE', 'DECIMALS', 'CURRENCYID', 'DIVISOR', 'VALUE', 'DURATION', 'CAPITALIZATION'],
    {'YIELD': 'RGBITR_YIELD'},
)

USD_Repo = _load_moex(
    'MOEXREPOUSD',
    ['BOARDID', 'SECID', 'SHORTNAME', 'NAME', 'VALUE', 'OPEN', 'LOW', 'HIGH', 'DURATION', 'YIELD', 'DECIMALS', 'CAPITALIZATION', 'CURRENCYID', 'DIVISOR'],
    {'CLOSE': 'USD_Repo'},
)

MOEXBC_New = _load_moex(
    'MOEXBC',
    ['BOARDID', 'SECID', 'SHORTNAME', 'NAME', 'OPEN', 'LOW', 'HIGH', 'DECIMALS', 'CURRENCYID', 'DIVISOR', 'VALUE', 'DURATION', 'CAPITALIZATION', 'YIELD'],
    {'CLOSE': 'MOEXBC'},
)

# Combine the selected series column-wise; rows are aligned on the Date index.
my_data = pd.concat(
    [
        USD_RUB_New.USD_RUB,
        USD_RUB_New.VOL_USD,
        RGBITR_New.RGBITR_YIELD,
        EUR_USD.EUR_USD,
        USD_Repo.USD_Repo,
        MOEXBC_New.MOEXBC,
        Brent.Brent,
    ],
    axis=1,
)
my_data.index = pd.to_datetime(my_data.index)
my_data.tail(3)

In [None]:
tickers = ['RSX', '^IRX', 'EEM']

# Download each ticker, keep only its adjusted close (renamed to the ticker),
# and collect the frames so they can be combined in one step instead of
# growing a frame with repeated joins.
etf_frames = []
for ticker in tickers:
    ETF = web.get_data_yahoo(ticker, start, end)
    ETF = (
        ETF
        .rename(columns={'Adj Close': ticker})
        # `columns=` instead of a positional axis (keyword-only in pandas >= 2.0).
        .drop(columns=['Open', 'High', 'Low', 'Close', 'Volume'])
    )
    etf_frames.append(ETF)

# Outer-join on the date index so a day missing for one ticker is kept as NaN;
# sort to match the ordering an outer `DataFrame.join` produces.
ETFs = pd.concat(etf_frames, axis=1, join='outer').sort_index()
ETFs = ETFs.rename(columns={'RSX': 'Russia_ETF', '^IRX': 'Treas_4M', 'EEM': 'Emerging_Markets'})

# Inner merge on 'Date': only dates present in both sources survive.
df_new = pd.merge(my_data, ETFs, on='Date').sort_index()


In [None]:
# Stack the stored history and the fresh download.
# `DataFrame.append` was removed in pandas 2.0; `pd.concat` is the replacement.
main_df = pd.concat([df_old, df_new], sort=False)
main_df.index = pd.to_datetime(main_df.index)

# Drop incomplete rows first so a complete stored row is never discarded in
# favour of an incomplete re-download of the same date.
main_df = main_df.dropna()

# De-duplicate on the date itself rather than on the MOEXBC value: two
# different days can share a close, and a re-downloaded day can carry a
# revised value. `keep='last'` prefers the freshly downloaded row because
# df_new is concatenated after df_old.
main_df = main_df[~main_df.index.duplicated(keep='last')].sort_index()
main_df.tail(3)

## Saving the dataset to an Excel file

In [None]:
# Persist the merged dataset. This writes to the same workbook name that the
# first data cell reads into `df_old`, so the stored history is overwritten.
main_df.to_excel('ML_DF_Regression.xlsx')

## Reading back and dropping redundant features

In [None]:
# Reload the saved dataset, remove the features that are not used for
# training, keep the most recent 601 rows and relabel them 0..600.
# The later split cells select rows by these integer labels (509 / 600).
main_df = (
    pd.read_excel('ML_DF_Regression.xlsx')
    # `columns=` instead of a positional axis (keyword-only in pandas >= 2.0).
    .drop(columns=['VOL_USD', 'Emerging_Markets', 'MOEXBC'])
    .tail(601)
    # Non-inplace reset: avoids mutating the slice returned by `tail`.
    .reset_index(drop=True)
)
main_df.tail(3)

In [None]:
# Remove the date column so only numeric features remain.
# `errors='ignore'` keeps the cell re-runnable after the column is gone;
# `columns=` replaces the positional axis (keyword-only in pandas >= 2.0).
main_df = main_df.drop(columns='Date', errors='ignore')

EPOCHS = 350  # number of full passes over the training data
BATCH_SIZE = 5  # samples per gradient update; lower it on out-of-memory errors

# Alternative optimizer: opt = 'adam'
opt = 'rmsprop'

# Run identifier used for the TensorBoard log directory.
NAME = f"{BATCH_SIZE}-{opt}--{main_df.columns.tolist()}--{int(time.time())}"

tensorboard = TensorBoard(log_dir="logs/{}".format(NAME))

# File name template filled in by Keras at save time with the epoch number and
# that epoch's validation MAE.
filepath = "regression-{epoch:02d}-{val_mean_absolute_error:.3f}"

# The keyword arguments belong to ModelCheckpoint, not to str.format():
# previously they were swallowed by format(), so every epoch was saved and
# `save_best_only` / `monitor` / `mode` had no effect.
checkpoint = ModelCheckpoint(
    "models/{}.model".format(filepath),
    monitor='val_mean_absolute_error',
    verbose=1,
    save_best_only=True,  # keep only checkpoints that improve the monitored value
    mode='min',           # lower validation MAE is better
)


## Train/test split

In [None]:
# Sorted index labels and the label sitting 15% from the end.
# NOTE(review): despite its name, `last_20pct` marks the last 15%, and neither
# it nor `length` is used by the hard-coded bounds below -- confirm intent.
length = sorted(main_df.index.values)
last_20pct = length[-int(0.15 * len(length))]

# Hold-out window: rows whose index label lies strictly between 509 and 600.
_test_rows = (main_df.index < 600) & (main_df.index > 509)
test_db = main_df.loc[_test_rows]
test_db.tail(3)

In [None]:
# Training set: every row whose index label is at most 509.
_train_rows = main_df.index <= 509
train_df = main_df.loc[_train_rows]
train_df.tail(3)

## Feature-wise normalization

In [None]:
# Split each frame into the regression target (USD_RUB) and a float feature
# matrix. The explicit float cast guarantees the arithmetic below works even
# if a feature column was read back as an integer dtype.
train_targets = train_df.USD_RUB.values
train_data = train_df.drop(columns="USD_RUB").values.astype(float)

test_targets = test_db.USD_RUB.values
test_data = test_db.drop(columns="USD_RUB").values.astype(float)

# Statistics come from the training rows only, so nothing about the hold-out
# rows leaks into the scaling.
mean = train_data.mean(axis=0)
std = train_data.std(axis=0)
# A constant feature has zero spread; dividing by 1 leaves it at 0 after
# centering instead of producing NaN/inf.
std[std == 0] = 1.0

train_data = (train_data - mean) / std
test_data = (test_data - mean) / std


## Building and training our network

In [None]:
# Build the network with the tensorflow.keras classes imported at the top of
# the notebook, so the model, the callbacks (TensorBoard / ModelCheckpoint)
# and the later tf.keras.models.load_model call all come from one Keras
# implementation instead of mixing standalone `keras` with `tf.keras`.
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(train_data.shape[1],)))
model.add(Dense(64, activation='relu'))
model.add(Dense(1))  # single linear output for the regression target

# The metric is spelled out in full so the validation log key is
# 'val_mean_absolute_error', the name the checkpoint file template and its
# `monitor` setting refer to; the short alias 'mae' is logged under a
# different key on some Keras versions.
model.compile(optimizer=opt, loss='mse', metrics=['mean_absolute_error'])

history = model.fit(
    train_data,
    train_targets,
    batch_size=BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=(test_data, test_targets),
    callbacks=[tensorboard, checkpoint],
)


In [None]:
# Select the row whose index label is 600 as the one to predict.
predict_df = main_df.loc[main_df.index == 600]
predict_df

In [None]:
# Features and true target of the row to predict.
# `columns=` replaces the positional axis (keyword-only in pandas >= 2.0) and
# the float cast keeps the scaling valid for integer-typed columns.
predict_data = predict_df.drop(columns="USD_RUB").values.astype(float)
prediction_target = predict_df.USD_RUB.values

# Apply the same training-set statistics that were used for the train/test data.
predict_data = (predict_data - mean) / std


In [None]:
# Replace the in-memory model with one loaded from disk.
# NOTE(review): the file name is hard-coded (presumably epoch 334 with
# validation MAE 0.645 of one particular run -- confirm) and has no "models/"
# prefix, whereas the ModelCheckpoint callback writes under "models/"; verify
# the path before relying on a fresh Restart & Run All.
model = tf.keras.models.load_model("regression-334-0.645.model")

In [None]:
# Predict the row held in `predict_data` (the normalized features of
# `predict_df`). Previously the first test row was fed to the model while the
# printed "real value" came from `predict_df`, so the two numbers described
# different rows.
prediction = model.predict(predict_data, batch_size=1)

print("Real value =", prediction_target)
print("Prediction =", prediction)