## Main code for Kaggle - Optiver Realized Volatility Prediction
@LaurentMombaerts 13/07/2021

**Lib Import / Data loading**

In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import glob
from sklearn.metrics import r2_score

# Load the training targets and key every row by "<stock_id>-<time_id>",
# keeping only that identifier and the target column.
train = (
    pd.read_csv('../input/optiver-realized-volatility-prediction/train.csv')
    .assign(row_id=lambda frame: frame['stock_id'].astype(str) + '-' + frame['time_id'].astype(str))
    .loc[:, ['row_id', 'target']]
)

# Load the test index; the separate stock/time identifier columns are dropped.
test = (
    pd.read_csv('../input/optiver-realized-volatility-prediction/test.csv')
    .drop(columns=['stock_id', 'time_id'])
)

**Functions**

In [3]:
# Competition metric
def rmspe(y_true, y_pred):
    """Return the root mean squared percentage error of ``y_pred`` against ``y_true``.

    Each error is expressed relative to the corresponding true value, squared,
    averaged, and the square root of that average is returned.
    """
    relative_error = (y_true - y_pred) / y_true
    mean_squared_relative_error = np.mean(np.square(relative_error))
    return np.sqrt(mean_squared_relative_error)

# Prediction function (choose here which prediction strategy to use)
def prediction_function(pred,book_path_train,trade_path_train,targets,book_path_test,trade_path_test,all_stocks_ids,test_file):
    """Run the prediction strategy selected by ``pred`` and return its output.

    Parameters
    ----------
    pred : str
        Strategy name: ``'naive'``, ``'stupid_RF'`` or ``'entropy'``.
    book_path_train
        Order-book training paths, forwarded to the selected strategy.
    trade_path_train
        Accepted for interface symmetry; not used by any strategy here.
    targets : pandas.DataFrame
        Training targets. The ``'naive'`` strategy merges on its ``'row_id'``
        column and scores against its ``'target'`` column; the other
        strategies receive it as ``train_targets_pd``.
    book_path_test
        Order-book test paths (``'stupid_RF'`` and ``'entropy'`` only).
    trade_path_test
        Accepted for interface symmetry; not used by any strategy here.
    all_stocks_ids
        Forwarded to the ``'entropy'`` strategy only.
    test_file
        Forwarded to the ``'entropy'`` strategy only.

    Returns
    -------
    The selected strategy's prediction. For ``'naive'`` this is ``targets``
    with its ``'target'`` column replaced by the predicted values.

    Raises
    ------
    ValueError
        If ``pred`` is not one of the supported strategy names.
    """
    if pred == 'naive':
        # Naive prediction (persistence model)
        prediction = past_realized_volatility_per_stock(list_file=book_path_train,prediction_column_name='pred')

        # Merge and evaluate results against the targets that were passed in
        # (not a module-level global), so the function is self-contained.
        prediction = targets.merge(prediction[['row_id','pred']], on = ['row_id'], how = 'left')
        print(prediction.head(5))

        # Estimate performances
        R2 = round(r2_score(y_true = prediction['target'], y_pred = prediction['pred']),3)
        RMSPE = round(rmspe(y_true = prediction['target'], y_pred = prediction['pred']),3)

        print('--')
        print(f'Performance of prediction: R2 score: {R2}, RMSPE: {RMSPE}')

        # Reshape to the submission layout: the prediction becomes 'target'.
        prediction = prediction.drop(columns=['target'])
        prediction = prediction.rename(columns={'pred': 'target'})

    elif pred == 'stupid_RF':
        # Stupid nonlinear regression between persistence and next volatility (random forest)
        prediction = stupidForestPrediction(book_path_train=book_path_train,
                                            prediction_column_name='pred',
                                            train_targets_pd=targets,
                                            book_path_test=book_path_test)

    elif pred == 'entropy':
        prediction = entropy_Prediction(book_path_train=book_path_train,
                                        prediction_column_name='pred',
                                        train_targets_pd=targets,
                                        book_path_test=book_path_test,
                                        all_stocks_ids=all_stocks_ids,
                                        test_file=test_file)

    else:
        # Fail loudly instead of hitting an UnboundLocalError at the return.
        raise ValueError(
            f"Unknown prediction strategy {pred!r}; expected 'naive', 'stupid_RF' or 'entropy'"
        )

    return prediction

**Main evaluation code**

In [4]:
# Models
# from persistence import *
from custom1 import *

In [9]:
# Collect every path inside each order-book / trade parquet folder
list_order_book_file_train = glob.glob('/kaggle/input/optiver-realized-volatility-prediction/book_train.parquet/*')
list_order_book_file_test = glob.glob('/kaggle/input/optiver-realized-volatility-prediction/book_test.parquet/*')
list_trade_file_train = glob.glob('/kaggle/input/optiver-realized-volatility-prediction/trade_train.parquet/*')
# Test trades live in trade_test.parquet; this previously globbed the
# training folder again, so the "test" trade list was really training data.
list_trade_file_test = glob.glob('/kaggle/input/optiver-realized-volatility-prediction/trade_test.parquet/*')

# Compute predictions
# NOTE(review): `all_stocks_ids` is not defined in this notebook's visible
# cells; presumably it is provided by `from custom1 import *` - confirm.
predictions = prediction_function(pred='entropy',
                                  book_path_train=list_order_book_file_train,
                                  trade_path_train=list_trade_file_train,
                                  targets=train,
                                  book_path_test=list_order_book_file_test,
                                  trade_path_test=list_trade_file_test,
                                  all_stocks_ids=all_stocks_ids,
                                  test_file=test)

# Write the submission file without the DataFrame index column
predictions.to_csv('submission.csv',index = False)