Ce notebook est relatif au Challenge :  
[*Volatility prediction in financial markets
by CFM*](https://challengedata.ens.fr/participants/challenges/22/)  


## Importation des modules :

In [None]:
import torch.nn as nn

import fastai.tabular.all as fai

import pandas as pd
import warnings
warnings.filterwarnings("ignore")

import plotly.express as px

import featuretools as ft

from sklearn.preprocessing import StandardScaler

from tsai.all import mape

## Chargement des données

In [None]:
# Load the training inputs and join the training targets onto them by ID.
train_inputs = pd.read_csv(
    'DATA/training_input.csv/training_input.csv', sep=';'
).set_index('ID')
train_targets = pd.read_csv(
    'DATA/training_input.csv/challenge_34_cfm_trainingoutputfile.csv'
).set_index('ID')
X = train_inputs.join(train_targets)

# Test inputs, indexed by ID like the training frame.
TEST = pd.read_csv(
    'DATA/testing_input.csv/testing_input.csv', sep=';'
).set_index('ID')

# Collect the feature names by family, based on a substring of the column name.
volatility_ft = [col for col in X.columns if 'volatility' in col]
return_ft = [col for col in X.columns if 'return' in col]

# Notebook display of the first rows.
X.head()

### Feature engineering  
Préparation :

In [None]:
# Summarise the return columns into a single feature, then drop them.

def _add_dir_mean(frame):
    """Add a ``dir_mean`` column to *frame* in place and drop its return columns.

    ``dir_mean`` is the row-wise mean of the element-wise product of the
    volatility columns and the return columns, with missing values replaced
    by 0 before multiplying. The two column groups are paired by position,
    so they are expected to have the same length and a matching order.
    """
    vol_values = frame[volatility_ft].fillna(0).values
    ret_values = frame[return_ft].fillna(0).values
    frame['dir_mean'] = (vol_values * ret_values).mean(axis=1)
    frame.drop(return_ft, axis=1, inplace=True)


_add_dir_mean(X)
_add_dir_mean(TEST)


In [None]:
# Hold out a random 15% of the rows for validation.
row_indices = fai.range_of(X)
splits = fai.RandomSplitter(valid_pct=0.15)(row_indices)

# Preprocessing steps and column roles handed to fastai's TabularPandas.
preprocessing = [fai.Categorify, fai.FillMissing, fai.Normalize]
categorical_cols = ['product_id']
continuous_cols = volatility_ft + ['dir_mean']

to = fai.TabularPandas(
    X,
    procs=preprocessing,
    cat_names=categorical_cols,
    cont_names=continuous_cols,
    y_names='TARGET',
    splits=splits,
)

# Batches of 1024 rows for both training and validation loaders.
dls = to.dataloaders(bs=1024)

In [None]:
# Model dimensions derived from the data loaders: embedding sizes for the
# categorical variables, number of outputs, number of continuous inputs.
emb_szs = fai.get_emb_sz(dls.train_ds)
n_out = fai.get_c(dls)
n_cont = len(dls.cont_names)

# Fully connected tabular network with five hidden layers of decreasing width,
# no embedding dropout, and batch-norm on the continuous inputs.
hidden_layers = [300, 200, 100, 50, 25]
tabular_model = fai.TabularModel(
    emb_szs=emb_szs,
    n_cont=n_cont,
    out_sz=n_out,
    layers=hidden_layers,
    embed_p=0.,
    bn_cont=True,
)

# MAPE is used both as the training loss and as a reported metric, next to R2.
learn = fai.TabularLearner(
    dls,
    model=tabular_model,
    loss_func=mape,
    metrics=[mape, fai.R2Score()],
    cbs=[fai.ShowGraphCallback()],
)


In [None]:
# Run the learning-rate finder, then train for 30 epochs with the one-cycle
# policy.
#
# lr_find() returns a tuple of suggested learning rates, not a scalar. Passing
# the whole tuple as lr_max only works when its length happens to match the
# number of optimizer parameter groups (the number of suggestions has changed
# between fastai releases), so pick one suggestion explicitly.
# NOTE(review): index 0 is the first suggestion of whichever fastai version is
# installed — confirm it is the one you want to train with.
suggested_lrs = learn.lr_find()
learn.fit_one_cycle(30, lr_max=suggested_lrs[0])

In [None]:
# Predict on the test set and write the submission file.
dl = learn.dls.test_dl(TEST)
preds = learn.get_preds(dl=dl)[0]

# The test frame is named TEST (there is no `test` variable), and ID is its
# index because of set_index('ID') at load time, not a column. Build the
# submission frame explicitly from the index and the flattened predictions.
submission = pd.DataFrame({
    'ID': TEST.index,
    'TARGET': preds.cpu().numpy().reshape(-1),
})
submission.to_csv('res.csv', index=False)