# Imports

In [None]:
# Basic Utils
import pandas as pd
import numpy as np
import os

# Plotting
import plotly.express as px
import plotly.graph_objects as go
import matplotlib.pyplot as plt

# Methods from de Prado
from deprado import *

# Read Raw Data
- All data from Refinitiv was previously loaded and combined.
- The data was previously resampled to daily frequency using forward fill.
- In this notebook we apply fractional differentiation and the standard scaler to obtain the final feature space.

In [None]:
# Load the combined, daily-resampled raw economic data set.
# The path is a plain string literal: there is nothing to interpolate, so
# no f-string prefix is needed.
eco = pd.read_parquet('data/eco_raw.parquet')
# Last expression of the cell: displays the first rows as a quick sanity check.
eco.head()

# Compute Money Velocity

In [None]:
# Money velocity: GDP divided element-wise by the matching money supply
# aggregate. Each result is stored as a new column on `eco`.
eco['M1 Money Velocity'] = eco['GDP'].div(eco['M1 Money Supply'])
eco['M2 Money Velocity'] = eco['GDP'].div(eco['M2 Money Supply'])

# Fractional Differentiation
- Apply fracdiff by calling `getMinFFD` from `deprado.py`.
- This step takes a few minutes.
- `out` stores the optimal fracdiff coefficients.
- `stats` stores the summary statistics of all features.

In [None]:
# Run the fractional-differentiation search over every column of `eco`
# (helper from deprado.py; slow, expect a few minutes).
#   stats: summary statistics of all features; later indexed by position
#          and read via its 'adfStat' and '95% conf' columns.
#   out:   optimal fracdiff coefficients; later indexed by the position of
#          each column in `eco`.
# NOTE(review): exact container types are defined in deprado.py; confirm there.
stats, out = getMinFFD(eco)

In [None]:
# Example diagnostic plot: ADF statistic for each tested d of one feature.
adfstats = stats[7]  # entry at position 7 is used as the example feature

plt.style.use(['science', 'ieee', 'no-latex'])
plt.figure()

# ADF statistic curve, plotted against the index of `adfstats`.
plt.plot(adfstats['adfStat'])

# Dashed reference line at the mean of the '95% conf' column, drawn from
# x=0 to x=1 (presumably the range of tested d values; confirm in deprado.py).
conf_95 = adfstats['95% conf']
plt.hlines(
    conf_95.mean(),
    xmin=0,
    xmax=1,
    color='black',
    linestyles='dashed',
)

plt.xlabel('d')
plt.ylabel('ADF Statistic')
plt.legend(['ADF Statistic', '95% Confidence Interval'])
plt.show()

In [None]:
# Compute the fractionally differenced series of every feature, each with
# its own optimal d.
# `out` is indexed by the position of the column in `eco`, so `enumerate`
# keeps the coefficient and the column in step without a hand-maintained
# counter.
ecoFFD = pd.DataFrame()
for i, col in enumerate(eco):
    # fracDiff_FFD is given a one-column DataFrame, the d for that column,
    # and a fixed threshold of 0.01.
    ecoFFD[col] = fracDiff_FFD(pd.DataFrame(eco[col]), out[i], thres=.01)

# Keep only the rows where every feature has a value.
# NOTE(review): the NaNs presumably come from the differencing warm-up
# window, whose length differs per feature; confirm in deprado.py.
ecoFFD.dropna(inplace=True)

# Apply Standard Scaler

In [None]:
from sklearn.preprocessing import StandardScaler

# Work on a NaN-free copy so `ecoFFD` itself is left untouched.
data = ecoFFD.copy().dropna()

# Fit the scaler on the feature matrix and standardise it in one call, then
# re-attach the original column labels and index to the scaled array.
scaler = StandardScaler()
X = pd.DataFrame(
    scaler.fit_transform(data),
    index=data.index,
    columns=data.columns,
)

# Line chart of the scaled feature space.
fig = px.line(X, title='FracDiff Feature Space + Standard Scaler')
fig.show()


Finally, the data was saved as `features.parquet`.