## Data Preprocessing

In [None]:
import pandas as pd
import numpy as np

### Utils

In [None]:
def dict_to_df(dataset):
    """
    Transforms a dictionary of price series into a DataFrame.

    Entries whose value is ``None`` (e.g. tickers that could not be downloaded) are skipped.
    Columns follow the dictionary order and are labelled with each series' own ``name``.

    :param dataset: A dictionary containing tickers as keys and corresponding price series as values.
    :return: A DataFrame with tickers as columns. An empty DataFrame is returned when the
             dictionary is empty or contains only ``None`` values.
    """

    available = [series for series in dataset.values() if series is not None]

    # Nothing to combine: return an empty frame instead of failing on an unbound variable.
    if not available:
        return pd.DataFrame()

    # Concatenate once (column-wise, outer-joined on the index) instead of growing a frame
    # inside a loop; this also guarantees a DataFrame even when only one series is present.
    return pd.concat(available, axis=1)


In [None]:
def split_data(df_prices, training_dates, testing_dates, remove_nan=True):
    """
    Splits a DataFrame of prices into training and testing sets.

    The data is first restricted to the window that goes from the training start date to the
    testing end date (both inclusive). This restriction is applied whether or not
    ``remove_nan`` is set, so the training start and testing end dates are always honoured.

    :param df_prices: DataFrame containing prices for all dates, indexed by date.
    :param training_dates: Tuple (training start date, training end date).
    :param testing_dates: Tuple (testing start date, testing end date).
    :param remove_nan: Whether to remove columns with any NaN values inside the window.
                       When True, the number of tickers before and after removal is printed.

    :return: A tuple with DataFrames for training and testing prices.
    """

    train_start, train_end = training_dates[0], training_dates[1]
    test_start, test_end = testing_dates[0], testing_dates[1]

    # Keep only the rows inside [train_start, test_end].
    window_mask = (df_prices.index >= train_start) & (df_prices.index <= test_end)
    df_window = df_prices[window_mask]

    if remove_nan:
        print(f'Total of {df_window.shape[1]} tickers')

        # A single NaN anywhere inside the window discards the whole ticker (column).
        df_window = df_window.dropna(axis=1)
        print(f'Total of {df_window.shape[1]} tickers after removing tickers with NaN values')

        df_window = df_window.copy()

    # Both boundaries are inclusive; rows between train_end and test_start land in neither set.
    df_prices_train = df_window[df_window.index <= train_end]
    df_prices_test = df_window[df_window.index >= test_start]

    return df_prices_train, df_prices_test


In [None]:
# def outliers(df_real):
#     df = df_real.copy()
    
#     outliers = []
#     for ticker in df.columns:
#         outliers_idx = df[ticker][np.abs(df[ticker].pct_change()) > 0.5].index
#         if len(outliers_idx)>0:
#             outliers.append((ticker, outliers_idx))
        
#     return outliers

## Commodity-related ETF Dataset

In [None]:
import os

from tiingo import TiingoClient

config = {}

# To reuse the same HTTP Session across API calls (and have better performance), include a session key.
config['session'] = True

############################################
#### WARNING: Tiingo API keys are rate limited (the key originally used with this notebook allowed
#### a maximum of 5000 requests per hour and 50000 per day). Use it as wisely as you can.
############################################
# The key is read from the environment so that the secret is never stored in the notebook.
# Set it before starting Jupyter, e.g. `export TIINGO_API_KEY=<your key>`.
# NOTE: any key that was previously hard-coded in this cell should be treated as leaked and revoked.
config['api_key'] = os.environ.get('TIINGO_API_KEY')
if not config['api_key']:
    raise RuntimeError('Set the TIINGO_API_KEY environment variable to your Tiingo API key.')

# Initialize
client = TiingoClient(config)

In [None]:
# Load the list of commodity-related ETFs from the spreadsheet.
df = pd.read_excel('data/etfs/commodity_ETFs_long_updated.xlsx')

# Keep only the first row of every ticker and order the result by ticker symbol.
unique_df = df.drop_duplicates(subset=['Ticker'], keep='first').sort_values(['Ticker'])
tickers = list(unique_df['Ticker'].unique())

In [None]:
# Number of unique tickers found in the spreadsheet.
len(tickers)

213

### Price

In [None]:
# Download the daily adjusted close price of every ticker between 2015-01-01 and 2020-01-01.
tickers = list(tickers)
frequency = 'daily'

error_counter = 0
# Pre-fill with None so that tickers that fail to download stay listed; dict_to_df skips them.
dataset_tiingo = {key: None for key in tickers}
for ticker in tickers:
    try:
        df = client.get_dataframe([ticker],
                                  frequency=frequency,
                                  metric_name='adjClose',
                                  startDate='2015-01-01',
                                  endDate='2020-01-01')
        series = df[ticker]
        series.name = ticker  # label the adjusted close series with its ticker
        dataset_tiingo[ticker] = series.copy()
    except Exception:
        # Best effort: a ticker that cannot be retrieved is reported and skipped.
        # Catching `Exception` (instead of a bare `except`) keeps Ctrl-C / kernel interrupts working.
        error_counter = error_counter + 1
        print('Not Possible to retrieve information for ' + ticker)

print('\nUnable to download ' + str(error_counter / len(tickers) * 100) + '% of the ETFs')

# Combine the individual series into one frame with one column per ticker.
df_prices = dict_to_df(dataset_tiingo)
df_prices


Not Possible to retrieve information for DGL
Not Possible to retrieve information for DGLD
Not Possible to retrieve information for DSLV
Not Possible to retrieve information for OILU


ERROR:root:b'{"detail":"Error: Ticker \'OLEM\' not found"}'


Not Possible to retrieve information for OLEM

Unable to download 2.3474178403755865% of the ETFs


Unnamed: 0,AAAU,AGQ,AMJ,AMJL,AMLP,AMU,AMUB,AMZA,AOIL,ATMP,...,XES,XLE,XLEY,XME,XOP,YGRN,YMLI,YMLP,ZMLP,ZSL
2018-08-15 00:00:00+00:00,11.74,23.58,19.758073,13.624200,36.480761,14.856799,12.588523,40.701518,24.867249,16.000505,...,146.352852,56.280787,,30.210828,141.559361,,12.609899,51.077909,92.311608,171.32
2018-08-16 00:00:00+00:00,11.74,24.31,19.902897,13.909112,36.781978,14.955570,12.588523,41.009863,24.921386,16.103402,...,147.294027,56.687768,,30.563628,142.898925,,12.581005,51.374343,92.540811,166.24
2018-08-17 00:00:00+00:00,11.82,24.90,20.158062,14.385293,37.150133,15.153112,12.588523,41.318208,25.192075,16.096052,...,148.705791,56.828646,,31.027839,143.731627,,12.725290,51.511159,93.744128,162.44
2018-08-20 00:00:00+00:00,11.90,24.69,20.351160,14.481661,37.518287,15.268345,12.588523,41.472380,25.110868,16.382694,...,151.435201,57.212147,,31.222807,145.216009,,12.868467,52.172436,94.890144,163.84
2018-08-21 00:00:00+00:00,11.93,24.68,20.158062,14.231664,37.116664,15.153112,12.822792,41.472380,25.264258,16.294497,...,154.070494,57.501730,,31.594176,147.967547,,12.776062,51.761988,93.916031,163.60
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-08-08 00:00:00+00:00,,27.01,20.137373,14.211168,37.298818,15.136651,12.588523,41.575162,25.294936,16.360645,...,158.776373,59.114001,,32.476176,150.972515,,12.793159,51.021535,92.827315,150.24
2018-08-09 00:00:00+00:00,,27.07,20.192544,14.521249,37.484819,15.391809,12.588523,41.729334,25.343119,16.529690,...,156.611669,58.566142,,32.336913,149.560542,,12.896304,51.648390,93.686827,149.72
2018-08-10 00:00:00+00:00,,26.64,20.330471,14.664619,37.250539,15.301269,12.588523,41.729334,25.362248,16.463542,...,158.494021,58.926164,,32.216218,151.008720,,12.922998,52.163307,94.317136,152.44
2018-08-13 00:00:00+00:00,,25.50,19.985653,13.815608,37.150133,14.972032,12.588523,40.958472,25.300350,16.125452,...,154.070494,58.174814,,31.696302,147.243458,,12.711443,51.670778,92.712714,159.08


In [None]:
# sort by the timestamp
df_prices = df_prices.sort_index()
df_prices

Unnamed: 0,AAAU,AGQ,AMJ,AMJL,AMLP,AMU,AMUB,AMZA,AOIL,ATMP,...,XES,XLE,XLEY,XME,XOP,YGRN,YMLI,YMLP,ZMLP,ZSL
2015-01-02 00:00:00+00:00,,38.42,25.599871,,43.707435,19.249564,,62.214415,,18.775602,...,254.709625,56.244960,,27.040231,168.479924,,14.770499,98.249776,139.746985,235.58
2015-01-05 00:00:00+00:00,,40.46,24.437728,,42.519464,18.378131,,57.465489,,18.012219,...,242.350297,53.918217,,26.041661,157.677695,,14.389459,95.068211,135.372226,224.32
2015-01-06 00:00:00+00:00,,42.10,23.946682,,41.950228,18.059473,,57.175761,,17.721406,...,234.737668,53.126134,,25.682526,152.903807,,14.195475,94.310695,133.123518,215.26
2015-01-07 00:00:00+00:00,,42.13,23.968506,,41.801731,18.020454,,56.540442,,17.569941,...,232.409099,53.239288,,25.796398,151.370587,,14.119267,93.401676,132.346691,215.12
2015-01-08 00:00:00+00:00,,40.94,24.279502,,42.172972,18.235060,,57.312115,,17.794170,...,236.260193,54.434485,,26.041661,156.492934,,14.257827,95.522720,134.595399,221.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-24 00:00:00+00:00,14.960,31.39,16.987634,9.288735,31.925385,12.766248,10.683127,30.300696,26.3328,14.393392,...,78.159666,51.070113,,27.910125,87.337064,46.7917,,46.812284,82.570177,108.12
2019-12-26 00:00:00+00:00,15.072,31.97,17.204037,9.557884,32.333733,12.913941,10.805842,30.761793,26.5800,14.490263,...,78.159666,51.053628,,28.044170,87.668724,46.8312,,47.117748,84.012751,106.24
2019-12-27 00:00:00+00:00,15.075,31.33,16.956719,9.407243,31.814017,12.738555,10.677725,30.103083,26.5470,14.377247,...,76.818691,50.839325,,27.938849,86.194680,47.0237,,46.735918,82.913647,108.24
2019-12-30 00:00:00+00:00,15.110,31.88,16.717130,9.203166,31.331424,12.581631,10.516421,29.641985,26.4320,14.199650,...,78.063882,50.683543,,27.919699,86.084127,47.2522,,46.455910,81.951931,106.48


In [None]:
# We try to fill sporadic null values. For this purpose, we can use either the `interpolate` or the
# `fillna` function.
# src: https://datascience.stackexchange.com/questions/25924/difference-between-interpolate-and-fillna-in-pandas.
# Using the limit parameter, we can set the maximum number of consecutive NaNs to fill.
# The result is stored under a new name, so `df_prices` keeps the raw (non-interpolated) prices.
# NOTE(review): gaps longer than `limit` are presumably filled only partially rather than left
# untouched, and interpolation uses the value that follows the gap -- confirm both are acceptable.

limit = 10
df_prices_interpolate = df_prices.interpolate(method="linear",limit=limit)
df_prices_interpolate

Unnamed: 0,AAAU,AGQ,AMJ,AMJL,AMLP,AMU,AMUB,AMZA,AOIL,ATMP,...,XES,XLE,XLEY,XME,XOP,YGRN,YMLI,YMLP,ZMLP,ZSL
2015-01-02 00:00:00+00:00,,38.42,25.599871,,43.707435,19.249564,,62.214415,,18.775602,...,254.709625,56.244960,,27.040231,168.479924,,14.770499,98.249776,139.746985,235.58
2015-01-05 00:00:00+00:00,,40.46,24.437728,,42.519464,18.378131,,57.465489,,18.012219,...,242.350297,53.918217,,26.041661,157.677695,,14.389459,95.068211,135.372226,224.32
2015-01-06 00:00:00+00:00,,42.10,23.946682,,41.950228,18.059473,,57.175761,,17.721406,...,234.737668,53.126134,,25.682526,152.903807,,14.195475,94.310695,133.123518,215.26
2015-01-07 00:00:00+00:00,,42.13,23.968506,,41.801731,18.020454,,56.540442,,17.569941,...,232.409099,53.239288,,25.796398,151.370587,,14.119267,93.401676,132.346691,215.12
2015-01-08 00:00:00+00:00,,40.94,24.279502,,42.172972,18.235060,,57.312115,,17.794170,...,236.260193,54.434485,,26.041661,156.492934,,14.257827,95.522720,134.595399,221.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-24 00:00:00+00:00,14.960,31.39,16.987634,9.288735,31.925385,12.766248,10.683127,30.300696,26.3328,14.393392,...,78.159666,51.070113,,27.910125,87.337064,46.7917,,46.812284,82.570177,108.12
2019-12-26 00:00:00+00:00,15.072,31.97,17.204037,9.557884,32.333733,12.913941,10.805842,30.761793,26.5800,14.490263,...,78.159666,51.053628,,28.044170,87.668724,46.8312,,47.117748,84.012751,106.24
2019-12-27 00:00:00+00:00,15.075,31.33,16.956719,9.407243,31.814017,12.738555,10.677725,30.103083,26.5470,14.377247,...,76.818691,50.839325,,27.938849,86.194680,47.0237,,46.735918,82.913647,108.24
2019-12-30 00:00:00+00:00,15.110,31.88,16.717130,9.203166,31.331424,12.581631,10.516421,29.641985,26.4320,14.199650,...,78.063882,50.683543,,27.919699,86.084127,47.2522,,46.455910,81.951931,106.48


### Volume

In [None]:
# Download the daily traded volume of every ticker between 2015-01-01 and 2020-01-01.
frequency = 'daily'
error_counter = 0
# Pre-fill with None so that tickers that fail to download stay listed; dict_to_df skips them.
dataset_tiingo_volume = {key: None for key in tickers}
for ticker in tickers:
    try:
        df = client.get_dataframe([ticker],
                                  frequency=frequency,
                                  metric_name='volume',
                                  startDate='2015-01-01',
                                  endDate='2020-01-01')
        series = df[ticker]
        series.name = ticker  # label the volume series with its ticker
        dataset_tiingo_volume[ticker] = series.copy()
    except Exception:
        # Best effort: a ticker that cannot be retrieved is reported and skipped.
        # Catching `Exception` (instead of a bare `except`) keeps Ctrl-C / kernel interrupts working.
        error_counter = error_counter + 1
        print('Not Possible to retrieve information for ' + ticker)

print('\nUnable to download info about volume from ' + str(error_counter / len(tickers) * 100) + '% of the ETFs')

# Combine the individual series into one frame with one column per ticker.
df_volume = dict_to_df(dataset_tiingo_volume)
df_volume

Not Possible to retrieve information for DGL
Not Possible to retrieve information for DGLD
Not Possible to retrieve information for DSLV
Not Possible to retrieve information for OILU


ERROR:root:b'{"detail":"Error: Ticker \'OLEM\' not found"}'


Not Possible to retrieve information for OLEM

Unable to download info about volume from 2.3474178403755865% of the ETFs


Unnamed: 0,AAAU,AGQ,AMJ,AMJL,AMLP,AMU,AMUB,AMZA,AOIL,ATMP,...,XES,XLE,XLEY,XME,XOP,YGRN,YMLI,YMLP,ZMLP,ZSL
2018-08-15 00:00:00+00:00,27407.0,584484,1720437,218.0,19882039,79100,170.0,1027484,300.0,2409,...,1538759,26400448,,5324201,43285267,,2188.0,10509,18977,29873
2018-08-16 00:00:00+00:00,428440.0,185009,1611562,1207.0,13957349,26200,0.0,868092,430.0,32650,...,821879,25013734,,2200654,20115394,,3690.0,7016,12499,18491
2018-08-17 00:00:00+00:00,52373.0,119261,2675623,1011.0,13793692,12600,0.0,742004,450.0,107279,...,1333808,11532984,,3101343,11528961,,1002.0,10724,2226,19221
2018-08-20 00:00:00+00:00,28728.0,86154,1523302,796.0,8734210,38200,0.0,1592126,580.0,156785,...,788708,8345742,,1777637,11450793,,5401.0,11627,9638,12030
2018-08-21 00:00:00+00:00,30625.0,62426,1050554,5454.0,14541267,18100,300.0,1148210,103.0,336392,...,766371,9509139,,1119893,13294464,,4397.0,2218,10518,16403
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2018-08-08 00:00:00+00:00,,65757,3508720,418.0,18018209,35700,75.0,874594,0.0,83387,...,1254113,9401055,,2813053,18628385,,1734.0,3596,6561,6508
2018-08-09 00:00:00+00:00,,47629,1513761,408.0,19918806,120200,0.0,657823,200.0,39446,...,882313,15232467,,2345184,9132557,,3405.0,4026,18478,2103
2018-08-10 00:00:00+00:00,,98531,3213111,2142.0,53371799,23600,0.0,698411,5100.0,34989,...,4164699,16977728,,2064293,10781484,,4897.0,2909,6272,14232
2018-08-13 00:00:00+00:00,,244683,1703311,931.0,15347257,18300,112.0,844565,300.0,32814,...,1320245,9817144,,2121845,13300362,,8243.0,3587,7143,42879


In [None]:
# Sort the rows by their timestamp index: the frame displayed right after the download
# is not in chronological order.
df_volume = df_volume.sort_index()
df_volume

Unnamed: 0,AAAU,AGQ,AMJ,AMJL,AMLP,AMU,AMUB,AMZA,AOIL,ATMP,...,XES,XLE,XLEY,XME,XOP,YGRN,YMLI,YMLP,ZMLP,ZSL
2015-01-02 00:00:00+00:00,,324143,1947856,,5806291,60623,,2205,,10719,...,192940,27749102,,1440026,5457702,,10020.0,123127,22487,34859
2015-01-05 00:00:00+00:00,,347534,1907890,,6900730,83844,,7526,,35967,...,358488,45395242,,2776157,10928048,,16634.0,123449,24458,46062
2015-01-06 00:00:00+00:00,,324555,2896647,,7236421,93500,,5683,,68337,...,274564,41874439,,2353055,12541465,,26135.0,100352,29398,55871
2015-01-07 00:00:00+00:00,,185193,2088680,,5144529,93718,,8284,,668181,...,156576,31191769,,3135626,9486224,,15408.0,114544,47453,64865
2015-01-08 00:00:00+00:00,,201442,1537017,,6945633,101654,,1537,,76318,...,275026,28175464,,1390096,11885961,,27618.0,131492,104598,35943
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-24 00:00:00+00:00,66352.0,246155,1888112,2970.0,13686597,114000,455.0,386535,119.0,104357,...,678129,5037439,,1430830,10694001,0.0,,5979,67618,24487
2019-12-26 00:00:00+00:00,42613.0,479841,2669241,16643.0,28248658,524100,750.0,754797,0.0,237661,...,1025838,10156173,,1455855,17934953,0.0,,6006,39425,37402
2019-12-27 00:00:00+00:00,64585.0,418601,2696384,12583.0,23976103,356300,343.0,633323,12.0,261200,...,805143,10718287,,1947094,30191814,0.0,,16318,52389,32651
2019-12-30 00:00:00+00:00,93364.0,398675,3005170,28251.0,31170279,329900,54.0,924123,110.0,259490,...,1618250,14629528,,2863776,33020918,0.0,,31342,33017,40358


In [None]:
# Linearly interpolate missing volume values, filling at most `limit` consecutive NaNs.
# NOTE(review): `df_volume` is overwritten with the interpolated frame, so this cell is not
# idempotent -- re-running it interpolates already-interpolated data and may fill further NaNs.
limit = 10
df_volume = df_volume.interpolate(method="linear",limit=limit)
df_volume

Unnamed: 0,AAAU,AGQ,AMJ,AMJL,AMLP,AMU,AMUB,AMZA,AOIL,ATMP,...,XES,XLE,XLEY,XME,XOP,YGRN,YMLI,YMLP,ZMLP,ZSL
2015-01-02 00:00:00+00:00,,324143,1947856,,5806291,60623,,2205,,10719,...,192940,27749102,,1440026,5457702,,10020.0,123127,22487,34859
2015-01-05 00:00:00+00:00,,347534,1907890,,6900730,83844,,7526,,35967,...,358488,45395242,,2776157,10928048,,16634.0,123449,24458,46062
2015-01-06 00:00:00+00:00,,324555,2896647,,7236421,93500,,5683,,68337,...,274564,41874439,,2353055,12541465,,26135.0,100352,29398,55871
2015-01-07 00:00:00+00:00,,185193,2088680,,5144529,93718,,8284,,668181,...,156576,31191769,,3135626,9486224,,15408.0,114544,47453,64865
2015-01-08 00:00:00+00:00,,201442,1537017,,6945633,101654,,1537,,76318,...,275026,28175464,,1390096,11885961,,27618.0,131492,104598,35943
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-24 00:00:00+00:00,66352.0,246155,1888112,2970.0,13686597,114000,455.0,386535,119.0,104357,...,678129,5037439,,1430830,10694001,0.0,,5979,67618,24487
2019-12-26 00:00:00+00:00,42613.0,479841,2669241,16643.0,28248658,524100,750.0,754797,0.0,237661,...,1025838,10156173,,1455855,17934953,0.0,,6006,39425,37402
2019-12-27 00:00:00+00:00,64585.0,418601,2696384,12583.0,23976103,356300,343.0,633323,12.0,261200,...,805143,10718287,,1947094,30191814,0.0,,16318,52389,32651
2019-12-30 00:00:00+00:00,93364.0,398675,3005170,28251.0,31170279,329900,54.0,924123,110.0,259490,...,1618250,14629528,,2863776,33020918,0.0,,31342,33017,40358


## Data Cleaning

In [None]:
# Boundaries of the training and testing periods, as timezone-aware (UTC) timestamps.
(initial_train_date,
 final_train_date,
 initial_test_date,
 final_test_date) = (
    pd.to_datetime(day, utc=True)
    for day in ('2015-01-01', '2017-12-31', '2018-01-01', '2019-12-31')
)

### Discard ETFs that have at least one day without any trading

In [None]:
# Split the volume data into the training and test periods.
df_volume_train, df_volume_test = split_data(
    df_volume,
    (initial_train_date, final_train_date),
    (initial_test_date, final_test_date),
    remove_nan=True,
)

# Treat zero-volume days as missing, fill at most 2 consecutive missing days by linear
# interpolation (isolated occurrences), then turn whatever is still missing back into zeros.
df_volume_train = (
    df_volume_train
    .replace(to_replace=0, value=np.nan)
    .interpolate(method="linear", limit=2)
    .replace(to_replace=np.nan, value=0)
)

# Per-ETF count of the training days that still show zero trading volume.
zero_counts = df_volume_train.eq(0).sum(axis=0)
# ETFs with at least one such day.
etfs_with_zero_volume = zero_counts[zero_counts > 0]
# Report how many ETFs are going to be discarded for insufficient liquidity.
print('Must remove {} tickers because of insufficient liquidity'.format(len(etfs_with_zero_volume)))
# Tickers of the ETFs to discard.
tickers_to_remove = etfs_with_zero_volume.index.tolist()
tickers_to_remove

Total of 208 tickers
Total of 137 tickers after removing tickers with NaN values
Must remove 16 tickers because of insufficient liquidity


['BCM',
 'CHIE',
 'DDG',
 'FUD',
 'FUE',
 'GRU',
 'GSC',
 'LD',
 'MLPC',
 'MLPO',
 'OIL',
 'RJN',
 'TAGS',
 'UAG',
 'UBG',
 'USV']

In [None]:
# Drop the illiquid ETFs from the price data. `errors='ignore'` keeps this cell re-runnable
# (on a second run the columns are already gone) and tolerates tickers that have volume data
# but no price column.
df_prices_interpolate = df_prices_interpolate.drop(columns=tickers_to_remove, errors='ignore')

In [None]:
# Split the cleaned prices into the training and test periods, dropping tickers with NaN values.
df_prices_train, df_prices_test = split_data(
    df_prices_interpolate,
    training_dates=(initial_train_date, final_train_date),
    testing_dates=(initial_test_date, final_test_date),
    remove_nan=True,
)
print('Total days of trading: ', len(df_prices_train) + len(df_prices_test))

Total of 192 tickers
Total of 121 tickers after removing tickers with NaN values
Total days of trading:  1258


In [None]:
# Display the training-period prices.
df_prices_train

Unnamed: 0,AGQ,AMJ,AMLP,AMU,AMZA,ATMP,BNO,BOIL,CANE,CGW,...,USO,VDE,WEAT,XES,XLE,XME,XOP,YMLP,ZMLP,ZSL
2015-01-02 00:00:00+00:00,38.4200,25.599871,43.707435,19.249564,62.214415,18.775602,22.0199,3204.0,11.5900,24.501518,...,159.120,83.842583,12.5400,254.709625,56.244960,27.040231,168.479924,98.249776,139.746985,235.58
2015-01-05 00:00:00+00:00,40.4600,24.437728,42.519464,18.378131,57.465489,18.012219,20.7000,3144.0,11.4501,23.914393,...,150.320,80.516682,12.6500,242.350297,53.918217,26.041661,157.677695,95.068211,135.372226,224.32
2015-01-06 00:00:00+00:00,42.1000,23.946682,41.950228,18.059473,57.175761,17.721406,19.9200,3150.0,11.8600,23.756658,...,144.400,79.305905,12.7300,234.737668,53.126134,25.682526,152.903807,94.310695,133.123518,215.26
2015-01-07 00:00:00+00:00,42.1300,23.968506,41.801731,18.020454,56.540442,17.569941,19.9700,3036.0,11.7100,23.984498,...,146.952,79.500228,12.4761,232.409099,53.239288,25.796398,151.370587,93.401676,132.346691,215.12
2015-01-08 00:00:00+00:00,40.9400,24.279502,42.172972,18.235060,57.312115,17.794170,20.0100,3180.0,12.0700,24.177285,...,148.400,81.219232,12.2300,236.260193,54.434485,26.041661,156.492934,95.522720,134.595399,221.00
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2017-12-22 00:00:00+00:00,31.5200,18.209592,34.235607,13.689727,38.061933,14.747087,17.5600,268.0,9.5210,32.461886,...,93.280,80.356694,5.9399,158.420614,55.549447,32.631044,132.819874,49.312120,86.652326,135.32
2017-12-26 00:00:00+00:00,32.3400,18.309461,34.298772,13.729453,38.283739,14.789895,18.0000,266.0,9.6200,32.514948,...,95.600,81.180154,5.9300,161.891047,56.035707,33.200137,135.777840,49.397214,87.249180,131.72
2017-12-27 00:00:00+00:00,32.8600,18.236224,34.077693,13.665891,38.283739,14.725684,17.9200,284.0,9.6740,32.597847,...,95.360,80.862184,5.9800,160.765501,55.850465,32.897233,134.695657,49.120658,86.760845,129.56
2017-12-28 00:00:00+00:00,33.4586,18.316119,34.235607,13.745344,38.195017,14.825567,18.0000,320.0,9.7200,32.717591,...,95.760,80.968174,6.0000,160.108933,55.935367,33.411252,135.705694,49.418487,86.598066,127.16


In [None]:
# Display the test-period prices.
df_prices_test

Unnamed: 0,AGQ,AMJ,AMLP,AMU,AMZA,ATMP,BNO,BOIL,CANE,CGW,...,USO,VDE,WEAT,XES,XLE,XME,XOP,YMLP,ZMLP,ZSL
2018-01-02 00:00:00+00:00,34.86,18.682309,34.930425,14.015484,39.215325,15.139487,18.10,338.0,9.888,32.689957,...,96.56,82.125911,6.0800,163.766958,56.691772,34.604511,137.870059,51.269287,89.039741,122.04
2018-01-03 00:00:00+00:00,34.67,19.141710,35.783157,14.341240,40.102549,15.503348,18.45,328.5,9.940,32.689957,...,98.72,83.373331,6.0900,167.800164,57.540798,34.375038,139.240824,51.822400,91.318638,122.80
2018-01-04 00:00:00+00:00,34.90,19.268211,36.098984,14.436584,40.146911,15.617501,18.48,304.0,9.860,32.800490,...,98.96,83.821750,6.0700,171.458189,57.888127,34.604511,139.926206,52.120230,91.644194,121.96
2018-01-05 00:00:00+00:00,34.84,19.088446,35.846323,14.341240,39.958648,15.581828,18.42,290.0,9.830,33.003133,...,98.48,83.797291,6.0200,170.989212,57.864972,34.531079,139.024387,51.843674,91.047340,121.92
2018-01-08 00:00:00+00:00,34.53,19.208290,35.941071,14.444529,40.052779,15.603232,18.46,292.5,9.650,33.086032,...,99.04,84.270169,5.9999,174.928623,58.212300,34.815626,139.854061,52.056409,91.752713,123.32
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-12-24 00:00:00+00:00,31.39,16.987634,31.925385,12.766248,30.300696,14.393392,20.92,83.6,6.990,38.965062,...,102.16,70.764749,5.6800,78.159666,51.070113,27.910125,87.337064,46.812284,82.570177,108.12
2019-12-26 00:00:00+00:00,31.97,17.204037,32.333733,12.913941,30.761793,14.490263,21.12,87.0,7.010,39.251781,...,103.12,70.712799,5.7600,78.159666,51.053628,28.044170,87.668724,47.117748,84.012751,106.24
2019-12-27 00:00:00+00:00,31.33,16.956719,31.814017,12.738555,30.103083,14.377247,21.18,86.1,7.050,39.356911,...,103.28,70.314514,5.8200,76.818691,50.839325,27.938849,86.194680,46.735918,82.913647,108.24
2019-12-30 00:00:00+00:00,31.88,16.717130,31.331424,12.581631,29.641985,14.199650,21.08,84.2,7.080,39.251781,...,103.12,70.115372,5.8000,78.063882,50.683543,27.919699,86.084127,46.455910,81.951931,106.48


In [None]:
# Sanity check: True if any training price is NaN (False is the desired result).
df_prices_train.isna().any().any()

False

In [None]:
# Sanity check: True if any test price is NaN (False is the desired result).
df_prices_test.isna().any().any()

False