# Setup

### Install relevant packages

In [1]:
# conda install pandas
# pip install beautifulsoup4
# pip install requests
# pip install yfinance
# pip install yahoo_fin
# pip install requests_html
# pip install python-twitter
# conda install quandl
# pip install pandas_datareader
# pip install tweepy             # python client for the official Twitter API
# pip install textblob           # python library for processing textual data
# pip install twint              # python client for unlimited Twitter scrape
# pip install nest_asyncio

### Install relevant corpora

In [2]:
# python -m textblob.download_corpora   #NLTK corpora

### Import relevant packages

In [1]:
# general packages
import pandas as pd
from tqdm.auto import tqdm
import os

In [2]:
# packages for importing variables from YAML file
import yaml

In [3]:
# packages for printing nan value ranges
from pprint import pprint

In [55]:
# packages for getting quandl economic data
import quandl

### Allow nested event loops (prevents "event loop is already running" errors inside Jupyter)

In [4]:
# Jupyter already runs an asyncio event loop; patch asyncio so that code which starts
# its own loop can run inside notebook cells instead of raising a RuntimeError
import nest_asyncio
nest_asyncio.apply()

### Configure max rows and max columns

In [5]:
# cap how many columns and rows pandas renders when a dataframe is displayed
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100

### Load in the '.py' file containing the functions used in this notebook

In [12]:
# load IPython's autoreload extension so edits to the helper module are picked up
# without restarting the kernel
%load_ext autoreload
# mode 1: only modules registered with %aimport are reloaded before code is executed
%autoreload 1
%aimport data_collection_functions

# project helper module with the scraping/cleaning functions, referenced below as `dc`
import data_collection_functions as dc

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# ----------------------------------------------------------------------------------------------------------------

# Read in variables from YAML configuration file

In [91]:
# read in YAML configuration file
with open("../../config_files/config.yaml", "r") as variables:
    config_variables = yaml.load(variables, Loader=yaml.FullLoader)

In [45]:
# root data folder, taken from the YAML config
data_directory = config_variables["data_directory"]
# sub-folder handed to the scraping helpers and used for the CSV written later on
scrape_directory = os.path.join(data_directory, "Scraped_data")

In [9]:
# start date from the YAML config; passed to the Yahoo Finance scrape further down
start_date = config_variables["start_date"]

# ----------------------------------------------------------------------------------------------------------------

# Scrape bitcoin's internal data

Internal Bitcoin features were an important basis point for the data I would use in this project for my prediction model.

These features directly described bitcoin with respect to the bitcoin market, looking at bitcoin as a traded asset, and also with respect to the bitcoin network, looking at bitcoin as an infrastructure.

Previous studies showed that these features are important inputs to a prediction model, with the single most informative feature for the forecast being the previous price. [REFERENCE]

While these studies also showed that these features were not enough to create a really good and accurate model, they did prove important to a degree. We will scrape these features and use them as a base of our model and then build on them with other features.

### Scrape the market and blockchain data from *'data.bitcoinity.org'*

The 'bitcoinity.org' website was a good place I found to scrape this internal data. It contained both market and blockchain data in an easy to scrape way.
This website was set up so that the specific data feature wanted can be clicked from a menu on the side of the screen and then a graph of this data shows up along with a button to download a CSV of this data.
This CSV presented the data from all the major exchanges in a manageable format, so that I could scrape all of it into a usable table for my model.

While I could have manually downloaded all of these CSV files, I decided to set up a scrape for this, as it would allow me to scrape the latest data at any point in the future rather than having to manually download all the data each time. Instead, I could just run this scrape and obtain the latest data.

My process for this scrape was to iteratively click through the different data features and download their CSV files. After this, I could then read in these CSV files again and merge them into one table.
After this data was scraped and merged into one big table, I stored this table of internal features in a CSV to ease the process of reading it back in. This saved me time when loading it again and also protected me from any obscure website changes.

#### Scrape the data

In [10]:
# scrape the market and blockchain series from data.bitcoinity.org via the project helper
# NOTE(review): merge_exchanges=True presumably collapses the per-exchange columns into a
# single column per feature (the rank_* columns stay per exchange in the output) — confirm
# against data_collection_functions
bitcoinity_data = dc.scrape_bitcoinity_data(scrape_directory, config_variables, merge_exchanges=True)

Scraping the bitcoin market data


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=11.0), HTML(value='')))

    - Trading Volume
    - Rank
    - Price
    - Market Cap
    - Trades Per Minute
    - Volatility
    - Bid/Ask Spread
    - Bid/Ask Sum

Scraping the bitcoin blockchain data


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=7.0), HTML(value='')))

    - Hashrate
    - Mining Difficulty
    - Block Size
    - Number Of Transactions
    - Time Between Blocks
    - Block Size Votes

Scraped 4499 days of data - from '2009-01-09' to '2021-05-05'


#### Check where nan values are in this data

In [153]:
# collect, per column, the date ranges where values are missing; with output=True the
# helper also prints the summary shown below
bitcoinity_nan_col_dates = dc.find_col_nan_ranges(bitcoinity_data, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=24.0), HTML(value='')))


---------------------------------------------------------------------
24 columns had a 'NaN' value in them:
['trading_volume',
 'price',
 'market_cap',
 'trades_per_minute',
 'volatility',
 'bid_and_ask_spread',
 'bid_and_ask_sum_asks',
 'bid_and_ask_sum_bids',
 'hashrate',
 'mining_difficulty',
 'block_size',
 'number_of_transactions',
 'time_between_blocks',
 'block_size_votes',
 'rank_bit_x',
 'rank_bitfinex',
 'rank_bitstamp',
 'rank_btce',
 'rank_coinbase',
 'rank_itbit',
 'rank_kraken',
 'rank_mtgox',
 'rank_okcoin',
 'rank_others']
---------------------------------------------------------------------
The date ranges in these columns where the NaN's are located are:
{'bid_and_ask_spread': [('2009-01-09', '2011-06-16'),
                        ('2011-12-01', '2011-12-04'),
                        ('2011-12-21', '2012-06-30'),
                        ('2012-07-12', '2012-07-14'),
                        ('2012-08-24', '2012-08-26'),
                        ('2012-09-06', '2012-10-

### Scrape the market and blockchain data from *'bitinfocharts.com'*

All the fees and transaction values are in USD.

- To obtain these in bitcoin -> divide by the BTC price in USD

You can also:
- Drop them
- Recalculate them after converting the raw values to BTC

#### Scrape the data

In [15]:
# scrape the bitinfocharts.com series (raw features plus their derived indicator columns)
# via the project helper
bitinfocharts_data = dc.scrape_bitinfocharts_data(scrape_directory, config_variables)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=874.0), HTML(value='')))


Scraped 4506 days of data - from '2009-01-03' to '2021-05-06'


#### Check where nan values are in this data

In [20]:
# same missing-value audit as for the bitcoinity data: per-column date ranges containing NaN
bitinfocharts_nan_col_dates = dc.find_col_nan_ranges(bitinfocharts_data, output=True)

874 columns with 'nan' in them found
['marketcap',
 'marketcap3sma',
 'marketcap7sma',
 'marketcap14sma',
 'marketcap30sma',
 'marketcap90sma',
 'marketcap3ema',
 'marketcap7ema',
 'marketcap14ema',
 'marketcap30ema',
 'marketcap90ema',
 'marketcap3wma',
 'marketcap7wma',
 'marketcap14wma',
 'marketcap30wma',
 'marketcap90wma',
 'marketcap3trx',
 'marketcap7trx',
 'marketcap14trx',
 'marketcap30trx',
 'marketcap90trx',
 'marketcap3mom',
 'marketcap7mom',
 'marketcap14mom',
 'marketcap30mom',
 'marketcap90mom',
 'marketcap3std',
 'marketcap7std',
 'marketcap14std',
 'marketcap30std',
 'marketcap90std',
 'marketcap3var',
 'marketcap7var',
 'marketcap14var',
 'marketcap30var',
 'marketcap90var',
 'marketcap3rsi',
 'marketcap7rsi',
 'marketcap14rsi',
 'marketcap30rsi',
 'marketcap90rsi',
 'marketcap3roc',
 'marketcap7roc',
 'marketcap14roc',
 'marketcap30roc',
 'marketcap90roc',
 'price',
 'price3sma',
 'price7sma',
 'price14sma',
 'price30sma',
 'price90sma',
 'price3ema',
 'price7ema',
 

### Merge the data from both of these different sources

In [187]:
# create a master dataframe with all the columns from both data sources
list_of_dfs = [bitcoinity_data, bitinfocharts_data]
merged_internal_data = dc.merge_dfs_on_col(list_of_dfs, "date")

In [188]:
# turn the same feature columns from both these data sources into one column
cols_to_join = config_variables["bitcoin_internal_data_cols_to_join"]
for col_name, (c1, c2) in cols_to_join.items():
    merged_internal_data[col_name] = merged_internal_data[[c1, c2]].mean(axis=1)
    merged_internal_data.drop(columns=[c1, c2], inplace=True)

In [189]:
# (rows, columns) of the merged frame after the duplicate feature columns were combined
merged_internal_data.shape

(4507, 890)

In [190]:
# earliest rows: almost entirely NaN, as hardly any series has values for the first days of January 2009
merged_internal_data.head()

Unnamed: 0,date,trading_volume,trades_per_minute,volatility,bid_and_ask_spread,bid_and_ask_sum_asks,bid_and_ask_sum_bids,time_between_blocks,block_size_votes,rank_bit_x,rank_bitfinex,rank_bitstamp,rank_btce,rank_coinbase,rank_itbit,rank_kraken,rank_mtgox,rank_okcoin,rank_others,marketcap3sma,marketcap7sma,marketcap14sma,marketcap30sma,marketcap90sma,marketcap3ema,marketcap7ema,marketcap14ema,marketcap30ema,marketcap90ema,marketcap3wma,marketcap7wma,marketcap14wma,marketcap30wma,marketcap90wma,marketcap3trx,marketcap7trx,marketcap14trx,marketcap30trx,marketcap90trx,marketcap3mom,marketcap7mom,marketcap14mom,marketcap30mom,marketcap90mom,marketcap3std,marketcap7std,marketcap14std,marketcap30std,marketcap90std,marketcap3var,...,google_trends90roc,top100cap,top100cap3sma,top100cap7sma,top100cap14sma,top100cap30sma,top100cap90sma,top100cap3ema,top100cap7ema,top100cap14ema,top100cap30ema,top100cap90ema,top100cap3wma,top100cap7wma,top100cap14wma,top100cap30wma,top100cap90wma,top100cap3trx,top100cap7trx,top100cap14trx,top100cap30trx,top100cap90trx,top100cap3mom,top100cap7mom,top100cap14mom,top100cap30mom,top100cap90mom,top100cap3std,top100cap7std,top100cap14std,top100cap30std,top100cap90std,top100cap3var,top100cap7var,top100cap14var,top100cap30var,top100cap90var,top100cap3rsi,top100cap7rsi,top100cap14rsi,top100cap30rsi,top100cap90rsi,top100cap3roc,top100cap7roc,top100cap14roc,top100cap30roc,top100cap90roc,price,hashrate,num_transactions
0,2009-01-03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2009-01-04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2009-01-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2009-01-06,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2009-01-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [191]:
# latest rows: the final date is only partly populated because the two scrapes end on different days
merged_internal_data.tail()

Unnamed: 0,date,trading_volume,trades_per_minute,volatility,bid_and_ask_spread,bid_and_ask_sum_asks,bid_and_ask_sum_bids,time_between_blocks,block_size_votes,rank_bit_x,rank_bitfinex,rank_bitstamp,rank_btce,rank_coinbase,rank_itbit,rank_kraken,rank_mtgox,rank_okcoin,rank_others,marketcap3sma,marketcap7sma,marketcap14sma,marketcap30sma,marketcap90sma,marketcap3ema,marketcap7ema,marketcap14ema,marketcap30ema,marketcap90ema,marketcap3wma,marketcap7wma,marketcap14wma,marketcap30wma,marketcap90wma,marketcap3trx,marketcap7trx,marketcap14trx,marketcap30trx,marketcap90trx,marketcap3mom,marketcap7mom,marketcap14mom,marketcap30mom,marketcap90mom,marketcap3std,marketcap7std,marketcap14std,marketcap30std,marketcap90std,marketcap3var,...,google_trends90roc,top100cap,top100cap3sma,top100cap7sma,top100cap14sma,top100cap30sma,top100cap90sma,top100cap3ema,top100cap7ema,top100cap14ema,top100cap30ema,top100cap90ema,top100cap3wma,top100cap7wma,top100cap14wma,top100cap30wma,top100cap90wma,top100cap3trx,top100cap7trx,top100cap14trx,top100cap30trx,top100cap90trx,top100cap3mom,top100cap7mom,top100cap14mom,top100cap30mom,top100cap90mom,top100cap3std,top100cap7std,top100cap14std,top100cap30std,top100cap90std,top100cap3var,top100cap7var,top100cap14var,top100cap30var,top100cap90var,top100cap3rsi,top100cap7rsi,top100cap14rsi,top100cap30rsi,top100cap90rsi,top100cap3roc,top100cap7roc,top100cap14roc,top100cap30roc,top100cap90roc,price,hashrate,num_transactions
4502,2021-05-02,4535.371156,90.996667,786.964035,2.323319,49304730.0,41573460.0,,,0.014749,2308.344201,466.043687,,1401.43157,,501.754606,,,325.262687,1058878000000.0,1031316000000.0,1010633000000.0,1064962000000.0,990571400000.0,1056823000000.0,1036980000000.0,1034445000000.0,1043193000000.0,955137400000.0,1063883000000.0,1043692000000.0,1017077000000.0,1045130000000.0,1037883000000.0,1.342,-0.119,-0.27,0.25,0.781,55673100000.0,134980200000.0,15192450000.0,-43463590000.0,435737200000.0,39078220000.0,57657810000.0,93441790000.0,133785700000.0,235232600000.0,3.817768e+20,...,,14.832,14.818,14.769,14.669,14.519,14.188,14.813,14.767,14.691,14.563,14.247,14.821,14.793,14.73,14.617,14.373,0.156,0.155,0.107,0.079,0.021,0.052,0.139,0.265,0.493,1.263,0.023,0.101,0.23,0.362,0.668,,0.003,0.013,0.033,0.112,88.237,86.925,82.021,75.997,64.37,0.35,0.944,1.817,3.437,9.304,56536.055373,1.678915e+20,235029.0
4503,2021-05-03,8455.341969,133.702361,140.935477,2.6414,70767490.0,38840490.0,,,0.013197,2273.890488,429.608004,,1375.0471,,461.062539,,,298.032956,1075468000000.0,1044488000000.0,1012719000000.0,1064377000000.0,995427600000.0,1069850000000.0,1048454000000.0,1040902000000.0,1045753000000.0,957944800000.0,1075882000000.0,1056582000000.0,1026709000000.0,1046286000000.0,1039911000000.0,1.295,0.079,-0.241,0.235,0.774,49771420000.0,92200290000.0,29207880000.0,-17555840000.0,437061000000.0,17558540000.0,56619340000.0,98366500000.0,133312700000.0,224357300000.0,7.707561e+19,...,,14.847,14.828,14.792,14.694,14.537,14.202,14.83,14.787,14.712,14.582,14.261,14.835,14.813,14.754,14.638,14.387,0.141,0.155,0.111,0.08,0.021,0.03,0.162,0.342,0.528,1.269,0.035,0.087,0.227,0.372,0.67,,0.002,0.013,0.035,0.112,90.721,87.913,82.672,76.399,64.567,0.2,1.103,2.36,3.687,9.345,57914.452032,1.813223e+20,286830.0
4504,2021-05-04,12928.49,190.094861,187.732074,2.579816,50452760.0,45333060.0,,,0.013616,2379.419313,447.921898,,1458.092366,,495.607223,,,375.329754,1060083000000.0,1046478000000.0,1012488000000.0,1062933000000.0,999358000000.0,1052044000000.0,1044900000000.0,1040014000000.0,1045010000000.0,959621600000.0,1055267000000.0,1054020000000.0,1029578000000.0,1044341000000.0,1040764000000.0,0.525,0.163,-0.216,0.22,0.767,-46156430000.0,13933800000.0,-3234291000.0,-43315730000.0,353734900000.0,39947180000.0,53997470000.0,98147830000.0,133648400000.0,214320900000.0,3.989443e+20,...,,14.88,14.853,14.815,14.719,14.556,14.217,14.855,14.81,14.734,14.601,14.274,14.861,14.835,14.779,14.66,14.402,0.144,0.155,0.115,0.081,0.022,0.075,0.161,0.357,0.559,1.324,0.04,0.083,0.225,0.383,0.671,,0.002,0.013,0.037,0.112,94.618,89.94,84.09,77.299,65.013,0.51,1.097,2.457,3.903,9.769,55283.417218,1.879044e+20,300643.0
4505,2021-05-05,11404.495772,177.942361,505.884363,2.69045,65307110.0,41335940.0,,,0.014003,2733.793479,498.577291,,1470.038811,,482.710054,,,378.211717,1053983000000.0,1049435000000.0,1013099000000.0,1061604000000.0,1003203000000.0,1048439000000.0,1044884000000.0,1040657000000.0,1044999000000.0,961494400000.0,1047643000000.0,1053609000000.0,1033891000000.0,1043173000000.0,1041764000000.0,0.119,0.2,-0.192,0.207,0.759,-18300350000.0,20697130000.0,8558205000.0,-39872810000.0,346044100000.0,41767140000.0,50961860000.0,98837070000.0,133548800000.0,204819700000.0,4.361236e+20,...,,14.893,14.873,14.836,14.746,14.576,14.231,14.874,14.831,14.755,14.62,14.288,14.881,14.855,14.802,14.682,14.417,0.141,0.154,0.118,0.083,0.023,0.061,0.15,0.371,0.615,1.308,0.039,0.075,0.213,0.387,0.672,,0.001,0.011,0.037,0.113,95.696,90.666,84.628,77.649,65.19,0.411,1.016,2.558,4.309,9.628,55395.842503,1.735439e+20,309617.0
4506,2021-05-06,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,14.897,14.89,14.853,14.772,14.598,14.246,14.886,14.848,14.774,14.638,14.301,14.893,14.87,14.822,14.703,14.432,0.125,0.151,0.12,0.084,0.024,0.05,0.117,0.365,0.662,1.362,0.015,0.069,0.19,0.382,0.67,,0.001,0.009,0.037,0.112,96.055,90.898,84.796,77.757,65.243,0.34,0.79,2.51,4.652,10.066,56654.0,,


### Fill in the NaN values using interpolation

In [192]:
# linear interpolation down each column (row order is chronological)
# - NaNs before a column's first observation are left as NaN (see the check below)
# - NaNs after a column's last observation are filled with that last value, which is how
#   pandas' default forward fill direction treats trailing gaps
processed_internal_data = merged_internal_data.interpolate(method="linear", axis=0)

#### Check again if there are any NaN values

In [193]:
# re-run the missing-value audit on the interpolated frame; ranges that remain all start at
# the first date, i.e. they are leading gaps before a series begins
processed_internal_nan_col_dates = dc.find_col_nan_ranges(processed_internal_data, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=889.0), HTML(value='')))


---------------------------------------------------------------------
889 columns had a 'NaN' value in them:
['trading_volume',
 'trades_per_minute',
 'volatility',
 'bid_and_ask_spread',
 'bid_and_ask_sum_asks',
 'bid_and_ask_sum_bids',
 'time_between_blocks',
 'block_size_votes',
 'rank_bit_x',
 'rank_bitfinex',
 'rank_bitstamp',
 'rank_btce',
 'rank_coinbase',
 'rank_itbit',
 'rank_kraken',
 'rank_mtgox',
 'rank_okcoin',
 'rank_others',
 'marketcap3sma',
 'marketcap7sma',
 'marketcap14sma',
 'marketcap30sma',
 'marketcap90sma',
 'marketcap3ema',
 'marketcap7ema',
 'marketcap14ema',
 'marketcap30ema',
 'marketcap90ema',
 'marketcap3wma',
 'marketcap7wma',
 'marketcap14wma',
 'marketcap30wma',
 'marketcap90wma',
 'marketcap3trx',
 'marketcap7trx',
 'marketcap14trx',
 'marketcap30trx',
 'marketcap90trx',
 'marketcap3mom',
 'marketcap7mom',
 'marketcap14mom',
 'marketcap30mom',
 'marketcap90mom',
 'marketcap3std',
 'marketcap7std',
 'marketcap14std',
 'marketcap30std',
 'marketcap90std

 'marketcap90sma': [('2009-01-03', '2010-10-14')],
 'marketcap90std': [('2009-01-03', '2010-10-14')],
 'marketcap90trx': [('2009-01-03', '2011-04-11')],
 'marketcap90var': [('2009-01-03', '2010-10-14')],
 'marketcap90wma': [('2009-01-03', '2010-10-14')],
 'median_transaction_fee': [('2009-01-03', '2011-04-14')],
 'median_transaction_fee14ema': [('2009-01-03', '2011-04-14')],
 'median_transaction_fee14mom': [('2009-01-03', '2011-04-14')],
 'median_transaction_fee14roc': [('2009-01-03', '2011-04-28')],
 'median_transaction_fee14rsi': [('2009-01-03', '2011-04-14')],
 'median_transaction_fee14sma': [('2009-01-03', '2011-04-14')],
 'median_transaction_fee14std': [('2009-01-03', '2011-04-14')],
 'median_transaction_fee14trx': [('2009-01-03', '2011-04-15')],
 'median_transaction_fee14var': [('2009-01-03', '2013-10-26')],
 'median_transaction_fee14wma': [('2009-01-03', '2011-04-14')],
 'median_transaction_fee30ema': [('2009-01-03', '2011-04-14')],
 'median_transaction_fee30mom': [('2009-01-03'

 'transactionvalue90trx': [('2009-01-03', '2011-04-11')],
 'transactionvalue90var': [('2009-01-03', '2010-10-14')],
 'transactionvalue90wma': [('2009-01-03', '2010-10-14')],
 'tweets': [('2009-01-03', '2014-04-09')],
 'tweets14ema': [('2009-01-03', '2014-04-22')],
 'tweets14mom': [('2009-01-03', '2014-04-23')],
 'tweets14roc': [('2009-01-03', '2014-04-23')],
 'tweets14rsi': [('2009-01-03', '2014-04-23')],
 'tweets14sma': [('2009-01-03', '2014-04-22')],
 'tweets14std': [('2009-01-03', '2014-04-22')],
 'tweets14trx': [('2009-01-03', '2014-05-19')],
 'tweets14var': [('2009-01-03', '2014-04-22')],
 'tweets14wma': [('2009-01-03', '2014-04-22')],
 'tweets30ema': [('2009-01-03', '2014-05-08')],
 'tweets30mom': [('2009-01-03', '2014-05-09')],
 'tweets30roc': [('2009-01-03', '2014-05-09')],
 'tweets30rsi': [('2009-01-03', '2014-05-09')],
 'tweets30sma': [('2009-01-03', '2014-05-08')],
 'tweets30std': [('2009-01-03', '2014-05-08')],
 'tweets30trx': [('2009-01-03', '2014-07-06')],
 'tweets30var':

### Output internal bitcoin data file

In [194]:
# persist the processed internal features next to the scraped files; index=False keeps the
# positional row index out of the CSV (the dates live in their own column)
processed_internal_data.to_csv(os.path.join(scrape_directory, "all_internal_data.csv"), index=False)

Could also scrape:

https://www.blockchain.com/charts/total-bitcoins

# ----------------------------------------------------------------------------------------------------------------

As well as the internal data being useful to bitcoin price prediction, it is equally important to include external data. As I saw from my literature review, these features are very good indicators of bitcoin price and this has been demonstrated in multiple studies. [REFERENCE]

I will deal with multiple different types of external data, including stock data, commodity data, Twitter data and other news data. I will explore the data collection and processing for these below.

# Get stock data

Given that Bitcoin is a financial asset, stock data will also play a part in its price.
The study at https://www.researchgate.net/publication/330224580_Analysis_of_the_relationships_between_Bitcoin_and_exchange_rate_commodities_and_global_indexes_by_asymmetric_causality_test looks at the relationship between global stock indexes and Bitcoin.

https://www.ig.com/en/trading-opportunities/top-10-most-traded-commodities-180905
The data we’re importing are price data for copper and gold futures. The ratio of copper-to-gold prices is often seen as an indicator of economic health, with copper going up relative to gold when there is strong economic growth and hence industrial demand. If gold outperforms copper, that’s often a sign that investors are worried about growth and inflation.

### Get the stock, commodity, and currency exchange data from Yahoo Finance

In [46]:
# mapping from a readable instrument name to its Yahoo Finance ticker, defined in the YAML config
yahoo_name_to_ticker_map = config_variables["yahoo_name_to_ticker_map"]

In [173]:
# fetch the data for every instrument in the map, starting at start_date
# NOTE(review): scrape_directory is presumably where the helper stores its results — confirm
yahoo_stock_df = dc.scrape_stock_from_yahoo_finance(scrape_directory, yahoo_name_to_ticker_map, start_date)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=62.0), HTML(value='')))




### Check this dataframe for nan values

In [171]:
# per-column date ranges with missing values; the ranges printed below all start at the
# first date, i.e. instruments whose history begins later than the requested start
yahoo_nan_col_dates = dc.find_col_nan_ranges(yahoo_stock_df, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=13.0), HTML(value='')))


---------------------------------------------------------------------
13 columns had a 'NaN' value in them:
['crypto_index_open',
 'crypto_index_high',
 'crypto_index_low',
 'crypto_index_close',
 'johannesburg_exchange_open',
 'johannesburg_exchange_high',
 'johannesburg_exchange_low',
 'johannesburg_exchange_close',
 'aluminium_futures_open',
 'aluminium_futures_high',
 'aluminium_futures_low',
 'aluminium_futures_close',
 'aluminium_futures_volume']
---------------------------------------------------------------------
The date ranges in these columns where the NaN's are located are:
{'aluminium_futures_close': [('2009-01-01', '2014-05-06')],
 'aluminium_futures_high': [('2009-01-01', '2014-05-06')],
 'aluminium_futures_low': [('2009-01-01', '2014-05-06')],
 'aluminium_futures_open': [('2009-01-01', '2014-05-06')],
 'aluminium_futures_volume': [('2009-01-01', '2014-05-06')],
 'crypto_index_close': [('2009-01-01', '2018-12-31')],
 'crypto_index_high': [('2009-01-01', '2018-12-31')],


### Manually inspect the dataframe

In [174]:
# (rows, columns): one row per calendar day, several price/volume columns per instrument
yahoo_stock_df.shape

(4509, 295)

In [175]:
# first rows; non-trading days (e.g. 2009-01-03/04) repeat the previous close with volume 0
yahoo_stock_df.head()

Unnamed: 0_level_0,sp_500_open,sp_500_high,sp_500_low,sp_500_close,sp_500_volume,dow_jones_open,dow_jones_high,dow_jones_low,dow_jones_close,dow_jones_volume,vix_open,vix_high,vix_low,vix_close,ftse_100_open,ftse_100_high,ftse_100_low,ftse_100_close,ftse_100_volume,euro_stoxx_50_open,euro_stoxx_50_high,euro_stoxx_50_low,euro_stoxx_50_close,euro_stoxx_50_volume,crypto_index_open,crypto_index_high,crypto_index_low,crypto_index_close,nasdaq_exchange_open,nasdaq_exchange_high,nasdaq_exchange_low,nasdaq_exchange_close,nasdaq_exchange_volume,new_york_exchange_open,new_york_exchange_high,new_york_exchange_low,new_york_exchange_close,new_york_exchange_volume,amex_exchange_open,amex_exchange_high,amex_exchange_low,amex_exchange_close,amex_exchange_volume,russell_2000_open,russell_2000_high,russell_2000_low,russell_2000_close,russell_2000_volume,euronext_100_euro_open,euronext_100_euro_high,...,live_cattle_futures_volume,lean_hogs_futures_open,lean_hogs_futures_high,lean_hogs_futures_low,lean_hogs_futures_close,lean_hogs_futures_volume,eur_gbp_exchange_open,eur_gbp_exchange_high,eur_gbp_exchange_low,eur_gbp_exchange_close,eur_chf_exchange_open,eur_chf_exchange_high,eur_chf_exchange_low,eur_chf_exchange_close,eur_jpy_exchange_open,eur_jpy_exchange_high,eur_jpy_exchange_low,eur_jpy_exchange_close,gbp_jyp_exchange_open,gbp_jyp_exchange_high,gbp_jyp_exchange_low,gbp_jyp_exchange_close,usd_gbp_exchange_open,usd_gbp_exchange_high,usd_gbp_exchange_low,usd_gbp_exchange_close,usd_eur_exchange_open,usd_eur_exchange_high,usd_eur_exchange_low,usd_eur_exchange_close,usd_cad_exchange_open,usd_cad_exchange_high,usd_cad_exchange_low,usd_cad_exchange_close,usd_aud_exchange_open,usd_aud_exchange_high,usd_aud_exchange_low,usd_aud_exchange_close,usd_mxn_exchange_open,usd_mxn_exchange_high,usd_mxn_exchange_low,usd_mxn_exchange_close,usd_hkd_exchange_open,usd_hkd_exchange_high,usd_hkd_exchange_low,usd_hkd_exchange_close,usd_jpy_exchange_open,usd_jpy_exchange_high,usd_jpy_exchange_low,usd_jpy_excha
nge_close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2009-01-01,903.25,903.25,903.25,903.25,0,8776.389648,8776.389648,8776.389648,8776.389648,0,40.0,40.0,40.0,40.0,4392.700195,4392.700195,4392.700195,4392.700195,0,2451.47998,2451.47998,2451.47998,2451.47998,0,,,,,1577.030029,1577.030029,1577.030029,1577.030029,0,5757.049805,5757.049805,5757.049805,5757.049805,0,1397.530029,1397.530029,1397.530029,1397.530029,0,499.450012,499.450012,499.450012,499.450012,0,544.919983,544.919983,...,0,59.724998,59.724998,59.724998,59.724998,0,0.9586,0.9586,0.9472,0.9508,1.473,1.4942,1.473,1.4906,126.870003,128.179993,125.57,126.919998,131.380005,134.563004,131.320007,133.501999,0.6898,0.6898,0.67556,0.67838,0.71531,0.71659,0.71154,0.71434,1.2272,1.2272,1.2177,1.2218,1.4174,1.4453,1.401,1.4184,13.756,13.756,13.3716,13.434,7.7497,7.8081,7.7275,7.75,90.650002,91.25,87.559998,90.667
2009-01-02,902.98999,934.72998,899.349976,931.799988,4048270000,8772.25,9065.280273,8760.780273,9034.69043,2137000,39.580002,39.82,36.880001,39.189999,4434.200195,4561.799805,4430.0,4561.799805,407295500,2451.47998,2451.47998,2451.47998,2451.47998,0,,,,,1578.869995,1636.030029,1571.97998,1632.209961,1438410000,5755.759766,5937.02002,5732.680176,5915.72998,4048270000,1397.530029,1444.290039,1396.819946,1444.290039,139700,499.51001,508.450012,494.73999,505.839996,4048270000,546.909973,566.380005,...,16544,60.875,63.875,60.875,63.849998,15191,0.9507,0.9644,0.9438,0.9583,1.4908,1.5049,1.4814,1.5022,126.976997,128.0,126.032997,127.860001,133.526993,133.970001,131.559998,133.380005,0.67926,0.69382,0.67728,0.68852,0.71415,0.72239,0.71311,0.71824,1.2224,1.2274,1.2062,1.2094,1.4184,1.444,1.4045,1.4098,13.449,13.857,13.4244,13.775,7.749,7.7547,7.7445,7.7502,90.660004,92.370003,90.660004,91.790001
2009-01-03,931.799988,931.799988,931.799988,931.799988,0,9034.69043,9034.69043,9034.69043,9034.69043,0,39.189999,39.189999,39.189999,39.189999,4561.799805,4561.799805,4561.799805,4561.799805,0,2451.47998,2451.47998,2451.47998,2451.47998,0,,,,,1632.209961,1632.209961,1632.209961,1632.209961,0,5915.72998,5915.72998,5915.72998,5915.72998,0,1444.290039,1444.290039,1444.290039,1444.290039,0,505.839996,505.839996,505.839996,505.839996,0,566.380005,566.380005,...,0,63.849998,63.849998,63.849998,63.849998,0,0.9583,0.9583,0.9583,0.9583,1.5022,1.5022,1.5022,1.5022,127.860001,127.860001,127.860001,127.860001,133.380005,133.380005,133.380005,133.380005,0.68852,0.68852,0.68852,0.68852,0.71824,0.71824,0.71824,0.71824,1.2094,1.2094,1.2094,1.2094,1.4098,1.4098,1.4098,1.4098,13.775,13.775,13.775,13.775,7.7502,7.7502,7.7502,7.7502,91.790001,91.790001,91.790001,91.790001
2009-01-04,931.799988,931.799988,931.799988,931.799988,0,9034.69043,9034.69043,9034.69043,9034.69043,0,39.189999,39.189999,39.189999,39.189999,4561.799805,4561.799805,4561.799805,4561.799805,0,2451.47998,2451.47998,2451.47998,2451.47998,0,,,,,1632.209961,1632.209961,1632.209961,1632.209961,0,5915.72998,5915.72998,5915.72998,5915.72998,0,1444.290039,1444.290039,1444.290039,1444.290039,0,505.839996,505.839996,505.839996,505.839996,0,566.380005,566.380005,...,0,63.849998,63.849998,63.849998,63.849998,0,0.9583,0.9583,0.9583,0.9583,1.5022,1.5022,1.5022,1.5022,127.860001,127.860001,127.860001,127.860001,133.380005,133.380005,133.380005,133.380005,0.68852,0.68852,0.68852,0.68852,0.71824,0.71824,0.71824,0.71824,1.2094,1.2094,1.2094,1.2094,1.4098,1.4098,1.4098,1.4098,13.775,13.775,13.775,13.775,7.7502,7.7502,7.7502,7.7502,91.790001,91.790001,91.790001,91.790001
2009-01-05,929.169983,936.630005,919.530029,927.450012,5413910000,9027.129883,9034.370117,8892.360352,8952.889648,2337600,39.240002,40.220001,38.299999,39.080002,4561.799805,4618.100098,4520.799805,4579.600098,836676100,2551.699951,2563.959961,2527.330078,2553.409912,0,,,,,1621.47998,1640.459961,1604.630005,1628.030029,1816580000,5915.720215,5963.700195,5839.919922,5908.430176,5413910000,1444.290039,1461.219971,1425.619995,1461.219971,169000,505.559998,508.230011,496.01001,505.029999,5413910000,571.380005,572.919983,...,11996,63.950001,63.950001,62.400002,62.674999,15383,0.9604,0.9633,0.923,0.9266,1.4951,1.5139,1.4929,1.5119,128.460007,128.460007,126.417999,127.110001,133.639999,137.529999,132.919998,137.139999,0.68874,0.69252,0.67849,0.67953,0.71644,0.73768,0.71644,0.73282,1.2081,1.2213,1.1866,1.188,1.3974,1.4151,1.39,1.4,13.7726,13.7742,13.42,13.4275,7.7499,7.7557,7.743,7.7509,92.050003,93.529999,91.75,93.217003


In [176]:
# last rows of the scraped frame, to check how recent the data is
yahoo_stock_df.tail()

Unnamed: 0_level_0,sp_500_open,sp_500_high,sp_500_low,sp_500_close,sp_500_volume,dow_jones_open,dow_jones_high,dow_jones_low,dow_jones_close,dow_jones_volume,vix_open,vix_high,vix_low,vix_close,ftse_100_open,ftse_100_high,ftse_100_low,ftse_100_close,ftse_100_volume,euro_stoxx_50_open,euro_stoxx_50_high,euro_stoxx_50_low,euro_stoxx_50_close,euro_stoxx_50_volume,crypto_index_open,crypto_index_high,crypto_index_low,crypto_index_close,nasdaq_exchange_open,nasdaq_exchange_high,nasdaq_exchange_low,nasdaq_exchange_close,nasdaq_exchange_volume,new_york_exchange_open,new_york_exchange_high,new_york_exchange_low,new_york_exchange_close,new_york_exchange_volume,amex_exchange_open,amex_exchange_high,amex_exchange_low,amex_exchange_close,amex_exchange_volume,russell_2000_open,russell_2000_high,russell_2000_low,russell_2000_close,russell_2000_volume,euronext_100_euro_open,euronext_100_euro_high,...,live_cattle_futures_volume,lean_hogs_futures_open,lean_hogs_futures_high,lean_hogs_futures_low,lean_hogs_futures_close,lean_hogs_futures_volume,eur_gbp_exchange_open,eur_gbp_exchange_high,eur_gbp_exchange_low,eur_gbp_exchange_close,eur_chf_exchange_open,eur_chf_exchange_high,eur_chf_exchange_low,eur_chf_exchange_close,eur_jpy_exchange_open,eur_jpy_exchange_high,eur_jpy_exchange_low,eur_jpy_exchange_close,gbp_jyp_exchange_open,gbp_jyp_exchange_high,gbp_jyp_exchange_low,gbp_jyp_exchange_close,usd_gbp_exchange_open,usd_gbp_exchange_high,usd_gbp_exchange_low,usd_gbp_exchange_close,usd_eur_exchange_open,usd_eur_exchange_high,usd_eur_exchange_low,usd_eur_exchange_close,usd_cad_exchange_open,usd_cad_exchange_high,usd_cad_exchange_low,usd_cad_exchange_close,usd_aud_exchange_open,usd_aud_exchange_high,usd_aud_exchange_low,usd_aud_exchange_close,usd_mxn_exchange_open,usd_mxn_exchange_high,usd_mxn_exchange_low,usd_mxn_exchange_close,usd_hkd_exchange_open,usd_hkd_exchange_high,usd_hkd_exchange_low,usd_hkd_exchange_close,usd_jpy_exchange_open,usd_jpy_exchange_high,usd_jpy_exchange_low,usd_jpy_excha
nge_close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2021-05-02,4181.169922,4181.169922,4181.169922,4181.169922,0,33874.851562,33874.851562,33874.851562,33874.851562,0,18.610001,18.610001,18.610001,18.610001,6969.799805,6969.799805,6969.799805,6969.799805,0,3974.73999,3974.73999,3974.73999,3974.73999,0,1337.473022,1337.473022,1337.473022,1337.473022,13962.679688,13962.679688,13962.679688,13962.679688,0,16219.330078,16219.330078,16219.330078,16219.330078,0,2942.48999,2942.48999,2942.48999,2942.48999,0,2266.449951,2266.449951,2266.449951,2266.449951,0,1228.380005,1228.380005,...,0,110.125,110.125,110.125,110.125,0,0.8691,0.8691,0.8691,0.8691,1.1019,1.1019,1.1019,1.1019,132.050995,132.050995,132.050995,132.050995,151.910004,151.910004,151.910004,151.910004,0.7167,0.7167,0.7167,0.7167,0.8246,0.8246,0.8246,0.8246,1.22762,1.22762,1.22762,1.22762,1.28574,1.28574,1.28574,1.28574,20.0243,20.0243,20.0243,20.0243,7.76333,7.76333,7.76333,7.76333,108.903999,108.903999,108.903999,108.903999
2021-05-03,4191.97998,4209.390137,4188.029785,4192.660156,4061170000,33904.890625,34221.058594,33904.890625,34113.230469,289770000,18.65,19.120001,17.799999,18.309999,6969.799805,6969.799805,6969.799805,6969.799805,0,3979.5,4009.72998,3965.5,4000.25,20815100,1409.531982,1409.849976,1408.170044,1408.170044,14031.769531,14042.120117,13881.509766,13895.120117,4718850000,16219.330078,16375.070312,16219.330078,16325.240234,4061170000,2942.48999,3030.100098,2942.48999,3029.290039,0,2268.620117,2288.570068,2267.780029,2277.449951,4061170000,1231.02002,1236.699951,...,26888,110.599998,111.099998,109.800003,110.699997,405,0.87019,0.87085,0.8665,0.87029,1.09789,1.1017,1.0975,1.098,131.429993,132.145004,131.429993,131.410004,151.018005,151.925003,151.018005,151.009003,0.72363,0.72435,0.71782,0.7236,0.8315,0.8323,0.827952,0.83141,1.22808,1.23164,1.2267,1.22791,1.295572,1.297387,1.2875,1.294834,20.2122,20.314381,20.143499,20.195801,7.76671,7.76825,7.7657,7.7668,109.299004,109.693001,108.905998,109.282997
2021-05-04,4179.040039,4179.040039,4128.589844,4164.660156,4441080000,34080.199219,34147.78125,33765.679688,34133.03125,407860000,18.16,21.85,18.110001,19.48,6969.799805,7034.299805,6912.299805,6923.200195,911981600,4001.129883,4018.139893,3919.459961,3924.800049,35163600,1388.952026,1388.972046,1366.589966,1369.156982,13774.509766,13795.570312,13485.589844,13633.5,5852850000,16325.240234,16325.240234,16156.990234,16289.269531,4441080000,3029.290039,3048.929932,3000.77002,3007.540039,0,2275.100098,2275.100098,2225.699951,2248.290039,4441080000,1237.959961,1241.109985,...,32973,110.974998,111.349998,110.400002,111.150002,455,0.8674,0.86885,0.86464,0.86722,1.0992,1.10104,1.0966,1.0992,131.548996,131.630997,131.149002,131.520004,151.649002,152.041,151.005997,151.656998,0.71922,0.72251,0.7191,0.71923,0.8292,0.83333,0.828981,0.8291,1.2277,1.23473,1.2277,1.22801,1.288328,1.30251,1.288162,1.28884,20.1754,20.323299,20.1754,20.187799,7.7663,7.76781,7.76583,7.7661,109.085999,109.481003,109.046997,109.086998
2021-05-05,4177.060059,4187.720215,4160.939941,4167.589844,4029050000,34163.988281,34331.199219,34039.660156,34230.339844,253180000,18.84,19.58,17.889999,19.15,6923.200195,7047.799805,6923.200195,7039.299805,734979800,3927.290039,4003.090088,3927.290039,4002.790039,30131400,1369.156982,1369.156982,1369.156982,1369.156982,13731.129883,13753.049805,13553.929688,13582.419922,4517100000,16289.269531,16394.619141,16284.740234,16348.410156,4029050000,3007.540039,3044.77002,2995.810059,3041.080078,0,2250.129883,2260.0,2232.860107,2241.370117,4029050000,1227.359985,1238.939941,...,32973,111.050003,111.525002,110.650002,111.375,455,0.8648,0.8651,0.86229,0.865,1.0976,1.0989,1.095,1.09759,131.320007,131.453995,131.0,131.352997,151.809998,152.210007,151.759995,151.811996,0.72001,0.72057,0.71806,0.72004,0.8322,0.83422,0.8313,0.83229,1.23,1.23052,1.225,1.23002,1.296176,1.29764,1.289657,1.296378,20.203449,20.251471,20.1528,20.193899,7.76781,7.76973,7.7669,7.76818,109.325996,109.462997,109.152,109.329002
2021-05-06,4169.140137,4179.859863,4147.330078,4179.819824,791890534,34245.109375,34410.710938,34185.058594,34410.570312,115782909,18.41,20.6,18.209999,19.219999,7039.299805,7067.620117,7026.75,7062.149902,0,3997.699951,4022.73999,3974.820068,3989.800049,0,1471.290039,1492.469971,1439.219971,1479.659058,13557.799805,13578.144531,13439.388672,13570.795898,2080665000,16348.400391,16385.175781,16275.15625,16343.010742,0,3041.080078,3069.356689,3037.616211,3068.587646,0,2240.206055,2240.206055,2205.59375,2214.805664,0,1240.680054,1244.98999,...,15972,114.724998,115.0,113.099998,113.574997,9454,0.863,0.86987,0.8627,0.86842,1.0961,1.09725,1.09372,1.09543,131.082001,131.848999,131.0,131.554001,151.817993,152.125,151.220001,151.473999,0.71931,0.72125,0.71753,0.72055,0.8324,0.8335,0.828,0.8294,1.22653,1.22884,1.2177,1.2189,1.29068,1.29827,1.2858,1.2868,20.23111,20.29611,20.132999,20.169001,7.76899,7.7699,7.7675,7.76825,109.189003,109.428001,109.0,109.155998


### Output a stock data file

In [177]:
# Write the combined Yahoo stock frame to the scrape directory as CSV.
stock_data_path = os.path.join(scrape_directory, "all_stock_data.csv")
yahoo_stock_df.to_csv(stock_data_path)

# ----------------------------------------------------------------------------------------------------------------

In [14]:
# Reload the previously saved internal and stock datasets from the scrape directory.
# NOTE(review): read_csv is called without index_col/parse_dates, so an index
# written by to_csv comes back as an ordinary column with unparsed dates —
# confirm that downstream code expects this layout.
internal_data_path = os.path.join(scrape_directory, "all_internal_data.csv")
stock_data_path = os.path.join(scrape_directory, "all_stock_data.csv")

merged_internal_data = pd.read_csv(internal_data_path)
yahoo_stock_df = pd.read_csv(stock_data_path)

# Get economic data

### Load US data from FRED API into a dataframe

The Federal Reserve Bank of St. Louis’s FRED data is accessed through the `quandl` module, which makes it convenient to load. Quandl needs an API token to raise its call limit; setting one up is straightforward.

#### Scrape the data

In [56]:
# Read the Quandl token from the environment so it is never stored in the notebook.
quandl_api_key = os.environ.get("QUANDL_API_KEY")
if not quandl_api_key:
    # A missing variable would otherwise silently configure no key at all;
    # surface it here instead of discovering it through call-limit errors later.
    print("QUANDL_API_KEY is not set: Quandl requests will run without an API token.")
quandl.ApiConfig.api_key = quandl_api_key

In [57]:
# FRED series to download, read from the "fred_tickers" entry of the YAML config.
fred_indicator_tickers = config_variables["fred_tickers"]

# Disabled alternative: iterate through all of the time series on FRED and
# scrape their indicator codes instead of using the configured list.
#fred_indicator_tickers = dc.scrape_fed_economic_data_codes(config_variables)

In [135]:
# Scrape the configured FRED tickers with the project helper, passing it the
# scrape directory and the configured start date.
# NOTE(review): presumably the helper caches or writes results under
# scrape_directory and returns one column per ticker — confirm in
# data_collection_functions.
fred_df = dc.scrape_fred_indicators(scrape_directory, fred_indicator_tickers, start_date)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=59.0), HTML(value='')))




#### Check where NaN values are in this data

In [155]:
# Find which FRED columns contain NaN values and over which date ranges;
# with output=True the helper prints the summary shown below.
fred_nan_col_dates = dc.find_col_nan_ranges(fred_df, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))


---------------------------------------------------------------------
1 columns had a 'NaN' value in them:
['us_30_year_be_inflation_rate']
---------------------------------------------------------------------
The date ranges in these columns where the NaN's are located are:
{'us_30_year_be_inflation_rate': [('2009-01-01', '2010-02-01')]}


#### Manually inspect the data

In [82]:
# (rows, columns) of the scraped FRED frame.
fred_df.shape

(4509, 59)

In [136]:
# Preview the first rows of the FRED frame.
fred_df.head()

Unnamed: 0_level_0,us_gdp,us_real_GDP,us_real_pot_gdp,us_cpi_all,us_cpi_no_food_and_energy,us_cpi_no_food_and_energy_perc_change,us_median_cpi,us_gdp_ipd,us_monetart_base,us_m1_money,us_m2_money,us_velocity_m1_money,us_velocity_m2_money,us_fed_funds_rate,us_3_month_secondary_tbill,us_5_year_treasury_const_mat_rate,us_10_year_treasury_const_mat_rate,us_30_year_treasury_const_mat_rate,us_5_year_be_inflation_rate,us_10_year_be_inflation_rate,us_20_year_be_inflation_rate,us_30_year_be_inflation_rate,us_5_yr_5_yr_fwd_inflatn_expectation,us_ted_spread,us_prime_loan_rate,us_unemployment_rate,us_long_natural_unemployment_rate,us_short_natural_unemployment_rate,us_labour_force_employ_rate,us_pop_employ_rate,us_num_unemployed,us_nonfarm_num_employed,us_num_employed_in_manufactoring,us_num_filing_for_unemployment,us_median_house_income,us_total_real_disposable_income,us_tot_personal_consumption_spend,us_tot_personal_consumption_spend_dg,us_percent_personal_saving_rate,us_real_retail_and_food_sales,us_total_disposble_income,us_industry_production_index,us_capacity_utilisation,us_new_housing_devs_started,us_gross_private_domestic_investment,us_corporate_profit_aftr_tax,us_financial_stress_index,west_texas_crude_oil_price,us_leading_index,us_currency_trade_weighted_dollar_index,us_broad_trade_weighted_dollar_index,us_total_public_debt,us_public_debt_as_perc_of_gdp,us_bank_excess_capital_reserves,us_total_commercial_loans,us_10_year_yield,us_5_year_yield,us_3_year_yield,us_2_year_yield
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1
2009-01-01,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.14,0.11,1.55,2.25,2.69,-0.26,0.11,1.0,,0.48,1.32,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,533000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.308,44.6,-2.32,79.5258,107.235,11126941.0,77.2997,798335.0,1537.8605,2.776,1.6039,0.958,0.6146
2009-01-02,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.0,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,533000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.257,107.2518,11126941.0,77.2997,798335.0,1537.8605,2.9618,1.768,1.0916,0.7202
2009-01-03,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.0,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.257,107.2518,11126941.0,77.2997,798335.0,1537.8605,2.9618,1.768,1.0916,0.7202
2009-01-04,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.0,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.257,107.2518,11126941.0,77.2997,798335.0,1537.8605,2.9618,1.768,1.0916,0.7202
2009-01-05,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1629.7,8261.5,9.132,1.731,0.11,0.14,1.67,2.49,3.0,-0.19,0.15,1.0,,0.49,1.28,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.246,48.61,-2.32,80.0914,107.5888,11126941.0,77.2997,798335.0,1537.8605,3.013,1.7816,1.0812,0.7011


In [137]:
# Preview the last rows of the FRED frame.
fred_df.tail()

Unnamed: 0_level_0,us_gdp,us_real_GDP,us_real_pot_gdp,us_cpi_all,us_cpi_no_food_and_energy,us_cpi_no_food_and_energy_perc_change,us_median_cpi,us_gdp_ipd,us_monetart_base,us_m1_money,us_m2_money,us_velocity_m1_money,us_velocity_m2_money,us_fed_funds_rate,us_3_month_secondary_tbill,us_5_year_treasury_const_mat_rate,us_10_year_treasury_const_mat_rate,us_30_year_treasury_const_mat_rate,us_5_year_be_inflation_rate,us_10_year_be_inflation_rate,us_20_year_be_inflation_rate,us_30_year_be_inflation_rate,us_5_yr_5_yr_fwd_inflatn_expectation,us_ted_spread,us_prime_loan_rate,us_unemployment_rate,us_long_natural_unemployment_rate,us_short_natural_unemployment_rate,us_labour_force_employ_rate,us_pop_employ_rate,us_num_unemployed,us_nonfarm_num_employed,us_num_employed_in_manufactoring,us_num_filing_for_unemployment,us_median_house_income,us_total_real_disposable_income,us_tot_personal_consumption_spend,us_tot_personal_consumption_spend_dg,us_percent_personal_saving_rate,us_real_retail_and_food_sales,us_total_disposble_income,us_industry_production_index,us_capacity_utilisation,us_new_housing_devs_started,us_gross_private_domestic_investment,us_corporate_profit_aftr_tax,us_financial_stress_index,west_texas_crude_oil_price,us_leading_index,us_currency_trade_weighted_dollar_index,us_broad_trade_weighted_dollar_index,us_total_public_debt,us_public_debt_as_perc_of_gdp,us_bank_excess_capital_reserves,us_total_commercial_loans,us_10_year_yield,us_5_year_yield,us_3_year_yield,us_2_year_yield
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1
2021-05-02,22048.894,19087.568,19594.22,264.793,271.214,1.672912,1.833169,115.514,3382.8,18136.0,19417.7,1.198,1.123,0.05,0.01,0.86,1.65,2.3,2.57,2.41,2.38,2.25,2.25,0.17,3.25,6.0,4.463892,4.463892,61.5,57.8,9710.0,144120.0,12284.0,553000.0,68703.0,19335.6,15401.6,2010.9,27.6,232049.0,21902.4,105.583,74.3914,1421.0,3901.831,2082.47,-0.07,62.02,1.72,90.8221,128.0097,27747798.0,129.09116,2854690.0,2566.6644,1.7179,0.8023,0.39,0.2002
2021-05-03,22048.894,19087.568,19594.22,264.793,271.214,1.672912,1.833169,115.514,3382.8,18136.0,19417.7,1.198,1.123,0.06,0.04,0.84,1.65,2.3,2.6,2.41,2.38,2.25,2.24,0.17,3.25,6.0,4.463892,4.463892,61.5,57.8,9710.0,144120.0,12284.0,553000.0,68703.0,19335.6,15401.6,2010.9,27.6,232049.0,21902.4,105.583,74.3914,1421.0,3901.831,2082.47,-0.07,62.02,1.72,90.8221,128.0097,27747798.0,129.09116,2854690.0,2566.6644,1.7179,0.8023,0.39,0.2002
2021-05-04,22048.894,19087.568,19594.22,264.793,271.214,1.672912,1.833169,115.514,3382.8,18136.0,19417.7,1.198,1.123,0.06,0.04,0.84,1.65,2.3,2.62,2.41,2.38,2.25,2.22,0.17,3.25,6.0,4.463892,4.463892,61.5,57.8,9710.0,144120.0,12284.0,553000.0,68703.0,19335.6,15401.6,2010.9,27.6,232049.0,21902.4,105.583,74.3914,1421.0,3901.831,2082.47,-0.07,62.02,1.72,90.8221,128.0097,27747798.0,129.09116,2854690.0,2566.6644,1.7179,0.8023,0.39,0.2002
2021-05-05,22048.894,19087.568,19594.22,264.793,271.214,1.672912,1.833169,115.514,3382.8,18136.0,19417.7,1.198,1.123,0.06,0.04,0.84,1.65,2.3,2.62,2.41,2.38,2.25,2.22,0.17,3.25,6.0,4.463892,4.463892,61.5,57.8,9710.0,144120.0,12284.0,553000.0,68703.0,19335.6,15401.6,2010.9,27.6,232049.0,21902.4,105.583,74.3914,1421.0,3901.831,2082.47,-0.07,62.02,1.72,90.8221,128.0097,27747798.0,129.09116,2854690.0,2566.6644,1.7179,0.8023,0.39,0.2002
2021-05-06,22048.894,19087.568,19594.22,264.793,271.214,1.672912,1.833169,115.514,3382.8,18136.0,19417.7,1.198,1.123,0.06,0.04,0.84,1.65,2.3,2.62,2.41,2.38,2.25,2.22,0.17,3.25,6.0,4.463892,4.463892,61.5,57.8,9710.0,144120.0,12284.0,553000.0,68703.0,19335.6,15401.6,2010.9,27.6,232049.0,21902.4,105.583,74.3914,1421.0,3901.831,2082.47,-0.07,62.02,1.72,90.8221,128.0097,27747798.0,129.09116,2854690.0,2566.6644,1.7179,0.8023,0.39,0.2002


##### Load more data from Quandl API

```python
try:
    copper_download = quandl.get("CHRIS/CME_HG2", start_date=start_date)
    print(copper_download)
    gold_download = quandl.get("CHRIS/CME_GC2", start_date=start_date)
    print(gold_download)

except Exception as e:
    print(e)
```
    
    
Each download returns a table with the following columns:
1. Open
2. High
3. Low
4. Last
5. Change
6. Settle
7. Volume
8. Previous Day Open Interest 

### Get economic data from db.nomics.world

#### Scrape the data

In [219]:
# Time-period and country-code settings for db.nomics, read from the YAML config.
time_periods_dict = config_variables["db_nomics_time_periods"]
country_codes_dict = config_variables["db_nomics_countries"]

# Eurostat tickers from the YAML config: standalone series, then per-country series.
standalone_tickers = config_variables["db_nomics_eurostat_tickers_standalone"]
country_specific_tickers = config_variables["db_nomics_eurostat_tickers_for_countries"]

In [240]:
# Scrape the db.nomics series with the project helper; its result is unpacked
# into the scraped data and `failed_urls`.
dbnomics_data, failed_urls = dc.scrape_db_nomics_economic_data(
    scrape_directory,
    start_date,
    standalone_tickers,
    country_specific_tickers,
    country_codes_dict,
    time_periods_dict,
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=561.0), HTML(value='')))




#### Check where NaN values are in this data

In [241]:
# Find which db.nomics columns contain NaN values and over which date ranges;
# with output=True the helper prints the summary shown below.
dbnomics_nan_col_dates = dc.find_col_nan_ranges(dbnomics_data, output=True)

[autoreload of data_collection_functions failed: Traceback (most recent call last):
  File "C:\Users\Owner\.conda\envs\python_all\lib\site-packages\IPython\extensions\autoreload.py", line 245, in check
    superreload(m, reload, self.old_objects)
  File "C:\Users\Owner\.conda\envs\python_all\lib\site-packages\IPython\extensions\autoreload.py", line 394, in superreload
    module = reload(module)
  File "C:\Users\Owner\.conda\envs\python_all\lib\imp.py", line 314, in reload
    return importlib.reload(module)
  File "C:\Users\Owner\.conda\envs\python_all\lib\importlib\__init__.py", line 169, in reload
    _bootstrap._exec(spec, module)
  File "<frozen importlib._bootstrap>", line 604, in _exec
  File "<frozen importlib._bootstrap_external>", line 779, in exec_module
  File "<frozen importlib._bootstrap_external>", line 916, in get_code
  File "<frozen importlib._bootstrap_external>", line 846, in source_to_code
  File "<frozen importlib._bootstrap>", line 219, in _call_with_frames_remov

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=23.0), HTML(value='')))


---------------------------------------------------------------------
23 columns had a 'NaN' value in them:
['turkey_house_price_index',
 'italy_house_price_index',
 'turkey_secondary_house_purchases',
 'italy_secondary_house_purchases',
 'turkey_housing_price_index',
 'italy_housing_price_index',
 'germany_job_vacancy_rate',
 'turkey_retail_business_sales_confidence',
 'denmark_retail_business_sales_confidence',
 'turkey_services_business_3m_price_expectation',
 'denmark_services_business_3m_price_expectation',
 'turkey_business_employment_expectation',
 'denmark_gov_debt_all',
 'sweden_gov_debt_all',
 'euro_area_percent_afford_unexpected_expenses',
 'germany_trust_in_legal_system',
 'france_trust_in_legal_system',
 'turkey_trust_in_legal_system',
 'uk_trust_in_legal_system',
 'italy_trust_in_legal_system',
 'belgium_trust_in_legal_system',
 'denmark_trust_in_legal_system',
 'sweden_trust_in_legal_system']
---------------------------------------------------------------------
The date

#### Manually inspect the data

In [242]:
# (rows, columns) of the scraped db.nomics frame.
dbnomics_data.shape

(4510, 207)

In [243]:
# Preview the first rows of the db.nomics frame.
dbnomics_data.head()

Unnamed: 0_level_0,all_euro_forward_yields_Y10,all_euro_forward_yields_Y5,all_euro_forward_yields_Y2,all_euro_forward_yields_Y1,all_euro_forward_yields_M6,all_euro_forward_yields_M3,all_euro_spot_yields_Y10,all_euro_spot_yields_Y5,all_euro_spot_yields_Y2,all_euro_spot_yields_Y1,all_euro_spot_yields_M6,all_euro_spot_yields_M3,turkey_government_10_yr_bonds,japan_government_10_yr_bonds,us_government_10_yr_bonds,euro_area_emu_convergence_bond_yield,germany_emu_convergence_bond_yield,france_emu_convergence_bond_yield,uk_emu_convergence_bond_yield,italy_emu_convergence_bond_yield,belgium_emu_convergence_bond_yield,denmark_emu_convergence_bond_yield,sweden_emu_convergence_bond_yield,euro_19_gov_debt,germany_gov_debt,france_gov_debt,italy_gov_debt,belgium_gov_debt,denmark_gov_debt,sweden_gov_debt,euro_area_harmonised_index_consumer_prices,euro_19_harmonised_index_consumer_prices,germany_harmonised_index_consumer_prices,france_harmonised_index_consumer_prices,turkey_harmonised_index_consumer_prices,uk_harmonised_index_consumer_prices,us_harmonised_index_consumer_prices,italy_harmonised_index_consumer_prices,belgium_harmonised_index_consumer_prices,denmark_harmonised_index_consumer_prices,sweden_harmonised_index_consumer_prices,euro_area_house_price_index,euro_19_house_price_index,germany_house_price_index,france_house_price_index,turkey_house_price_index,uk_house_price_index,italy_house_price_index,belgium_house_price_index,denmark_house_price_index,...,italy_retail_business_sales_confidence,belgium_retail_business_sales_confidence,denmark_retail_business_sales_confidence,sweden_retail_business_sales_confidence,euro_19_services_business_3m_price_expectation,germany_services_business_3m_price_expectation,france_services_business_3m_price_expectation,turkey_services_business_3m_price_expectation,uk_services_business_3m_price_expectation,italy_services_business_3m_price_expectation,belgium_services_business_3m_price_expectation,denmark_services_business_3m_price_expectation,swe
den_services_business_3m_price_expectation,euro_19_business_employment_expectation,germany_business_employment_expectation,france_business_employment_expectation,turkey_business_employment_expectation,uk_business_employment_expectation,italy_business_employment_expectation,belgium_business_employment_expectation,denmark_business_employment_expectation,sweden_business_employment_expectation,germany_percent_housholds_w_internet,france_percent_housholds_w_internet,turkey_percent_housholds_w_internet,uk_percent_housholds_w_internet,italy_percent_housholds_w_internet,belgium_percent_housholds_w_internet,denmark_percent_housholds_w_internet,sweden_percent_housholds_w_internet,denmark_gov_debt_all,sweden_gov_debt_all,euro_area_percent_afford_unexpected_expenses,euro_19_percent_afford_unexpected_expenses,germany_percent_afford_unexpected_expenses,france_percent_afford_unexpected_expenses,turkey_percent_afford_unexpected_expenses,uk_percent_afford_unexpected_expenses,italy_percent_afford_unexpected_expenses,belgium_percent_afford_unexpected_expenses,denmark_percent_afford_unexpected_expenses,sweden_percent_afford_unexpected_expenses,germany_trust_in_legal_system,france_trust_in_legal_system,turkey_trust_in_legal_system,uk_trust_in_legal_system,italy_trust_in_legal_system,belgium_trust_in_legal_system,denmark_trust_in_legal_system,sweden_trust_in_legal_system
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2009-01-01,4.86969,4.22276,3.13145,2.45337,1.91688,1.71857,3.92322,3.20703,2.41026,1.9939,1.79447,1.78778,15.96,1.26,2.49,3.68,2.93,3.41,3.04,4.49,3.77,3.31,2.43,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-02,4.86586,4.24995,3.14731,2.43366,1.86285,1.66169,3.93396,3.21408,2.38987,1.95111,1.74592,1.74835,15.96,1.26,2.49,3.68,2.97,3.38,2.84,4.45,3.8,3.31,2.44,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-03,4.86586,4.24995,3.14731,2.43366,1.86285,1.66169,3.93396,3.21408,2.38987,1.95111,1.74592,1.74835,15.96,1.26,2.49,3.68,2.97,3.38,2.84,4.45,3.8,3.31,2.44,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-04,4.86586,4.24995,3.14731,2.43366,1.86285,1.66169,3.93396,3.21408,2.38987,1.95111,1.74592,1.74835,15.96,1.26,2.49,3.68,2.97,3.38,2.84,4.45,3.8,3.31,2.44,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-05,4.91925,4.2568,3.12237,2.39963,1.85158,1.66637,3.9422,3.1999,2.36672,1.93611,1.7415,1.74164,15.96,1.26,2.49,3.72,3.0,3.48,2.96,4.47,3.81,3.32,2.55,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,


In [244]:
# Preview the last rows of the db.nomics frame.
dbnomics_data.tail()

Unnamed: 0_level_0,all_euro_forward_yields_Y10,all_euro_forward_yields_Y5,all_euro_forward_yields_Y2,all_euro_forward_yields_Y1,all_euro_forward_yields_M6,all_euro_forward_yields_M3,all_euro_spot_yields_Y10,all_euro_spot_yields_Y5,all_euro_spot_yields_Y2,all_euro_spot_yields_Y1,all_euro_spot_yields_M6,all_euro_spot_yields_M3,turkey_government_10_yr_bonds,japan_government_10_yr_bonds,us_government_10_yr_bonds,euro_area_emu_convergence_bond_yield,germany_emu_convergence_bond_yield,france_emu_convergence_bond_yield,uk_emu_convergence_bond_yield,italy_emu_convergence_bond_yield,belgium_emu_convergence_bond_yield,denmark_emu_convergence_bond_yield,sweden_emu_convergence_bond_yield,euro_19_gov_debt,germany_gov_debt,france_gov_debt,italy_gov_debt,belgium_gov_debt,denmark_gov_debt,sweden_gov_debt,euro_area_harmonised_index_consumer_prices,euro_19_harmonised_index_consumer_prices,germany_harmonised_index_consumer_prices,france_harmonised_index_consumer_prices,turkey_harmonised_index_consumer_prices,uk_harmonised_index_consumer_prices,us_harmonised_index_consumer_prices,italy_harmonised_index_consumer_prices,belgium_harmonised_index_consumer_prices,denmark_harmonised_index_consumer_prices,sweden_harmonised_index_consumer_prices,euro_area_house_price_index,euro_19_house_price_index,germany_house_price_index,france_house_price_index,turkey_house_price_index,uk_house_price_index,italy_house_price_index,belgium_house_price_index,denmark_house_price_index,...,italy_retail_business_sales_confidence,belgium_retail_business_sales_confidence,denmark_retail_business_sales_confidence,sweden_retail_business_sales_confidence,euro_19_services_business_3m_price_expectation,germany_services_business_3m_price_expectation,france_services_business_3m_price_expectation,turkey_services_business_3m_price_expectation,uk_services_business_3m_price_expectation,italy_services_business_3m_price_expectation,belgium_services_business_3m_price_expectation,denmark_services_business_3m_price_expectation,swe
den_services_business_3m_price_expectation,euro_19_business_employment_expectation,germany_business_employment_expectation,france_business_employment_expectation,turkey_business_employment_expectation,uk_business_employment_expectation,italy_business_employment_expectation,belgium_business_employment_expectation,denmark_business_employment_expectation,sweden_business_employment_expectation,germany_percent_housholds_w_internet,france_percent_housholds_w_internet,turkey_percent_housholds_w_internet,uk_percent_housholds_w_internet,italy_percent_housholds_w_internet,belgium_percent_housholds_w_internet,denmark_percent_housholds_w_internet,sweden_percent_housholds_w_internet,denmark_gov_debt_all,sweden_gov_debt_all,euro_area_percent_afford_unexpected_expenses,euro_19_percent_afford_unexpected_expenses,germany_percent_afford_unexpected_expenses,france_percent_afford_unexpected_expenses,turkey_percent_afford_unexpected_expenses,uk_percent_afford_unexpected_expenses,italy_percent_afford_unexpected_expenses,belgium_percent_afford_unexpected_expenses,denmark_percent_afford_unexpected_expenses,sweden_percent_afford_unexpected_expenses,germany_trust_in_legal_system,france_trust_in_legal_system,turkey_trust_in_legal_system,uk_trust_in_legal_system,italy_trust_in_legal_system,belgium_trust_in_legal_system,denmark_trust_in_legal_system,sweden_trust_in_legal_system
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2021-05-03,1.12796,0.19202,-0.44605,-0.56736,-0.59439,-0.59799,0.19883,-0.30744,-0.55162,-0.5899,-0.59681,-0.59686,15.56,0.09,1.62,0.17,-0.22,0.01,0.84,0.91,0.17,0.05,0.41,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.16,106.53,107.9,107.09,200.65,108.9,109.55,105.1,110.31,103.6,109.22,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.3,17.9,11.8,-2.0,3.5,13.1,-6.5,24.1,-11.5,-3.6,8.1,-1.1,9.0,9.5,2.6,15.8,-8.4,-11.9,3.4,11.4,15.3,-2.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-05-04,1.11484,0.17375,-0.47333,-0.58429,-0.60269,-0.60141,0.18258,-0.32903,-0.56713,-0.59799,-0.60022,-0.5978,15.56,0.09,1.62,0.17,-0.22,0.01,0.8,0.91,0.13,0.05,0.41,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.16,106.53,107.9,107.09,200.65,108.9,109.55,105.1,110.31,103.6,109.22,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.3,17.9,11.8,-2.0,3.5,13.1,-6.5,24.1,-11.5,-3.6,8.1,-1.1,9.0,9.5,2.6,15.8,-8.4,-11.9,3.4,11.4,15.3,-2.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-05-05,1.11484,0.17375,-0.47333,-0.58429,-0.60269,-0.60141,0.18258,-0.32903,-0.56713,-0.59799,-0.60022,-0.5978,15.56,0.09,1.62,0.17,-0.22,0.01,0.82,0.91,0.13,0.05,0.41,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.16,106.53,107.9,107.09,200.65,108.9,109.55,105.1,110.31,103.6,109.22,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.3,17.9,11.8,-2.0,3.5,13.1,-6.5,24.1,-11.5,-3.6,8.1,-1.1,9.0,9.5,2.6,15.8,-8.4,-11.9,3.4,11.4,15.3,-2.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-05-06,1.11484,0.17375,-0.47333,-0.58429,-0.60269,-0.60141,0.18258,-0.32903,-0.56713,-0.59799,-0.60022,-0.5978,15.56,0.09,1.62,0.17,-0.22,0.01,0.82,0.91,0.13,0.05,0.41,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.16,106.53,107.9,107.09,200.65,108.9,109.55,105.1,110.31,103.6,109.22,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.3,17.9,11.8,-2.0,3.5,13.1,-6.5,24.1,-11.5,-3.6,8.1,-1.1,9.0,9.5,2.6,15.8,-8.4,-11.9,3.4,11.4,15.3,-2.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-05-07,1.11484,0.17375,-0.47333,-0.58429,-0.60269,-0.60141,0.18258,-0.32903,-0.56713,-0.59799,-0.60022,-0.5978,15.56,0.09,1.62,0.17,-0.22,0.01,0.82,0.91,0.13,0.05,0.41,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.16,106.53,107.9,107.09,200.65,108.9,109.55,105.1,110.31,103.6,109.22,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.3,17.9,11.8,-2.0,3.5,13.1,-6.5,24.1,-11.5,-3.6,8.1,-1.1,9.0,9.5,2.6,15.8,-8.4,-11.9,3.4,11.4,15.3,-2.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7


### Merge these economic datasets together

In [245]:
# Combine the FRED and DBnomics frames into a single master dataframe;
# the merge itself is delegated to the shared helper in data_collection_functions.
list_of_economic_dfs = [
    fred_df,
    dbnomics_data,
]
merged_economic_df = dc.merge_dfs_on_index(list_of_economic_dfs)

In [246]:
# show the merged frame; the rendered output is truncated to the first and last rows
merged_economic_df

Unnamed: 0_level_0,us_gdp,us_real_GDP,us_real_pot_gdp,us_cpi_all,us_cpi_no_food_and_energy,us_cpi_no_food_and_energy_perc_change,us_median_cpi,us_gdp_ipd,us_monetart_base,us_m1_money,us_m2_money,us_velocity_m1_money,us_velocity_m2_money,us_fed_funds_rate,us_3_month_secondary_tbill,us_5_year_treasury_const_mat_rate,us_10_year_treasury_const_mat_rate,us_30_year_treasury_const_mat_rate,us_5_year_be_inflation_rate,us_10_year_be_inflation_rate,us_20_year_be_inflation_rate,us_30_year_be_inflation_rate,us_5_yr_5_yr_fwd_inflatn_expectation,us_ted_spread,us_prime_loan_rate,us_unemployment_rate,us_long_natural_unemployment_rate,us_short_natural_unemployment_rate,us_labour_force_employ_rate,us_pop_employ_rate,us_num_unemployed,us_nonfarm_num_employed,us_num_employed_in_manufactoring,us_num_filing_for_unemployment,us_median_house_income,us_total_real_disposable_income,us_tot_personal_consumption_spend,us_tot_personal_consumption_spend_dg,us_percent_personal_saving_rate,us_real_retail_and_food_sales,us_total_disposble_income,us_industry_production_index,us_capacity_utilisation,us_new_housing_devs_started,us_gross_private_domestic_investment,us_corporate_profit_aftr_tax,us_financial_stress_index,west_texas_crude_oil_price,us_leading_index,us_currency_trade_weighted_dollar_index,...,italy_retail_business_sales_confidence,belgium_retail_business_sales_confidence,denmark_retail_business_sales_confidence,sweden_retail_business_sales_confidence,euro_19_services_business_3m_price_expectation,germany_services_business_3m_price_expectation,france_services_business_3m_price_expectation,turkey_services_business_3m_price_expectation,uk_services_business_3m_price_expectation,italy_services_business_3m_price_expectation,belgium_services_business_3m_price_expectation,denmark_services_business_3m_price_expectation,sweden_services_business_3m_price_expectation,euro_19_business_employment_expectation,germany_business_employment_expectation,france_business_employment_expectation,turkey_business_em
ployment_expectation,uk_business_employment_expectation,italy_business_employment_expectation,belgium_business_employment_expectation,denmark_business_employment_expectation,sweden_business_employment_expectation,germany_percent_housholds_w_internet,france_percent_housholds_w_internet,turkey_percent_housholds_w_internet,uk_percent_housholds_w_internet,italy_percent_housholds_w_internet,belgium_percent_housholds_w_internet,denmark_percent_housholds_w_internet,sweden_percent_housholds_w_internet,denmark_gov_debt_all,sweden_gov_debt_all,euro_area_percent_afford_unexpected_expenses,euro_19_percent_afford_unexpected_expenses,germany_percent_afford_unexpected_expenses,france_percent_afford_unexpected_expenses,turkey_percent_afford_unexpected_expenses,uk_percent_afford_unexpected_expenses,italy_percent_afford_unexpected_expenses,belgium_percent_afford_unexpected_expenses,denmark_percent_afford_unexpected_expenses,sweden_percent_afford_unexpected_expenses,germany_trust_in_legal_system,france_trust_in_legal_system,turkey_trust_in_legal_system,uk_trust_in_legal_system,italy_trust_in_legal_system,belgium_trust_in_legal_system,denmark_trust_in_legal_system,sweden_trust_in_legal_system
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2009-01-01,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.14,0.11,1.55,2.25,2.69,-0.26,0.11,1.00,,0.48,1.32,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,533000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.308,44.60,-2.32,79.5258,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-02,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.00,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,533000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.2570,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-03,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.00,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.2570,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-04,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.00,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.2570,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-05,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1629.7,8261.5,9.132,1.731,0.11,0.14,1.67,2.49,3.00,-0.19,0.15,1.00,,0.49,1.28,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,91.0373,69.9566,490.0,2014.878,1082.708,3.246,48.61,-2.32,80.0914,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-05-03,22048.894,19087.568,19594.22000,264.793,271.214,1.672912,1.833169,115.514,3382.800,18136.0,19417.7,1.198,1.123,0.06,0.04,0.84,1.65,2.30,2.60,2.41,2.38,2.25,2.24,0.17,3.25,6.0,4.463892,4.463892,61.5,57.8,9710.0,144120.0,12284.0,553000.0,68703.0,19335.6,15401.6,2010.9,27.6,232049.0,21902.4,105.5830,74.3914,1421.0,3901.831,2082.470,-0.070,62.02,1.72,90.8221,...,11.3,17.9,11.8,-2.0,3.5,13.1,-6.5,24.1,-11.5,-3.6,8.1,-1.1,9.0,9.5,2.6,15.8,-8.4,-11.9,3.4,11.4,15.3,-2.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-05-04,22048.894,19087.568,19594.22000,264.793,271.214,1.672912,1.833169,115.514,3382.800,18136.0,19417.7,1.198,1.123,0.06,0.04,0.84,1.65,2.30,2.62,2.41,2.38,2.25,2.22,0.17,3.25,6.0,4.463892,4.463892,61.5,57.8,9710.0,144120.0,12284.0,553000.0,68703.0,19335.6,15401.6,2010.9,27.6,232049.0,21902.4,105.5830,74.3914,1421.0,3901.831,2082.470,-0.070,62.02,1.72,90.8221,...,11.3,17.9,11.8,-2.0,3.5,13.1,-6.5,24.1,-11.5,-3.6,8.1,-1.1,9.0,9.5,2.6,15.8,-8.4,-11.9,3.4,11.4,15.3,-2.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-05-05,22048.894,19087.568,19594.22000,264.793,271.214,1.672912,1.833169,115.514,3382.800,18136.0,19417.7,1.198,1.123,0.06,0.04,0.84,1.65,2.30,2.62,2.41,2.38,2.25,2.22,0.17,3.25,6.0,4.463892,4.463892,61.5,57.8,9710.0,144120.0,12284.0,553000.0,68703.0,19335.6,15401.6,2010.9,27.6,232049.0,21902.4,105.5830,74.3914,1421.0,3901.831,2082.470,-0.070,62.02,1.72,90.8221,...,11.3,17.9,11.8,-2.0,3.5,13.1,-6.5,24.1,-11.5,-3.6,8.1,-1.1,9.0,9.5,2.6,15.8,-8.4,-11.9,3.4,11.4,15.3,-2.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-05-06,22048.894,19087.568,19594.22000,264.793,271.214,1.672912,1.833169,115.514,3382.800,18136.0,19417.7,1.198,1.123,0.06,0.04,0.84,1.65,2.30,2.62,2.41,2.38,2.25,2.22,0.17,3.25,6.0,4.463892,4.463892,61.5,57.8,9710.0,144120.0,12284.0,553000.0,68703.0,19335.6,15401.6,2010.9,27.6,232049.0,21902.4,105.5830,74.3914,1421.0,3901.831,2082.470,-0.070,62.02,1.72,90.8221,...,11.3,17.9,11.8,-2.0,3.5,13.1,-6.5,24.1,-11.5,-3.6,8.1,-1.1,9.0,9.5,2.6,15.8,-8.4,-11.9,3.4,11.4,15.3,-2.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7


#### Check for NaNs

In [247]:
# Report which columns of the merged economic data contain NaN values.
# With output=True the helper prints the column report shown below.
# NOTE(review): presumably the return value maps each column to its NaN date ranges —
# confirm against data_collection_functions.find_col_nan_ranges.
merged_economic_nan_col_dates = dc.find_col_nan_ranges(merged_economic_df, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=82.0), HTML(value='')))


---------------------------------------------------------------------
82 columns had a 'NaN' value in them:
['us_gdp',
 'us_real_GDP',
 'us_real_pot_gdp',
 'us_cpi_all',
 'us_cpi_no_food_and_energy',
 'us_cpi_no_food_and_energy_perc_change',
 'us_median_cpi',
 'us_gdp_ipd',
 'us_monetart_base',
 'us_m1_money',
 'us_m2_money',
 'us_velocity_m1_money',
 'us_velocity_m2_money',
 'us_fed_funds_rate',
 'us_3_month_secondary_tbill',
 'us_5_year_treasury_const_mat_rate',
 'us_10_year_treasury_const_mat_rate',
 'us_30_year_treasury_const_mat_rate',
 'us_5_year_be_inflation_rate',
 'us_10_year_be_inflation_rate',
 'us_20_year_be_inflation_rate',
 'us_30_year_be_inflation_rate',
 'us_5_yr_5_yr_fwd_inflatn_expectation',
 'us_ted_spread',
 'us_prime_loan_rate',
 'us_unemployment_rate',
 'us_long_natural_unemployment_rate',
 'us_short_natural_unemployment_rate',
 'us_labour_force_employ_rate',
 'us_pop_employ_rate',
 'us_num_unemployed',
 'us_nonfarm_num_employed',
 'us_num_employed_in_manufactori

### Output an economic data file

In [248]:
# persist the merged economic data next to the other scraped sources
merged_economic_df.to_csv(
    path_or_buf=os.path.join(scrape_directory, "all_economic_data.csv"),
)

# ----------------------------------------------------------------------------------------------------------------

# Get social media data

### Get sentiment analysis data on all bitcoin related tweets

This cell can't currently be run because of Twitter's rate limits.

In [332]:
# query words for bitcoin-related tweets, read from the YAML config loaded at the top
bitcoin_query_words = config_variables["twitter_bitcoin_query_words"]

In [336]:
# NOTE(review): the saved run of this cell ended in `SystemExit: 0` (see the output below),
# so the assignment never completed and `twitter_bitcoin_sentiment_df` is not defined
# for any later cell. Presumably this scrapes tweets matching the query words and scores
# their sentiment — confirm in data_collection_functions.scrape_tweets_sentiment.
twitter_bitcoin_sentiment_df = dc.scrape_tweets_sentiment(bitcoin_query_words, start_date, config_variables)

SystemExit: 0

### Get tweet count data on tweets by specific influencers

##### Bitcoin crypto influencers on Twitter

1. CZ Binance: Changpeng Zhao is the founder and CEO of Binance, an exchange that moves more than 3 billion.
2. Justin Sun: a Chinese entrepreneur and the founder of the cryptocurrency TRON (TRX), which started as an ERC-20 token and later launched its own mainnet.
3. John McAfee: a prominent developer, businessman and founder of the McAfee antivirus company. He is known for commenting on important and promising crypto projects.
4. Charlie Lee: a computer scientist and the creator of Litecoin (LTC). He is also known for dumping his own cryptocurrency, having said that he sold nearly all of his LTC holdings.
5. Jackson Palmer: a computer programmer, writer and founder of Dogecoin (DOGE), one of the most important meme coins in the current market.
6. Star Xu: CEO and founder of OKCoin, which raised more than 10 million in investment. Xu has been suspected of fraud and accused by investors.

    * https://www.tweetbinder.com/blog/bitcoin-twitter/
    
---

https://consensys.net/blog/trending/i-read-crypto-twitter-for-hours-daily-here-are-the-40-accounts-that-really-matter/

https://coinbound.io/best-crypto-influencers-on-twitter/

https://blog.makerdao.com/the-top-10-crypto-and-defi-influencers-on-twitter/

In [182]:
# Twitter influencers to scrape, read from the YAML config loaded at the top
# NOTE(review): presumably a list of account handles — confirm against config.yaml
bitcoin_influencers = config_variables["twitter_bitcoin_influencers"]

In [12]:
# Scrape the configured influencers' tweets via the shared helper.
# The query argument is `bitcoin_query_words` (defined from the config above), the same
# name the other Twitter scraping cells use; `bitcoin_query` is not defined anywhere in
# this part of the notebook and would raise NameError on a fresh "Restart & Run All".
# NOTE(review): the saved run was interrupted manually after twint logged repeated
# "noData" errors, so this call may be long-running or currently non-functional.
twitter_influencer_df = dc.scrape_influencer_tweets(
    bitcoin_influencers, bitcoin_query_words, start_date, config_variables
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4484.0), HTML(value='')))

CRITICAL:root:twint.get:User:'NoneType' object is not subscriptable
CRITICAL:root:twint.run:Twint:Feed:noDataExpecting value: line 1 column 1 (char 0)
sleeping for 1.0 secs
CRITICAL:root:twint.run:Twint:Feed:noDataExpecting value: line 1 column 1 (char 0)
sleeping for 8.0 secs





KeyboardInterrupt: 

### Get sentiment of Elon Musk bitcoin related tweets

In [337]:
# Build the date-indexed tweet-count / sentiment-share features for Elon Musk's tweets
# (columns are shown in the preview below).
# NOTE(review): presumably tweets are filtered by bitcoin_query_words and scrape_directory
# is used to store/read the scraped tweets — confirm in data_collection_functions.
musk_bitcoin_tweet_sentiment_features = dc.get_musk_bitcoin_sentiment_data(scrape_directory, bitcoin_query_words, start_date)

Scraping tweets from '2010-06-04' --> '2021-03-22'
  - 12339 tweets scraped


#### Get breakdown of tweets

In [338]:
# preview the first rows of the Musk sentiment features
musk_bitcoin_tweet_sentiment_features.head()

Unnamed: 0_level_0,musk_num_tweets,musk_num_pos_tweets,musk_num_neg_tweets,musk_num_neut_tweets,musk_percent_pos,musk_percent_neg,musk_percent_neut
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2009-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [339]:
# count how often each distinct combination of feature values occurs across all days,
# rendered as a one-column frame for rich display
musk_bitcoin_tweet_sentiment_features.value_counts().to_frame()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,0
musk_num_tweets,musk_num_pos_tweets,musk_num_neg_tweets,musk_num_neut_tweets,musk_percent_pos,musk_percent_neg,musk_percent_neut,Unnamed: 7_level_1
0.0,0.0,0.0,0.0,0.0,0.0,0.0,4470
1.0,0.0,0.0,1.0,0.0,0.0,100.0,8
1.0,1.0,0.0,0.0,100.0,0.0,0.0,7
2.0,0.0,0.0,2.0,0.0,0.0,100.0,5
1.0,0.0,1.0,0.0,0.0,100.0,0.0,4
2.0,2.0,0.0,0.0,100.0,0.0,0.0,2
5.0,5.0,0.0,0.0,100.0,0.0,0.0,2
4.0,2.0,0.0,2.0,50.0,0.0,50.0,2
2.0,0.0,1.0,1.0,0.0,50.0,50.0,1
2.0,0.0,2.0,0.0,0.0,100.0,0.0,1


### Merge all the social media data into one table

In [340]:
# The Musk sentiment features are the only social media table produced above (the saved
# runs of the other two scrapers ended in SystemExit / KeyboardInterrupt), so no merge
# is needed yet. Note this is an alias of the same object, not a copy.
merged_social_media = musk_bitcoin_tweet_sentiment_features

### Output all the social media data

In [341]:
# persist the social media features next to the other scraped sources
merged_social_media.to_csv(
    path_or_buf=os.path.join(scrape_directory, "all_social_media_data.csv"),
)

# ----------------------------------------------------------------------------------------------------------------

# Read back in all the scraped data from the different sources and merge it into one df

In [342]:
# CSV files to read back from the scrape directory, one per data source
list_of_files = [
    "all_internal_data.csv",
    "all_stock_data.csv",
    "all_economic_data.csv",
    "all_social_media_data.csv",
]

In [343]:
# read the listed CSVs back from scrape_directory and merge them into one dataframe
# NOTE(review): how start_date is applied (filtering vs. reindexing) is decided inside
# data_collection_functions.read_in_all_data_and_merge_it — confirm there
merged_df = dc.read_in_all_data_and_merge_it(scrape_directory, list_of_files, start_date)

In [344]:
# preview the first rows of the fully merged dataset
merged_df.head()

Unnamed: 0_level_0,trading_volume,trades_per_minute,volatility,bid_and_ask_spread,bid_and_ask_sum_asks,bid_and_ask_sum_bids,time_between_blocks,block_size_votes,rank_bit_x,rank_bitfinex,rank_bitstamp,rank_btce,rank_coinbase,rank_itbit,rank_kraken,rank_mtgox,rank_okcoin,rank_others,marketcap3sma,marketcap7sma,marketcap14sma,marketcap30sma,marketcap90sma,marketcap3ema,marketcap7ema,marketcap14ema,marketcap30ema,marketcap90ema,marketcap3wma,marketcap7wma,marketcap14wma,marketcap30wma,marketcap90wma,marketcap3trx,marketcap7trx,marketcap14trx,marketcap30trx,marketcap90trx,marketcap3mom,marketcap7mom,marketcap14mom,marketcap30mom,marketcap90mom,marketcap3std,marketcap7std,marketcap14std,marketcap30std,marketcap90std,marketcap3var,marketcap7var,...,turkey_services_business_3m_price_expectation,uk_services_business_3m_price_expectation,italy_services_business_3m_price_expectation,belgium_services_business_3m_price_expectation,denmark_services_business_3m_price_expectation,sweden_services_business_3m_price_expectation,euro_19_business_employment_expectation,germany_business_employment_expectation,france_business_employment_expectation,turkey_business_employment_expectation,uk_business_employment_expectation,italy_business_employment_expectation,belgium_business_employment_expectation,denmark_business_employment_expectation,sweden_business_employment_expectation,germany_percent_housholds_w_internet,france_percent_housholds_w_internet,turkey_percent_housholds_w_internet,uk_percent_housholds_w_internet,italy_percent_housholds_w_internet,belgium_percent_housholds_w_internet,denmark_percent_housholds_w_internet,sweden_percent_housholds_w_internet,denmark_gov_debt_all,sweden_gov_debt_all,euro_area_percent_afford_unexpected_expenses,euro_19_percent_afford_unexpected_expenses,germany_percent_afford_unexpected_expenses,france_percent_afford_unexpected_expenses,turkey_percent_afford_unexpected_expenses,uk_percent_afford_unexpected_expenses,italy_percent_afford_unexpected_expenses,belg
ium_percent_afford_unexpected_expenses,denmark_percent_afford_unexpected_expenses,sweden_percent_afford_unexpected_expenses,germany_trust_in_legal_system,france_trust_in_legal_system,turkey_trust_in_legal_system,uk_trust_in_legal_system,italy_trust_in_legal_system,belgium_trust_in_legal_system,denmark_trust_in_legal_system,sweden_trust_in_legal_system,musk_num_tweets,musk_num_pos_tweets,musk_num_neg_tweets,musk_num_neut_tweets,musk_percent_pos,musk_percent_neg,musk_percent_neut
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2009-01-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-02,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [345]:
# write the final merged dataset to the top-level data directory
merged_df.to_csv(
    path_or_buf=os.path.join(data_directory, "all_scraped_data.csv"),
)