# Setup

### Install relevant packages

In [1]:
# conda install pandas
# pip install beautifulsoup4
# pip install requests
# pip install yfinance
# pip install yahoo_fin
# pip install requests_html
# pip install python-twitter
# conda install quandl
# pip install pandas_datareader
# pip install tweepy             # python client for the official Twitter API
# pip install textblob           # python library for processing textual data
# pip install twint              # python client for unlimited Twitter scrape
# pip install nest_asyncio

### Install relevant corpora

In [2]:
# python -m textblob.download_corpora   #NLTK corpora

### Import relevant packages

In [3]:
# general packages
import pandas as pd
from tqdm.auto import tqdm
import os

In [4]:
# packages for importing variables from YAML file
import yaml

In [5]:
# packages for printing nan value ranges
from pprint import pprint

In [6]:
# packages for getting quandl economic data
import quandl

### Allow nested asyncio event loops inside the notebook

In [7]:
# nest_asyncio patches asyncio so that an event loop can be started while another
# one is already running (the Jupyter kernel itself runs an event loop).
import nest_asyncio
nest_asyncio.apply()

### Configure max rows and max columns

In [8]:
# Raise the DataFrame display limits to 100 columns and 100 rows in one call;
# set_option accepts alternating option-name / value arguments.
pd.set_option("display.max_columns", 100, "display.max_rows", 100)

### Load in the '.py' file containing the functions used in this notebook

In [9]:
# Enable IPython's autoreload extension so edits to the helper module are picked
# up without restarting the kernel.
%load_ext autoreload
# Mode 1: only modules registered with %aimport are reloaded before a cell runs.
%autoreload 1
%aimport data_collection_functions

# Project-local helper module; referenced as `dc` throughout the notebook.
import data_collection_functions as dc

# ----------------------------------------------------------------------------------------------------------------

# Read in variables from YAML configuration file

In [14]:
# read in YAML configuration file
# (the path is relative, so it resolves against the notebook's working directory)
# NOTE(review): FullLoader can build more than plain scalars/lists/mappings; if the
# config only holds plain data, yaml.safe_load would be the safer choice — confirm.
with open("../../config_files/config.yaml", "r") as variables:
    config_variables = yaml.load(variables, Loader=yaml.FullLoader)

In [15]:
# root data folder, taken from the config file
data_directory = config_variables["data_directory"]
# sub-folder handed to the scraping helpers and used for the CSV written later on
scrape_directory = os.path.join(data_directory, "Scraped_data")

In [16]:
# start date from the config file; passed to the Yahoo Finance scrape further down
start_date = config_variables["start_date"]

# ----------------------------------------------------------------------------------------------------------------

# Scrape bitcoin's internal data

Internal Bitcoin features were an important starting point for the data I would use in this project for my prediction model.

These features directly described bitcoin with respect to the bitcoin market, looking at bitcoin as a traded asset, and also with respect to the bitcoin network, looking at bitcoin as an infrastructure.

Previous studies showed that these features were important to the prediction model, with the previous price being the most informative feature for the forecast. [REFERENCE]

While these studies also showed that these features were not enough to create a really good and accurate model, they did prove important to a degree. We will scrape these features and use them as a base of our model and then build on them with other features.

### Scrape the market and blockchain data from *'data.bitcoinity.org'*

The 'bitcoinity.org' website was a good place I found to scrape this internal data. It contained both market and blockchain data in an easy to scrape way.
This website was set up so that the specific data feature wanted can be clicked from a menu on the side of the screen and then a graph of this data shows up along with a button to download a CSV of this data.
Each CSV presented the data from all the major exchanges in a manageable format, so that I could scrape all of it into a usable table for my model.

While I could have manually downloaded all of these CSV files, I decided to set up a scrape for this, as it would allow me to scrape the latest data at any point in the future rather than having to manually download all the data each time. Instead, I could just run this scrape and obtain the latest data.

My process for this scrape was to iteratively click through the different data features and download their CSV files. After this, I could then read in these CSV files again and merge them into one table.
After this data was scraped and merged into a big table, I stored this table of internal features in a CSV to ease the process of reading it back in. This would help me save time when getting it again but also protected me from any obscure website changes.

#### Scrape the data

In [20]:
# Scrape the bitcoinity market and blockchain features through the project helper.
# NOTE(review): merge_exchanges=True presumably collapses the per-exchange series
# into one column per feature (the rank_* columns stay per exchange) — confirm in dc.
bitcoinity_data = dc.scrape_bitcoinity_data(scrape_directory, config_variables, merge_exchanges=True)

Scraping the bitcoin market data


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=11.0), HTML(value='')))

    - Trading Volume
    - Rank
    - Price
    - Market Cap
    - Trades Per Minute
    - Volatility
    - Bid/Ask Spread
    - Bid/Ask Sum

Scraping the bitcoin blockchain data


HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=7.0), HTML(value='')))

    - Hashrate
    - Mining Difficulty
    - Block Size
    - Number Of Transactions
    - Time Between Blocks
    - Block Size Votes

Scraped 4532 days of data - from '2009-01-09' to '2021-06-07'


#### Check where nan values are in this data

In [21]:
# Report which columns contain NaNs and over which date ranges
# (output=True prints the report, judging by the cell output).
bitcoinity_nan_col_dates = dc.find_col_nan_ranges(bitcoinity_data, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=24.0), HTML(value='')))


---------------------------------------------------------------------
24 columns had a 'NaN' value in them:
['trading_volume',
 'price',
 'market_cap',
 'trades_per_minute',
 'volatility',
 'bid_and_ask_spread',
 'bid_and_ask_sum_asks',
 'bid_and_ask_sum_bids',
 'hashrate',
 'mining_difficulty',
 'block_size',
 'number_of_transactions',
 'time_between_blocks',
 'block_size_votes',
 'rank_bit_x',
 'rank_bitfinex',
 'rank_bitstamp',
 'rank_btce',
 'rank_coinbase',
 'rank_itbit',
 'rank_kraken',
 'rank_mtgox',
 'rank_okcoin',
 'rank_others']
---------------------------------------------------------------------
The date ranges in these columns where the NaN's are located are:
{'bid_and_ask_spread': [('2009-01-09', '2011-06-16'),
                        ('2011-12-01', '2011-12-04'),
                        ('2011-12-21', '2012-06-30'),
                        ('2012-07-12', '2012-07-14'),
                        ('2012-08-24', '2012-08-26'),
                        ('2012-09-06', '2012-10-

### Scrape the market and blockchain data from *'bitinfocharts.com'*

All the fees and transaction values are in USD.

- To obtain these in bitcoin -> divide by the BTC price in USD

You can also:
- Drop them
- Recalculate them after converting the raw values to the BTC

#### Scrape the data

In [22]:
# Scrape the bitinfocharts features through the project helper
# (the progress bar in the cell output counts 874 items).
bitinfocharts_data = dc.scrape_bitinfocharts_data(scrape_directory, config_variables)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=874.0), HTML(value='')))


Scraped 4539 days of data - from '2009-01-03' to '2021-06-08'


#### Check where nan values are in this data

In [23]:
# Same NaN report as for the bitcoinity data, now for the bitinfocharts frame
# (output=True prints the report, judging by the cell output).
bitinfocharts_nan_col_dates = dc.find_col_nan_ranges(bitinfocharts_data, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=874.0), HTML(value='')))


---------------------------------------------------------------------
874 columns had a 'NaN' value in them:
['marketcap',
 'marketcap3sma',
 'marketcap7sma',
 'marketcap14sma',
 'marketcap30sma',
 'marketcap90sma',
 'marketcap3ema',
 'marketcap7ema',
 'marketcap14ema',
 'marketcap30ema',
 'marketcap90ema',
 'marketcap3wma',
 'marketcap7wma',
 'marketcap14wma',
 'marketcap30wma',
 'marketcap90wma',
 'marketcap3trx',
 'marketcap7trx',
 'marketcap14trx',
 'marketcap30trx',
 'marketcap90trx',
 'marketcap3mom',
 'marketcap7mom',
 'marketcap14mom',
 'marketcap30mom',
 'marketcap90mom',
 'marketcap3std',
 'marketcap7std',
 'marketcap14std',
 'marketcap30std',
 'marketcap90std',
 'marketcap3var',
 'marketcap7var',
 'marketcap14var',
 'marketcap30var',
 'marketcap90var',
 'marketcap3rsi',
 'marketcap7rsi',
 'marketcap14rsi',
 'marketcap30rsi',
 'marketcap90rsi',
 'marketcap3roc',
 'marketcap7roc',
 'marketcap14roc',
 'marketcap30roc',
 'marketcap90roc',
 'price',
 'price3sma',
 'price7sma',
 

                          ('2012-02-19', '2012-02-20'),
                          ('2012-03-20', '2012-03-21'),
                          ('2012-11-25', '2012-11-26'),
                          ('2013-02-27', '2013-02-28'),
                          ('2013-12-08', '2013-12-09'),
                          ('2014-08-27', '2014-08-28'),
                          ('2014-09-03', '2014-09-04'),
                          ('2014-10-16', '2014-10-17'),
                          ('2014-11-16', '2014-11-17'),
                          ('2014-12-29', '2014-12-30'),
                          ('2015-02-28', '2015-03-01'),
                          ('2015-04-28', '2015-04-29'),
                          ('2015-08-04', '2015-08-05'),
                          ('2015-09-14', '2015-09-15'),
                          ('2016-03-07', '2016-03-08'),
                          ('2016-05-03', '2016-05-04'),
                          ('2016-07-09', '2016-07-10'),
                          ('2016-08-21', '2016-0

                    ('2020-11-28', '2020-11-29'),
                    ('2020-12-04', '2020-12-05'),
                    ('2020-12-07', '2020-12-10'),
                    ('2020-12-23', '2020-12-24'),
                    ('2021-01-01', '2021-01-09'),
                    ('2021-01-14', '2021-01-17'),
                    ('2021-01-18', '2021-01-20'),
                    ('2021-01-27', '2021-02-06'),
                    ('2021-02-12', '2021-02-13'),
                    ('2021-03-15', '2021-03-16'),
                    ('2021-03-23', '2021-04-02'),
                    ('2021-04-10', '2021-04-12'),
                    ('2021-04-14', '2021-04-15'),
                    ('2021-04-28', '2021-04-29'),
                    ('2021-05-11', '2021-05-12')],
 'difficulty3roc': [('2009-01-03', '2009-01-06'),
                    ('2009-01-07', '2009-12-30'),
                    ('2010-01-03', '2010-01-11'),
                    ('2010-01-15', '2010-01-25'),
                    ('2010-01-29', '2010-02-04'),

                    ('2009-01-15', '2009-12-30'),
                    ('2010-01-06', '2010-01-11'),
                    ('2010-01-18', '2010-01-25'),
                    ('2010-02-01', '2010-02-04'),
                    ('2010-02-11', '2010-02-14'),
                    ('2010-02-21', '2010-02-24'),
                    ('2010-03-03', '2010-03-08'),
                    ('2010-03-15', '2010-03-21'),
                    ('2010-03-28', '2010-04-01'),
                    ('2010-04-08', '2010-04-12'),
                    ('2010-04-19', '2010-04-21'),
                    ('2010-04-28', '2010-05-04'),
                    ('2010-05-11', '2010-05-19'),
                    ('2010-05-26', '2010-05-29'),
                    ('2010-06-05', '2010-06-11'),
                    ('2010-06-18', '2010-06-24'),
                    ('2010-07-01', '2010-07-06'),
                    ('2010-07-23', '2010-07-27'),
                    ('2010-08-03', '2010-08-05'),
                    ('2010-08-12', '2010-08-15'),


                       ('2014-05-25', '2014-05-27'),
                       ('2014-06-29', '2014-06-30'),
                       ('2014-07-30', '2014-08-02'),
                       ('2014-10-03', '2014-10-07'),
                       ('2014-10-27', '2014-10-28'),
                       ('2014-12-11', '2014-12-13'),
                       ('2015-03-03', '2015-03-05')],
 'fee_to_reward7wma': [('2009-01-03', '2010-07-23'),
                       ('2010-09-06', '2010-09-09'),
                       ('2010-09-16', '2010-09-18'),
                       ('2010-09-22', '2010-09-28'),
                       ('2010-10-07', '2010-10-13'),
                       ('2010-10-19', '2010-11-08'),
                       ('2010-11-14', '2010-11-15'),
                       ('2010-12-03', '2010-12-06')],
 'fee_to_reward90ema': [('2009-01-03', '2010-10-14')],
 'fee_to_reward90mom': [('2009-01-03', '2010-10-15'),
                        ('2010-10-18', '2010-10-23'),
                        ('2010-11-01', '

                   ('2009-04-27', '2009-04-28'),
                   ('2015-04-23', '2015-04-24'),
                   ('2016-10-21', '2016-10-22'),
                   ('2021-02-25', '2021-02-26'),
                   ('2021-03-07', '2021-03-08')],
 'hashrate14var': [('2009-01-03', '2009-01-16')],
 'hashrate14wma': [('2009-01-03', '2009-01-16')],
 'hashrate30ema': [('2009-01-03', '2009-02-01')],
 'hashrate30mom': [('2009-01-03', '2009-02-02')],
 'hashrate30roc': [('2009-01-03', '2009-02-08')],
 'hashrate30rsi': [('2009-01-03', '2009-02-02')],
 'hashrate30sma': [('2009-01-03', '2009-02-01')],
 'hashrate30std': [('2009-01-03', '2009-02-01')],
 'hashrate30trx': [('2009-01-03', '2009-04-01')],
 'hashrate30var': [('2009-01-03', '2009-02-01')],
 'hashrate30wma': [('2009-01-03', '2009-02-01')],
 'hashrate3ema': [('2009-01-03', '2009-01-09')],
 'hashrate3mom': [('2009-01-03', '2009-01-09')],
 'hashrate3roc': [('2009-01-03', '2009-01-12')],
 'hashrate3rsi': [('2009-01-03', '2009-01-09')],
 'hashra

                                 ('2021-06-06', '2021-06-07')],
 'mediantransactionvalue30var': [('2009-01-03', '2010-08-15')],
 'mediantransactionvalue30wma': [('2009-01-03', '2010-08-15')],
 'mediantransactionvalue3ema': [('2009-01-03', '2010-07-19')],
 'mediantransactionvalue3mom': [('2009-01-03', '2010-07-20'),
                                ('2010-11-24', '2010-11-25')],
 'mediantransactionvalue3roc': [('2009-01-03', '2010-07-20')],
 'mediantransactionvalue3rsi': [('2009-01-03', '2010-07-20')],
 'mediantransactionvalue3sma': [('2009-01-03', '2010-07-19')],
 'mediantransactionvalue3std': [('2009-01-03', '2010-07-19'),
                                ('2010-11-21', '2010-11-22')],
 'mediantransactionvalue3trx': [('2009-01-03', '2010-07-24')],
 'mediantransactionvalue3var': [('2009-01-03', '2010-07-19'),
                                ('2010-08-15', '2010-08-16'),
                                ('2010-08-22', '2010-08-26'),
                                ('2010-09-07', '2010-09-0

 'top100cap30sma': [('2009-01-03', '2009-02-01')],
 'top100cap30std': [('2009-01-03', '2009-02-01')],
 'top100cap30trx': [('2009-01-03', '2009-04-01'),
                    ('2011-01-19', '2011-01-20'),
                    ('2011-07-19', '2011-07-20'),
                    ('2011-09-19', '2011-09-20'),
                    ('2013-03-26', '2013-03-27'),
                    ('2013-06-10', '2013-06-11'),
                    ('2013-07-01', '2013-07-02'),
                    ('2014-02-03', '2014-02-04'),
                    ('2014-02-15', '2014-02-16'),
                    ('2014-07-16', '2014-07-17'),
                    ('2016-03-09', '2016-03-10'),
                    ('2016-04-15', '2016-04-16'),
                    ('2016-06-11', '2016-06-12'),
                    ('2016-11-11', '2016-11-12'),
                    ('2016-12-09', '2016-12-10'),
                    ('2017-06-12', '2017-06-13'),
                    ('2017-07-14', '2017-07-15'),
                    ('2018-07-31', '2018-08-01')

                          ('2011-04-07', '2011-04-09'),
                          ('2011-04-22', '2011-04-24'),
                          ('2011-04-26', '2011-04-27'),
                          ('2011-08-30', '2011-08-31'),
                          ('2011-10-13', '2011-10-14'),
                          ('2011-11-01', '2011-11-02'),
                          ('2011-11-08', '2011-11-09'),
                          ('2011-11-17', '2011-11-19'),
                          ('2011-11-20', '2011-11-24'),
                          ('2011-11-25', '2011-11-27'),
                          ('2011-11-29', '2011-12-01'),
                          ('2011-12-02', '2011-12-03'),
                          ('2011-12-04', '2011-12-08'),
                          ('2011-12-09', '2011-12-11'),
                          ('2011-12-16', '2011-12-17'),
                          ('2011-12-23', '2011-12-26'),
                          ('2012-03-06', '2012-03-07'),
                          ('2012-03-11', '2012-0

                       ('2009-10-10', '2009-10-11'),
                       ('2009-10-12', '2009-10-13'),
                       ('2009-10-14', '2009-10-20'),
                       ('2009-10-21', '2009-10-26'),
                       ('2009-10-27', '2009-10-30'),
                       ('2009-10-31', '2009-11-01'),
                       ('2009-11-02', '2009-11-03'),
                       ('2009-11-04', '2009-11-05'),
                       ('2009-11-06', '2009-11-07'),
                       ('2009-11-08', '2009-11-10'),
                       ('2009-11-11', '2009-11-12'),
                       ('2009-11-13', '2009-11-18'),
                       ('2009-11-19', '2009-11-25'),
                       ('2009-11-26', '2009-11-27'),
                       ('2009-11-28', '2009-12-03'),
                       ('2009-12-04', '2009-12-15'),
                       ('2009-12-16', '2009-12-20'),
                       ('2009-12-21', '2009-12-22'),
                       ('2009-12-24', '2009-12

 'transactionvalue14trx': [('2009-01-03', '2010-08-26'),
                           ('2017-01-25', '2017-01-26')],
 'transactionvalue14var': [('2009-01-03', '2010-07-30')],
 'transactionvalue14wma': [('2009-01-03', '2010-07-30')],
 'transactionvalue30ema': [('2009-01-03', '2010-08-15')],
 'transactionvalue30mom': [('2009-01-03', '2010-08-16')],
 'transactionvalue30roc': [('2009-01-03', '2010-08-16')],
 'transactionvalue30rsi': [('2009-01-03', '2010-08-16')],
 'transactionvalue30sma': [('2009-01-03', '2010-08-15')],
 'transactionvalue30std': [('2009-01-03', '2010-08-15')],
 'transactionvalue30trx': [('2009-01-03', '2010-10-13'),
                           ('2020-07-07', '2020-07-08')],
 'transactionvalue30var': [('2009-01-03', '2010-08-15')],
 'transactionvalue30wma': [('2009-01-03', '2010-08-15')],
 'transactionvalue3ema': [('2009-01-03', '2010-07-19')],
 'transactionvalue3mom': [('2009-01-03', '2010-07-20'),
                          ('2010-10-13', '2010-10-14')],
 'transactionvalue3r

### Merge the data from both of these different sources

In [24]:
# create a master dataframe with all the columns from both data sources
# (combined on the shared "date" column by the project helper)
list_of_dfs = [bitcoinity_data, bitinfocharts_data]
merged_internal_data = dc.merge_dfs_on_col(list_of_dfs, "date")

In [25]:
# turn the same feature columns from both these data sources into one column
# (the config maps: joined column name -> the pair of source columns to average)
cols_to_join = config_variables["bitcoin_internal_data_cols_to_join"]
for col_name, (c1, c2) in cols_to_join.items():
    # row-wise mean; NaNs are skipped, so a value present in only one source is kept
    joined_col = merged_internal_data[[c1, c2]].mean(axis=1)
    # Drop the sources BEFORE adding the result: if the joined name equals one of the
    # source names, assigning first and dropping afterwards deletes the joined column.
    # Rebinding instead of inplace=True avoids mutating the frame object in place.
    merged_internal_data = merged_internal_data.drop(columns=[c1, c2])
    merged_internal_data[col_name] = joined_col

In [26]:
# (rows, columns) of the merged frame after joining the duplicated feature columns
merged_internal_data.shape

(4540, 890)

In [27]:
# peek at the first rows of the merged frame
merged_internal_data.head()

Unnamed: 0,date,trading_volume,trades_per_minute,volatility,bid_and_ask_spread,bid_and_ask_sum_asks,bid_and_ask_sum_bids,time_between_blocks,block_size_votes,rank_bit_x,rank_bitfinex,rank_bitstamp,rank_btce,rank_coinbase,rank_itbit,rank_kraken,rank_mtgox,rank_okcoin,rank_others,marketcap3sma,marketcap7sma,marketcap14sma,marketcap30sma,marketcap90sma,marketcap3ema,marketcap7ema,marketcap14ema,marketcap30ema,marketcap90ema,marketcap3wma,marketcap7wma,marketcap14wma,marketcap30wma,marketcap90wma,marketcap3trx,marketcap7trx,marketcap14trx,marketcap30trx,marketcap90trx,marketcap3mom,marketcap7mom,marketcap14mom,marketcap30mom,marketcap90mom,marketcap3std,marketcap7std,marketcap14std,marketcap30std,marketcap90std,marketcap3var,...,google_trends90roc,top100cap,top100cap3sma,top100cap7sma,top100cap14sma,top100cap30sma,top100cap90sma,top100cap3ema,top100cap7ema,top100cap14ema,top100cap30ema,top100cap90ema,top100cap3wma,top100cap7wma,top100cap14wma,top100cap30wma,top100cap90wma,top100cap3trx,top100cap7trx,top100cap14trx,top100cap30trx,top100cap90trx,top100cap3mom,top100cap7mom,top100cap14mom,top100cap30mom,top100cap90mom,top100cap3std,top100cap7std,top100cap14std,top100cap30std,top100cap90std,top100cap3var,top100cap7var,top100cap14var,top100cap30var,top100cap90var,top100cap3rsi,top100cap7rsi,top100cap14rsi,top100cap30rsi,top100cap90rsi,top100cap3roc,top100cap7roc,top100cap14roc,top100cap30roc,top100cap90roc,price,hashrate,num_transactions
0,2009-01-03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,2009-01-04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,2009-01-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,2009-01-06,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,2009-01-07,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [28]:
# peek at the last rows of the merged frame
merged_internal_data.tail()

Unnamed: 0,date,trading_volume,trades_per_minute,volatility,bid_and_ask_spread,bid_and_ask_sum_asks,bid_and_ask_sum_bids,time_between_blocks,block_size_votes,rank_bit_x,rank_bitfinex,rank_bitstamp,rank_btce,rank_coinbase,rank_itbit,rank_kraken,rank_mtgox,rank_okcoin,rank_others,marketcap3sma,marketcap7sma,marketcap14sma,marketcap30sma,marketcap90sma,marketcap3ema,marketcap7ema,marketcap14ema,marketcap30ema,marketcap90ema,marketcap3wma,marketcap7wma,marketcap14wma,marketcap30wma,marketcap90wma,marketcap3trx,marketcap7trx,marketcap14trx,marketcap30trx,marketcap90trx,marketcap3mom,marketcap7mom,marketcap14mom,marketcap30mom,marketcap90mom,marketcap3std,marketcap7std,marketcap14std,marketcap30std,marketcap90std,marketcap3var,...,google_trends90roc,top100cap,top100cap3sma,top100cap7sma,top100cap14sma,top100cap30sma,top100cap90sma,top100cap3ema,top100cap7ema,top100cap14ema,top100cap30ema,top100cap90ema,top100cap3wma,top100cap7wma,top100cap14wma,top100cap30wma,top100cap90wma,top100cap3trx,top100cap7trx,top100cap14trx,top100cap30trx,top100cap90trx,top100cap3mom,top100cap7mom,top100cap14mom,top100cap30mom,top100cap90mom,top100cap3std,top100cap7std,top100cap14std,top100cap30std,top100cap90std,top100cap3var,top100cap7var,top100cap14var,top100cap30var,top100cap90var,top100cap3rsi,top100cap7rsi,top100cap14rsi,top100cap30rsi,top100cap90rsi,top100cap3roc,top100cap7roc,top100cap14roc,top100cap30roc,top100cap90roc,price,hashrate,num_transactions
4535,2021-06-04,10316.58531,126.599583,130.66954,3.163537,28035050.0,40907700.0,,,0.013837,2367.101322,643.822424,,2253.85999,,918.594176,,,286.762147,705036500000.0,685966900000.0,693796900000.0,824637700000.0,980057700000.0,700276500000.0,697846200000.0,719055100000.0,794676700000.0,882203400000.0,704493200000.0,694570500000.0,693694700000.0,745818600000.0,919870300000.0,0.584,-0.887,-1.383,-0.505,0.479,9806085000.0,9463307000.0,-37374290000.0,-349752000000.0,-210977700000.0,23692410000.0,39490510000.0,47155730000.0,311293200000.0,299154200000.0,1.403326e+20,...,27.945,15.394,15.381,15.36,15.338,15.242,14.695,15.38,15.362,15.33,15.209,14.763,15.382,15.37,15.35,15.312,14.933,0.061,0.049,0.106,0.121,0.048,0.013,0.09,0.063,0.5,1.261,0.03,0.055,0.07,0.302,0.868,,0.001,0.001,0.023,0.188,70.325,66.323,69.08,72.488,67.449,0.084,0.589,0.412,3.36,8.92,37103.326436,1.427373e+20,236541.0
4536,2021-06-05,8809.493729,105.347778,139.557779,3.173245,26884280.0,47525290.0,,,0.013392,2296.656818,636.978282,,2255.603132,,919.273682,,,278.161825,700776600000.0,689588400000.0,692612100000.0,811973100000.0,977275100000.0,692919800000.0,694775400000.0,714589500000.0,787637100000.0,877881600000.0,694756600000.0,694469500000.0,692596900000.0,736846100000.0,913397900000.0,0.119,-0.741,-1.346,-0.528,0.468,-12779450000.0,25350450000.0,-16587050000.0,-379938000000.0,-250432600000.0,30571510000.0,33587410000.0,47090150000.0,301836700000.0,305336800000.0,2.336543e+20,...,,15.394,15.383,15.367,15.342,15.258,14.709,15.387,15.37,15.339,15.221,14.777,15.389,15.379,15.357,15.322,14.949,0.058,0.05,0.102,0.12,0.049,0.005,0.053,0.048,0.488,1.284,0.032,0.057,0.076,0.28,0.871,,0.001,0.001,0.02,0.19,70.644,66.44,69.126,72.506,67.456,0.031,0.346,0.315,3.276,9.103,36588.112917,1.416449e+20,194449.0
4537,2021-06-06,5645.201043,72.603889,85.333909,3.219852,27790920.0,47568150.0,,,0.012962,2423.252163,608.964865,,2186.218494,,839.376925,,,255.808708,684663300000.0,690887200000.0,693799800000.0,798996300000.0,974235200000.0,683131700000.0,689417500000.0,709090000000.0,780263400000.0,873386300000.0,681040000000.0,690408300000.0,690027800000.0,727902200000.0,906718000000.0,-0.378,-0.659,-1.309,-0.551,0.457,-48339910000.0,9091399000.0,16627880000.0,-389303900000.0,-273591400000.0,17795950000.0,30089030000.0,44156090000.0,290888100000.0,311862500000.0,7.917395e+19,...,,15.41,15.399,15.379,15.346,15.274,14.724,15.398,15.38,15.348,15.234,14.791,15.402,15.389,15.366,15.331,14.964,0.061,0.051,0.097,0.12,0.049,0.049,0.08,0.058,0.484,1.329,0.014,0.055,0.083,0.256,0.873,,0.001,0.002,0.016,0.191,78.902,69.451,70.279,72.955,67.635,0.318,0.519,0.378,3.244,9.439,35963.987147,1.492974e+20,185155.0
4538,2021-06-07,11122.304641,116.845139,118.959724,3.214679,30025520.0,47405020.0,,,0.013481,2389.861573,599.284675,,2085.406491,,812.920649,,,271.071458,677019000000.0,690206200000.0,692327700000.0,784911800000.0,970520400000.0,677641100000.0,685100700000.0,704164700000.0,773288300000.0,868963500000.0,674783600000.0,685724100000.0,687141200000.0,719718600000.0,900078800000.0,-0.608,-0.61,-1.272,-0.571,0.445,-22932870000.0,-4766648000.0,-20609450000.0,-422534800000.0,-334331600000.0,12122330000.0,31505270000.0,45548830000.0,272598800000.0,318139200000.0,3.673772e+19,...,,15.438,15.414,15.395,15.358,15.29,14.739,15.418,15.395,15.36,15.247,14.805,15.421,15.404,15.379,15.342,14.98,0.079,0.054,0.094,0.119,0.05,0.044,0.117,0.165,0.493,1.34,0.036,0.044,0.085,0.232,0.876,,,0.002,0.013,0.192,88.254,74.491,72.377,73.79,67.971,0.286,0.767,1.082,3.297,9.509,39386.718512,1.46743e+20,235346.0
4539,2021-06-08,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,15.432,15.426,15.403,15.368,15.307,14.753,15.425,15.404,15.37,15.259,14.819,15.43,15.413,15.389,15.351,14.995,0.074,0.056,0.091,0.118,0.051,0.037,0.051,0.147,0.483,1.325,0.024,0.049,0.083,0.199,0.877,,0.001,0.002,0.01,0.192,77.102,71.497,71.2,73.281,67.816,0.242,0.331,0.959,3.23,9.392,33194.0,,


### Fill in the NaN values using interpolation

In [29]:
# Interpolate gaps down each column (axis=0) using pandas' default linear method.
# Only the numeric columns are interpolated: calling DataFrame.interpolate on a frame
# that still holds non-numeric columns is deprecated in recent pandas releases, and
# such columns cannot be interpolated anyway.
# NOTE(review): "date" is presumably the only non-numeric column — confirm.
# NOTE: with the defaults, NaNs before a column's first valid value are left as NaN,
# while NaNs after its last valid value are filled with that last value.
numeric_cols = merged_internal_data.select_dtypes(include="number").columns
processed_internal_data = merged_internal_data.copy()
processed_internal_data[numeric_cols] = merged_internal_data[numeric_cols].interpolate(axis=0)

#### Check again if there are any NaN values

In [30]:
# Re-run the NaN report on the interpolated frame to see which gaps remain
# (output=True prints the report, judging by the cell output).
processed_internal_nan_col_dates = dc.find_col_nan_ranges(processed_internal_data, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=889.0), HTML(value='')))


---------------------------------------------------------------------
889 columns had a 'NaN' value in them:
['trading_volume',
 'trades_per_minute',
 'volatility',
 'bid_and_ask_spread',
 'bid_and_ask_sum_asks',
 'bid_and_ask_sum_bids',
 'time_between_blocks',
 'block_size_votes',
 'rank_bit_x',
 'rank_bitfinex',
 'rank_bitstamp',
 'rank_btce',
 'rank_coinbase',
 'rank_itbit',
 'rank_kraken',
 'rank_mtgox',
 'rank_okcoin',
 'rank_others',
 'marketcap3sma',
 'marketcap7sma',
 'marketcap14sma',
 'marketcap30sma',
 'marketcap90sma',
 'marketcap3ema',
 'marketcap7ema',
 'marketcap14ema',
 'marketcap30ema',
 'marketcap90ema',
 'marketcap3wma',
 'marketcap7wma',
 'marketcap14wma',
 'marketcap30wma',
 'marketcap90wma',
 'marketcap3trx',
 'marketcap7trx',
 'marketcap14trx',
 'marketcap30trx',
 'marketcap90trx',
 'marketcap3mom',
 'marketcap7mom',
 'marketcap14mom',
 'marketcap30mom',
 'marketcap90mom',
 'marketcap3std',
 'marketcap7std',
 'marketcap14std',
 'marketcap30std',
 'marketcap90std

 'price30std': [('2009-01-03', '2010-08-15')],
 'price30trx': [('2009-01-03', '2010-10-13')],
 'price30var': [('2009-01-03', '2010-10-27')],
 'price30wma': [('2009-01-03', '2010-08-15')],
 'price3ema': [('2009-01-03', '2010-07-19')],
 'price3mom': [('2009-01-03', '2010-07-20')],
 'price3roc': [('2009-01-03', '2010-07-20')],
 'price3rsi': [('2009-01-03', '2010-07-20')],
 'price3sma': [('2009-01-03', '2010-07-19')],
 'price3std': [('2009-01-03', '2010-07-19')],
 'price3trx': [('2009-01-03', '2010-07-24')],
 'price3var': [('2009-01-03', '2010-10-27')],
 'price3wma': [('2009-01-03', '2010-07-19')],
 'price7ema': [('2009-01-03', '2010-07-23')],
 'price7mom': [('2009-01-03', '2010-07-24')],
 'price7roc': [('2009-01-03', '2010-07-24')],
 'price7rsi': [('2009-01-03', '2010-07-24')],
 'price7sma': [('2009-01-03', '2010-07-23')],
 'price7std': [('2009-01-03', '2010-07-23')],
 'price7trx': [('2009-01-03', '2010-08-05')],
 'price7var': [('2009-01-03', '2010-10-27')],
 'price7wma': [('2009-01-03', 

### Output internal bitcoin data file

In [31]:
# Persist the interpolated internal features in the scrape folder;
# index=False keeps the row index out of the CSV.
processed_internal_data.to_csv(os.path.join(scrape_directory, "all_internal_data.csv"), index=False)

Could also scrape:

https://www.blockchain.com/charts/total-bitcoins

# ----------------------------------------------------------------------------------------------------------------

As well as the internal data being useful to bitcoin price prediction, it is equally important to include external data. As I saw from my literature review, these features are very good indicators of bitcoin price and this has been demonstrated in multiple studies. [REFERENCE]

I will deal with multiple different types of external data, including, stock data, commodity data, twitter data and other news data. We will explore the data collection and processing for this below.

# Get stock data

Given that Bitcoin is a financial asset, stock data will also play a part in its price.
In https://www.researchgate.net/publication/330224580_Analysis_of_the_relationships_between_Bitcoin_and_exchange_rate_commodities_and_global_indexes_by_asymmetric_causality_test, they look at the relationship between stock global indexes and Bitcoin. 

https://www.ig.com/en/trading-opportunities/top-10-most-traded-commodities-180905
The data we’re importing are price data for copper and gold futures. The ratio of copper-to-gold prices is often seen as an indicator of economic health, with copper going up relative to gold when there is strong economic growth and hence industrial demand. If gold outperforms copper, that’s often a sign that investors are worried about growth and inflation.

### Get the stock, commodity, and currency exchange data from Yahoo Finance

In [32]:
# name -> Yahoo Finance ticker mapping (per its config key), read from the config file
yahoo_name_to_ticker_map = config_variables["yahoo_name_to_ticker_map"]

In [33]:
# Fetch the Yahoo Finance series for the mapped tickers through the project helper.
# NOTE(review): presumably one set of open/high/low/close(/volume) columns per name,
# starting at start_date — confirm in dc.
yahoo_stock_df = dc.scrape_stock_from_yahoo_finance(scrape_directory, yahoo_name_to_ticker_map, start_date)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=60.0), HTML(value='')))




### Check this dataframe for nan values

In [34]:
# Report which Yahoo columns contain NaNs and over which date ranges
# (output=True prints the report, judging by the cell output).
yahoo_nan_col_dates = dc.find_col_nan_ranges(yahoo_stock_df, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=5.0), HTML(value='')))


---------------------------------------------------------------------
5 columns had a 'NaN' value in them:
['aluminium_futures_open',
 'aluminium_futures_high',
 'aluminium_futures_low',
 'aluminium_futures_close',
 'aluminium_futures_volume']
---------------------------------------------------------------------
The date ranges in these columns where the NaN's are located are:
{'aluminium_futures_close': [('2009-01-01', '2014-05-06')],
 'aluminium_futures_high': [('2009-01-01', '2014-05-06')],
 'aluminium_futures_low': [('2009-01-01', '2014-05-06')],
 'aluminium_futures_open': [('2009-01-01', '2014-05-06')],
 'aluminium_futures_volume': [('2009-01-01', '2014-05-06')]}


### Manually inspect the dataframe

In [35]:
# (rows, columns) of the Yahoo Finance frame
yahoo_stock_df.shape

(4542, 287)

In [36]:
# peek at the first rows of the Yahoo Finance frame
yahoo_stock_df.head()

Unnamed: 0_level_0,sp_500_open,sp_500_high,sp_500_low,sp_500_close,sp_500_volume,dow_jones_open,dow_jones_high,dow_jones_low,dow_jones_close,dow_jones_volume,vix_open,vix_high,vix_low,vix_close,ftse_100_open,ftse_100_high,ftse_100_low,ftse_100_close,ftse_100_volume,euro_stoxx_50_open,euro_stoxx_50_high,euro_stoxx_50_low,euro_stoxx_50_close,euro_stoxx_50_volume,nasdaq_exchange_open,nasdaq_exchange_high,nasdaq_exchange_low,nasdaq_exchange_close,nasdaq_exchange_volume,new_york_exchange_open,new_york_exchange_high,new_york_exchange_low,new_york_exchange_close,new_york_exchange_volume,amex_exchange_open,amex_exchange_high,amex_exchange_low,amex_exchange_close,amex_exchange_volume,russell_2000_open,russell_2000_high,russell_2000_low,russell_2000_close,russell_2000_volume,euronext_100_euro_open,euronext_100_euro_high,euronext_100_euro_low,euronext_100_euro_close,euronext_100_euro_volume,irish_exchange_open,...,live_cattle_futures_volume,lean_hogs_futures_open,lean_hogs_futures_high,lean_hogs_futures_low,lean_hogs_futures_close,lean_hogs_futures_volume,eur_gbp_exchange_open,eur_gbp_exchange_high,eur_gbp_exchange_low,eur_gbp_exchange_close,eur_chf_exchange_open,eur_chf_exchange_high,eur_chf_exchange_low,eur_chf_exchange_close,eur_jpy_exchange_open,eur_jpy_exchange_high,eur_jpy_exchange_low,eur_jpy_exchange_close,gbp_jyp_exchange_open,gbp_jyp_exchange_high,gbp_jyp_exchange_low,gbp_jyp_exchange_close,usd_gbp_exchange_open,usd_gbp_exchange_high,usd_gbp_exchange_low,usd_gbp_exchange_close,usd_eur_exchange_open,usd_eur_exchange_high,usd_eur_exchange_low,usd_eur_exchange_close,usd_cad_exchange_open,usd_cad_exchange_high,usd_cad_exchange_low,usd_cad_exchange_close,usd_aud_exchange_open,usd_aud_exchange_high,usd_aud_exchange_low,usd_aud_exchange_close,usd_mxn_exchange_open,usd_mxn_exchange_high,usd_mxn_exchange_low,usd_mxn_exchange_close,usd_hkd_exchange_open,usd_hkd_exchange_high,usd_hkd_exchange_low,usd_hkd_exchange_close,usd_jpy_exchange_open,usd_jpy_exchange_high,usd_jpy_exchang
e_low,usd_jpy_exchange_close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2009-01-01,903.25,903.25,903.25,903.25,0,8776.389648,8776.389648,8776.389648,8776.389648,0,40.0,40.0,40.0,40.0,4392.700195,4392.700195,4392.700195,4392.700195,0,2451.47998,2451.47998,2451.47998,2451.47998,0,1577.030029,1577.030029,1577.030029,1577.030029,0,5757.049805,5757.049805,5757.049805,5757.049805,0,1397.530029,1397.530029,1397.530029,1397.530029,0,499.450012,499.450012,499.450012,499.450012,0,544.919983,544.919983,544.919983,544.919983,0,2350.610107,...,0,59.724998,59.724998,59.724998,59.724998,0,0.9586,0.9586,0.9472,0.9508,1.473,1.4942,1.473,1.4906,126.870003,128.179993,125.57,126.919998,131.380005,134.563004,131.320007,133.501999,0.6898,0.6898,0.67556,0.67838,0.71531,0.71659,0.71154,0.71434,1.2272,1.2272,1.2177,1.2218,1.4174,1.4453,1.401,1.4184,13.756,13.756,13.3716,13.434,7.7497,7.8081,7.7275,7.75,90.650002,91.25,87.559998,90.667
2009-01-02,902.98999,934.72998,899.349976,931.799988,4048270000,8772.25,9065.280273,8760.780273,9034.69043,213700000,39.580002,39.82,36.880001,39.189999,4434.200195,4561.799805,4430.0,4561.799805,407295500,2451.47998,2451.47998,2451.47998,2451.47998,0,1578.869995,1636.030029,1571.97998,1632.209961,1438410000,5755.759766,5937.02002,5732.680176,5915.72998,4048270000,1397.530029,1444.290039,1396.819946,1444.290039,139700,499.51001,508.450012,494.73999,505.839996,4048270000,546.909973,566.380005,546.530029,566.380005,153143300,2343.919922,...,16544,60.875,63.875,60.875,63.849998,15191,0.9507,0.9644,0.9438,0.9583,1.4908,1.5049,1.4814,1.5022,126.976997,128.0,126.032997,127.860001,133.526993,133.970001,131.559998,133.380005,0.67926,0.69382,0.67728,0.68852,0.71415,0.72239,0.71311,0.71824,1.2224,1.2274,1.2062,1.2094,1.4184,1.444,1.4045,1.4098,13.449,13.857,13.4244,13.775,7.749,7.7547,7.7445,7.7502,90.660004,92.370003,90.660004,91.790001
2009-01-03,931.799988,931.799988,931.799988,931.799988,0,9034.69043,9034.69043,9034.69043,9034.69043,0,39.189999,39.189999,39.189999,39.189999,4561.799805,4561.799805,4561.799805,4561.799805,0,2451.47998,2451.47998,2451.47998,2451.47998,0,1632.209961,1632.209961,1632.209961,1632.209961,0,5915.72998,5915.72998,5915.72998,5915.72998,0,1444.290039,1444.290039,1444.290039,1444.290039,0,505.839996,505.839996,505.839996,505.839996,0,566.380005,566.380005,566.380005,566.380005,0,2419.5,...,0,63.849998,63.849998,63.849998,63.849998,0,0.9583,0.9583,0.9583,0.9583,1.5022,1.5022,1.5022,1.5022,127.860001,127.860001,127.860001,127.860001,133.380005,133.380005,133.380005,133.380005,0.68852,0.68852,0.68852,0.68852,0.71824,0.71824,0.71824,0.71824,1.2094,1.2094,1.2094,1.2094,1.4098,1.4098,1.4098,1.4098,13.775,13.775,13.775,13.775,7.7502,7.7502,7.7502,7.7502,91.790001,91.790001,91.790001,91.790001
2009-01-04,931.799988,931.799988,931.799988,931.799988,0,9034.69043,9034.69043,9034.69043,9034.69043,0,39.189999,39.189999,39.189999,39.189999,4561.799805,4561.799805,4561.799805,4561.799805,0,2451.47998,2451.47998,2451.47998,2451.47998,0,1632.209961,1632.209961,1632.209961,1632.209961,0,5915.72998,5915.72998,5915.72998,5915.72998,0,1444.290039,1444.290039,1444.290039,1444.290039,0,505.839996,505.839996,505.839996,505.839996,0,566.380005,566.380005,566.380005,566.380005,0,2419.5,...,0,63.849998,63.849998,63.849998,63.849998,0,0.9583,0.9583,0.9583,0.9583,1.5022,1.5022,1.5022,1.5022,127.860001,127.860001,127.860001,127.860001,133.380005,133.380005,133.380005,133.380005,0.68852,0.68852,0.68852,0.68852,0.71824,0.71824,0.71824,0.71824,1.2094,1.2094,1.2094,1.2094,1.4098,1.4098,1.4098,1.4098,13.775,13.775,13.775,13.775,7.7502,7.7502,7.7502,7.7502,91.790001,91.790001,91.790001,91.790001
2009-01-05,929.169983,936.630005,919.530029,927.450012,5413910000,9027.129883,9034.370117,8892.360352,8952.889648,233760000,39.240002,40.220001,38.299999,39.080002,4561.799805,4618.100098,4520.799805,4579.600098,836676100,2551.699951,2563.959961,2527.330078,2553.409912,0,1621.47998,1640.459961,1604.630005,1628.030029,1816580000,5915.720215,5963.700195,5839.919922,5908.430176,5413910000,1444.290039,1461.219971,1425.619995,1461.219971,169000,505.559998,508.230011,496.01001,505.029999,5413910000,571.380005,572.919983,563.26001,571.210022,224922900,2419.5,...,11996,63.950001,63.950001,62.400002,62.674999,15383,0.9604,0.9633,0.923,0.9266,1.4951,1.5139,1.4929,1.5119,128.460007,128.460007,126.417999,127.110001,133.639999,137.529999,132.919998,137.139999,0.68874,0.69252,0.67849,0.67953,0.71644,0.73768,0.71644,0.73282,1.2081,1.2213,1.1866,1.188,1.3974,1.4151,1.39,1.4,13.7726,13.7742,13.42,13.4275,7.7499,7.7557,7.743,7.7509,92.050003,93.529999,91.75,93.217003


In [37]:
# Show the final rows of yahoo_stock_df (the most recent dates) as a sanity check.
yahoo_stock_df.tail()

Unnamed: 0_level_0,sp_500_open,sp_500_high,sp_500_low,sp_500_close,sp_500_volume,dow_jones_open,dow_jones_high,dow_jones_low,dow_jones_close,dow_jones_volume,vix_open,vix_high,vix_low,vix_close,ftse_100_open,ftse_100_high,ftse_100_low,ftse_100_close,ftse_100_volume,euro_stoxx_50_open,euro_stoxx_50_high,euro_stoxx_50_low,euro_stoxx_50_close,euro_stoxx_50_volume,nasdaq_exchange_open,nasdaq_exchange_high,nasdaq_exchange_low,nasdaq_exchange_close,nasdaq_exchange_volume,new_york_exchange_open,new_york_exchange_high,new_york_exchange_low,new_york_exchange_close,new_york_exchange_volume,amex_exchange_open,amex_exchange_high,amex_exchange_low,amex_exchange_close,amex_exchange_volume,russell_2000_open,russell_2000_high,russell_2000_low,russell_2000_close,russell_2000_volume,euronext_100_euro_open,euronext_100_euro_high,euronext_100_euro_low,euronext_100_euro_close,euronext_100_euro_volume,irish_exchange_open,...,live_cattle_futures_volume,lean_hogs_futures_open,lean_hogs_futures_high,lean_hogs_futures_low,lean_hogs_futures_close,lean_hogs_futures_volume,eur_gbp_exchange_open,eur_gbp_exchange_high,eur_gbp_exchange_low,eur_gbp_exchange_close,eur_chf_exchange_open,eur_chf_exchange_high,eur_chf_exchange_low,eur_chf_exchange_close,eur_jpy_exchange_open,eur_jpy_exchange_high,eur_jpy_exchange_low,eur_jpy_exchange_close,gbp_jyp_exchange_open,gbp_jyp_exchange_high,gbp_jyp_exchange_low,gbp_jyp_exchange_close,usd_gbp_exchange_open,usd_gbp_exchange_high,usd_gbp_exchange_low,usd_gbp_exchange_close,usd_eur_exchange_open,usd_eur_exchange_high,usd_eur_exchange_low,usd_eur_exchange_close,usd_cad_exchange_open,usd_cad_exchange_high,usd_cad_exchange_low,usd_cad_exchange_close,usd_aud_exchange_open,usd_aud_exchange_high,usd_aud_exchange_low,usd_aud_exchange_close,usd_mxn_exchange_open,usd_mxn_exchange_high,usd_mxn_exchange_low,usd_mxn_exchange_close,usd_hkd_exchange_open,usd_hkd_exchange_high,usd_hkd_exchange_low,usd_hkd_exchange_close,usd_jpy_exchange_open,usd_jpy_exchange_high,usd_jpy_exchang
e_low,usd_jpy_exchange_close
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2021-06-04,4206.049805,4233.450195,4206.049805,4229.890137,3487070000,34618.691406,34772.121094,34618.691406,34756.390625,270050000,18.09,18.42,16.18,16.42,7064.399902,7071.799805,7039.700195,7069.0,425216000,4079.290039,4091.949951,4070.350098,4089.379883,23525900,13697.25,13826.820312,13692.009766,13814.490234,4341800000,16632.339844,16718.009766,16632.339844,16708.740234,3487070000,3267.129883,3292.360107,3264.790039,3285.75,0,2281.070068,2293.679932,2277.439941,2286.409912,3487070000,1260.160034,1263.26001,1257.319946,1262.790039,149203500,8284.820312,...,5136,119.525002,120.150002,118.849998,119.5,7167,0.85972,0.86001,0.8566,0.8599,1.0959,1.0966,1.0937,1.0959,133.759995,133.764999,133.139999,133.753006,155.559998,155.718994,155.020004,155.550003,0.7087,0.709925,0.7042,0.70872,0.824382,0.82607,0.8205,0.824198,1.21095,1.21321,1.20734,1.2107,1.304631,1.30678,1.290989,1.3045,20.153811,20.20525,19.9077,20.1404,7.7577,7.7587,7.75626,7.758,110.286003,110.322998,109.371002,110.265999
2021-06-05,4229.890137,4229.890137,4229.890137,4229.890137,0,34756.390625,34756.390625,34756.390625,34756.390625,0,16.42,16.42,16.42,16.42,7069.0,7069.0,7069.0,7069.0,0,4089.379883,4089.379883,4089.379883,4089.379883,0,13814.490234,13814.490234,13814.490234,13814.490234,0,16708.740234,16708.740234,16708.740234,16708.740234,0,3285.75,3285.75,3285.75,3285.75,0,2286.409912,2286.409912,2286.409912,2286.409912,0,1262.790039,1262.790039,1262.790039,1262.790039,0,8273.700195,...,0,119.5,119.5,119.5,119.5,0,0.8599,0.8599,0.8599,0.8599,1.0959,1.0959,1.0959,1.0959,133.753006,133.753006,133.753006,133.753006,155.550003,155.550003,155.550003,155.550003,0.70872,0.70872,0.70872,0.70872,0.824198,0.824198,0.824198,0.824198,1.2107,1.2107,1.2107,1.2107,1.3045,1.3045,1.3045,1.3045,20.1404,20.1404,20.1404,20.1404,7.758,7.758,7.758,7.758,110.265999,110.265999,110.265999,110.265999
2021-06-06,4229.890137,4229.890137,4229.890137,4229.890137,0,34756.390625,34756.390625,34756.390625,34756.390625,0,16.42,16.42,16.42,16.42,7069.0,7069.0,7069.0,7069.0,0,4089.379883,4089.379883,4089.379883,4089.379883,0,13814.490234,13814.490234,13814.490234,13814.490234,0,16708.740234,16708.740234,16708.740234,16708.740234,0,3285.75,3285.75,3285.75,3285.75,0,2286.409912,2286.409912,2286.409912,2286.409912,0,1262.790039,1262.790039,1262.790039,1262.790039,0,8273.700195,...,0,119.5,119.5,119.5,119.5,0,0.8599,0.8599,0.8599,0.8599,1.0959,1.0959,1.0959,1.0959,133.753006,133.753006,133.753006,133.753006,155.550003,155.550003,155.550003,155.550003,0.70872,0.70872,0.70872,0.70872,0.824198,0.824198,0.824198,0.824198,1.2107,1.2107,1.2107,1.2107,1.3045,1.3045,1.3045,1.3045,20.1404,20.1404,20.1404,20.1404,7.758,7.758,7.758,7.758,110.265999,110.265999,110.265999,110.265999
2021-06-07,4229.339844,4232.339844,4215.660156,4226.52002,3835570000,34766.199219,34820.910156,34574.511719,34630.238281,259120000,17.34,17.35,15.78,16.42,7069.0,7098.299805,7042.200195,7077.200195,963131300,4089.27002,4107.029785,4069.52002,4097.649902,23910700,13802.820312,13889.110352,13784.889648,13881.719727,4602940000,16708.740234,16725.550781,16669.900391,16709.300781,3835570000,3285.75,3313.620117,3280.149902,3285.75,0,2286.929932,2321.639893,2286.929932,2319.179932,3835570000,1260.699951,1269.089966,1256.569946,1266.800049,139397900,8276.620117,...,5136,119.800003,119.949997,119.300003,119.900002,7167,0.85897,0.86124,0.8578,0.859,1.09407,1.09479,1.0929,1.0939,133.339996,133.339996,132.869995,133.360001,155.244003,155.244003,154.539993,155.229996,0.70612,0.7086,0.70502,0.706,0.82198,0.82338,0.819538,0.8218,1.2075,1.21065,1.20618,1.20758,1.292,1.294147,1.2873,1.291322,19.958401,19.962151,19.725201,19.943899,7.757,7.75939,7.7559,7.75713,109.632004,109.632004,109.184998,109.616997
2021-06-08,4226.52002,4226.52002,4226.52002,4226.52002,0,34630.238281,34630.238281,34630.238281,34630.238281,0,16.42,16.42,16.42,16.42,7077.220215,7102.75,7074.939941,7102.680176,0,4098.629883,4113.830078,4094.330078,4113.310059,0,13881.719727,13881.719727,13881.719727,13881.719727,0,16709.300781,16709.300781,16709.300781,16709.300781,0,3285.75,3285.75,3285.75,3285.75,0,2319.179932,2319.179932,2319.179932,2319.179932,0,1267.540039,1272.060059,1267.079956,1272.050049,0,8292.360352,...,0,119.900002,119.900002,119.900002,119.900002,0,0.8596,0.86133,0.85928,0.86085,1.09405,1.09443,1.09104,1.09174,133.160995,133.352997,133.110001,133.337006,154.899994,155.084,154.710007,154.895004,0.70526,0.70772,0.70498,0.70714,0.8197,0.8218,0.8196,0.8212,1.208,1.2095,1.2072,1.2087,1.28929,1.29238,1.2875,1.29182,19.780001,19.8571,19.776199,19.806999,7.759,7.75953,7.7578,7.7591,109.240997,109.549004,109.160004,109.538002


### Output a stock data file

In [38]:
# Persist the scraped stock frame inside the scrape directory.
# pandas writes the index (here named `date`) as the first CSV column by default.
yahoo_stock_df.to_csv(
    os.path.join(scrape_directory, "all_stock_data.csv"),
)

# ----------------------------------------------------------------------------------------------------------------

In [14]:
# Checkpoint reload: read the previously saved CSVs back in so the notebook can be
# resumed from this point without re-running the scrapes above.
# NOTE(review): the layout of all_internal_data.csv is not visible in this section,
# so it is read unchanged - confirm whether it also needs `index_col`.
merged_internal_data = pd.read_csv(os.path.join(scrape_directory, "all_internal_data.csv"))

# all_stock_data.csv is written from a frame whose index is `date`, so the index is
# the first CSV column. Read it back as the index; otherwise the reloaded frame has a
# RangeIndex plus an extra `date` column and no longer matches the frame that was saved.
# NOTE(review): the index values come back as strings - add `parse_dates=True` if the
# rest of the notebook expects a DatetimeIndex.
yahoo_stock_df = pd.read_csv(
    os.path.join(scrape_directory, "all_stock_data.csv"),
    index_col=0,
)

# Get economic data

### Load US data from FRED API into a dataframe

The Federal Reserve Bank of St. Louis’s FRED data is accessed via the `quandl` module, which is very convenient. Quandl needs an API token to raise its call limit; the token is read from the `QUANDL_API_KEY` environment variable, so setting it up takes a single line.

#### Scrape the data

In [39]:
# Take the Quandl token from the environment instead of hard-coding it in the notebook.
# The value is None when QUANDL_API_KEY is not set.
quandl.ApiConfig.api_key = os.getenv("QUANDL_API_KEY")

In [40]:
# FRED series to download, taken from the "fred_tickers" entry of the YAML config.
fred_indicator_tickers = config_variables["fred_tickers"]

# Disabled alternative: build the ticker list by iterating through all of the time
# series on FRED and scraping their indicators, instead of using the config list.
#fred_indicator_tickers = dc.scrape_fed_economic_data_codes(config_variables)

In [41]:
# Scrape the configured FRED indicators into a single dataframe. The helper is given
# the scrape directory, the ticker list and the configured start date.
fred_df = dc.scrape_fred_indicators(
    scrape_directory,
    fred_indicator_tickers,
    start_date,
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=59.0), HTML(value='')))




#### Check where nan values are in this data

In [42]:
# Find, per column, the date ranges in which fred_df holds NaN values.
# output=True presumably enables the printed report - confirm in data_collection_functions.
fred_nan_col_dates = dc.find_col_nan_ranges(fred_df, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=1.0), HTML(value='')))


---------------------------------------------------------------------
1 columns had a 'NaN' value in them:
['us_30_year_be_inflation_rate']
---------------------------------------------------------------------
The date ranges in these columns where the NaN's are located are:
{'us_30_year_be_inflation_rate': [('2009-01-01', '2010-02-01')]}


#### Manually inspect the data

In [43]:
# (rows, columns) of the FRED frame.
fred_df.shape

(4542, 59)

In [44]:
# Show the first rows of fred_df (the earliest dates) for a manual check.
fred_df.head()

Unnamed: 0_level_0,us_gdp,us_real_GDP,us_real_pot_gdp,us_cpi_all,us_cpi_no_food_and_energy,us_cpi_no_food_and_energy_perc_change,us_median_cpi,us_gdp_ipd,us_monetart_base,us_m1_money,us_m2_money,us_velocity_m1_money,us_velocity_m2_money,us_fed_funds_rate,us_3_month_secondary_tbill,us_5_year_treasury_const_mat_rate,us_10_year_treasury_const_mat_rate,us_30_year_treasury_const_mat_rate,us_5_year_be_inflation_rate,us_10_year_be_inflation_rate,us_20_year_be_inflation_rate,us_30_year_be_inflation_rate,us_5_yr_5_yr_fwd_inflatn_expectation,us_ted_spread,us_prime_loan_rate,us_unemployment_rate,us_long_natural_unemployment_rate,us_short_natural_unemployment_rate,us_labour_force_employ_rate,us_pop_employ_rate,us_num_unemployed,us_nonfarm_num_employed,us_num_employed_in_manufactoring,us_num_filing_for_unemployment,us_median_house_income,us_total_real_disposable_income,us_tot_personal_consumption_spend,us_tot_personal_consumption_spend_dg,us_percent_personal_saving_rate,us_real_retail_and_food_sales,us_total_disposble_income,us_industry_production_index,us_capacity_utilisation,us_new_housing_devs_started,us_gross_private_domestic_investment,us_corporate_profit_aftr_tax,us_financial_stress_index,west_texas_crude_oil_price,us_leading_index,us_currency_trade_weighted_dollar_index,us_broad_trade_weighted_dollar_index,us_total_public_debt,us_public_debt_as_perc_of_gdp,us_bank_excess_capital_reserves,us_total_commercial_loans,us_10_year_yield,us_5_year_yield,us_3_year_yield,us_2_year_yield
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1
2009-01-01,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.14,0.11,1.55,2.25,2.69,-0.26,0.11,1.0,,0.48,1.32,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,533000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.583,69.819,490.0,2014.878,1082.708,3.308,44.6,-2.32,79.5258,107.235,11126941.0,77.2997,798335.0,1537.8605,2.776,1.6039,0.958,0.6146
2009-01-02,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.0,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,533000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.583,69.819,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.257,107.2518,11126941.0,77.2997,798335.0,1537.8605,2.9618,1.768,1.0916,0.7202
2009-01-03,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.0,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.583,69.819,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.257,107.2518,11126941.0,77.2997,798335.0,1537.8605,2.9618,1.768,1.0916,0.7202
2009-01-04,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.0,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.583,69.819,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.257,107.2518,11126941.0,77.2997,798335.0,1537.8605,2.9618,1.768,1.0916,0.7202
2009-01-05,14394.547,15155.94,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1629.7,8261.5,9.132,1.731,0.11,0.14,1.67,2.49,3.0,-0.19,0.15,1.0,,0.49,1.28,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.583,69.819,490.0,2014.878,1082.708,3.246,48.61,-2.32,80.0914,107.5888,11126941.0,77.2997,798335.0,1537.8605,3.013,1.7816,1.0812,0.7011


In [45]:
# Show the last rows of fred_df (the most recent dates) for a manual check.
fred_df.tail()

Unnamed: 0_level_0,us_gdp,us_real_GDP,us_real_pot_gdp,us_cpi_all,us_cpi_no_food_and_energy,us_cpi_no_food_and_energy_perc_change,us_median_cpi,us_gdp_ipd,us_monetart_base,us_m1_money,us_m2_money,us_velocity_m1_money,us_velocity_m2_money,us_fed_funds_rate,us_3_month_secondary_tbill,us_5_year_treasury_const_mat_rate,us_10_year_treasury_const_mat_rate,us_30_year_treasury_const_mat_rate,us_5_year_be_inflation_rate,us_10_year_be_inflation_rate,us_20_year_be_inflation_rate,us_30_year_be_inflation_rate,us_5_yr_5_yr_fwd_inflatn_expectation,us_ted_spread,us_prime_loan_rate,us_unemployment_rate,us_long_natural_unemployment_rate,us_short_natural_unemployment_rate,us_labour_force_employ_rate,us_pop_employ_rate,us_num_unemployed,us_nonfarm_num_employed,us_num_employed_in_manufactoring,us_num_filing_for_unemployment,us_median_house_income,us_total_real_disposable_income,us_tot_personal_consumption_spend,us_tot_personal_consumption_spend_dg,us_percent_personal_saving_rate,us_real_retail_and_food_sales,us_total_disposble_income,us_industry_production_index,us_capacity_utilisation,us_new_housing_devs_started,us_gross_private_domestic_investment,us_corporate_profit_aftr_tax,us_financial_stress_index,west_texas_crude_oil_price,us_leading_index,us_currency_trade_weighted_dollar_index,us_broad_trade_weighted_dollar_index,us_total_public_debt,us_public_debt_as_perc_of_gdp,us_bank_excess_capital_reserves,us_total_commercial_loans,us_10_year_yield,us_5_year_yield,us_3_year_yield,us_2_year_yield
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1
2021-06-04,22061.025,19088.064,19594.22,266.832,273.7,2.272549,2.895341,115.575,3382.8,18136.0,19417.7,1.198,1.122,0.06,0.02,0.78,1.56,2.24,2.54,2.4,2.38,2.25,2.26,0.12,3.25,5.8,4.463892,4.463892,61.6,58.0,9316.0,144894.0,12290.0,385000.0,68703.0,16521.1,15560.3,2097.2,14.9,232327.0,18844.5,99.0122,74.6203,1569.0,3906.781,2177.942,-0.07,67.8,1.72,90.8221,128.0097,28132570.0,127.52159,2854690.0,2543.739,1.7202,0.7871,0.3652,0.1734
2021-06-05,22061.025,19088.064,19594.22,266.832,273.7,2.272549,2.895341,115.575,3382.8,18136.0,19417.7,1.198,1.122,0.06,0.02,0.78,1.56,2.24,2.54,2.4,2.38,2.25,2.26,0.12,3.25,5.8,4.463892,4.463892,61.6,58.0,9316.0,144894.0,12290.0,385000.0,68703.0,16521.1,15560.3,2097.2,14.9,232327.0,18844.5,99.0122,74.6203,1569.0,3906.781,2177.942,-0.07,67.8,1.72,90.8221,128.0097,28132570.0,127.52159,2854690.0,2543.739,1.7202,0.7871,0.3652,0.1734
2021-06-06,22061.025,19088.064,19594.22,266.832,273.7,2.272549,2.895341,115.575,3382.8,18136.0,19417.7,1.198,1.122,0.06,0.02,0.78,1.56,2.24,2.54,2.4,2.38,2.25,2.26,0.12,3.25,5.8,4.463892,4.463892,61.6,58.0,9316.0,144894.0,12290.0,385000.0,68703.0,16521.1,15560.3,2097.2,14.9,232327.0,18844.5,99.0122,74.6203,1569.0,3906.781,2177.942,-0.07,67.8,1.72,90.8221,128.0097,28132570.0,127.52159,2854690.0,2543.739,1.7202,0.7871,0.3652,0.1734
2021-06-07,22061.025,19088.064,19594.22,266.832,273.7,2.272549,2.895341,115.575,3382.8,18136.0,19417.7,1.198,1.122,0.06,0.02,0.78,1.56,2.24,2.51,2.39,2.38,2.25,2.27,0.12,3.25,5.8,4.463892,4.463892,61.6,58.0,9316.0,144894.0,12290.0,385000.0,68703.0,16521.1,15560.3,2097.2,14.9,232327.0,18844.5,99.0122,74.6203,1569.0,3906.781,2177.942,-0.07,67.8,1.72,90.8221,128.0097,28132570.0,127.52159,2854690.0,2543.739,1.7202,0.7871,0.3652,0.1734
2021-06-08,22061.025,19088.064,19594.22,266.832,273.7,2.272549,2.895341,115.575,3382.8,18136.0,19417.7,1.198,1.122,0.06,0.02,0.78,1.56,2.24,2.51,2.39,2.38,2.25,2.27,0.12,3.25,5.8,4.463892,4.463892,61.6,58.0,9316.0,144894.0,12290.0,385000.0,68703.0,16521.1,15560.3,2097.2,14.9,232327.0,18844.5,99.0122,74.6203,1569.0,3906.781,2177.942,-0.07,67.8,1.72,90.8221,128.0097,28132570.0,127.52159,2854690.0,2543.739,1.7202,0.7871,0.3652,0.1734


##### Load more data from Quandl API

try:
    copper_download = quandl.get("CHRIS/CME_HG2", start_date=start_date)
    print(copper_download)
    gold_download = quandl.get("CHRIS/CME_GC2", start_date=start_date)
    print(gold_download)

except Exception as e:
    print(e)
    
    
Each download returns the following fields:
1. Open
2. High
3. Low
4. Last
5. Change
6. Settle
7. Volume
8. Previous Day Open Interest 

### Get economic data from db.nomics.world

#### Scrape the data

In [46]:
# db.nomics settings from the YAML config, read in the same order as before:
# first the time periods and the standalone Eurostat tickers ...
time_periods_dict, standalone_tickers = (
    config_variables["db_nomics_time_periods"],
    config_variables["db_nomics_eurostat_tickers_standalone"],
)

# ... then the country codes and the tickers that are requested per country.
country_codes_dict, country_specific_tickers = (
    config_variables["db_nomics_countries"],
    config_variables["db_nomics_eurostat_tickers_for_countries"],
)

In [47]:
# Scrape the db.nomics series. The helper returns the data together with a second
# value that is bound to failed_urls.
# NOTE(review): failed_urls is not inspected in this section - check it before
# trusting the coverage of dbnomics_data.
dbnomics_data, failed_urls = dc.scrape_db_nomics_economic_data(
    scrape_directory,
    start_date,
    standalone_tickers,
    country_specific_tickers,
    country_codes_dict,
    time_periods_dict,
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=561.0), HTML(value='')))




#### Check where nan values are in this data

In [48]:
# Find, per column, the date ranges in which dbnomics_data holds NaN values.
# output=True presumably enables the printed report - confirm in data_collection_functions.
dbnomics_nan_col_dates = dc.find_col_nan_ranges(dbnomics_data, output=True)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=23.0), HTML(value='')))


---------------------------------------------------------------------
23 columns had a 'NaN' value in them:
['turkey_house_price_index',
 'italy_house_price_index',
 'turkey_secondary_house_purchases',
 'italy_secondary_house_purchases',
 'turkey_housing_price_index',
 'italy_housing_price_index',
 'germany_job_vacancy_rate',
 'turkey_retail_business_sales_confidence',
 'denmark_retail_business_sales_confidence',
 'turkey_services_business_3m_price_expectation',
 'denmark_services_business_3m_price_expectation',
 'turkey_business_employment_expectation',
 'denmark_gov_debt_all',
 'sweden_gov_debt_all',
 'euro_area_percent_afford_unexpected_expenses',
 'germany_trust_in_legal_system',
 'france_trust_in_legal_system',
 'turkey_trust_in_legal_system',
 'uk_trust_in_legal_system',
 'italy_trust_in_legal_system',
 'belgium_trust_in_legal_system',
 'denmark_trust_in_legal_system',
 'sweden_trust_in_legal_system']
---------------------------------------------------------------------
The date

#### Manually inspect the data

In [49]:
# (rows, columns) of the db.nomics frame.
dbnomics_data.shape

(4542, 207)

In [50]:
# Show the first rows of dbnomics_data (the earliest dates) for a manual check.
dbnomics_data.head()

Unnamed: 0_level_0,all_euro_forward_yields_Y10,all_euro_forward_yields_Y5,all_euro_forward_yields_Y2,all_euro_forward_yields_Y1,all_euro_forward_yields_M6,all_euro_forward_yields_M3,all_euro_spot_yields_Y10,all_euro_spot_yields_Y5,all_euro_spot_yields_Y2,all_euro_spot_yields_Y1,all_euro_spot_yields_M6,all_euro_spot_yields_M3,turkey_government_10_yr_bonds,japan_government_10_yr_bonds,us_government_10_yr_bonds,euro_area_emu_convergence_bond_yield,germany_emu_convergence_bond_yield,france_emu_convergence_bond_yield,uk_emu_convergence_bond_yield,italy_emu_convergence_bond_yield,belgium_emu_convergence_bond_yield,denmark_emu_convergence_bond_yield,sweden_emu_convergence_bond_yield,euro_19_gov_debt,germany_gov_debt,france_gov_debt,italy_gov_debt,belgium_gov_debt,denmark_gov_debt,sweden_gov_debt,euro_area_harmonised_index_consumer_prices,euro_19_harmonised_index_consumer_prices,germany_harmonised_index_consumer_prices,france_harmonised_index_consumer_prices,turkey_harmonised_index_consumer_prices,uk_harmonised_index_consumer_prices,us_harmonised_index_consumer_prices,italy_harmonised_index_consumer_prices,belgium_harmonised_index_consumer_prices,denmark_harmonised_index_consumer_prices,sweden_harmonised_index_consumer_prices,euro_area_house_price_index,euro_19_house_price_index,germany_house_price_index,france_house_price_index,turkey_house_price_index,uk_house_price_index,italy_house_price_index,belgium_house_price_index,denmark_house_price_index,...,italy_retail_business_sales_confidence,belgium_retail_business_sales_confidence,denmark_retail_business_sales_confidence,sweden_retail_business_sales_confidence,euro_19_services_business_3m_price_expectation,germany_services_business_3m_price_expectation,france_services_business_3m_price_expectation,turkey_services_business_3m_price_expectation,uk_services_business_3m_price_expectation,italy_services_business_3m_price_expectation,belgium_services_business_3m_price_expectation,denmark_services_business_3m_price_expectation,swe
den_services_business_3m_price_expectation,euro_19_business_employment_expectation,germany_business_employment_expectation,france_business_employment_expectation,turkey_business_employment_expectation,uk_business_employment_expectation,italy_business_employment_expectation,belgium_business_employment_expectation,denmark_business_employment_expectation,sweden_business_employment_expectation,germany_percent_housholds_w_internet,france_percent_housholds_w_internet,turkey_percent_housholds_w_internet,uk_percent_housholds_w_internet,italy_percent_housholds_w_internet,belgium_percent_housholds_w_internet,denmark_percent_housholds_w_internet,sweden_percent_housholds_w_internet,denmark_gov_debt_all,sweden_gov_debt_all,euro_area_percent_afford_unexpected_expenses,euro_19_percent_afford_unexpected_expenses,germany_percent_afford_unexpected_expenses,france_percent_afford_unexpected_expenses,turkey_percent_afford_unexpected_expenses,uk_percent_afford_unexpected_expenses,italy_percent_afford_unexpected_expenses,belgium_percent_afford_unexpected_expenses,denmark_percent_afford_unexpected_expenses,sweden_percent_afford_unexpected_expenses,germany_trust_in_legal_system,france_trust_in_legal_system,turkey_trust_in_legal_system,uk_trust_in_legal_system,italy_trust_in_legal_system,belgium_trust_in_legal_system,denmark_trust_in_legal_system,sweden_trust_in_legal_system
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2009-01-01,4.86969,4.22276,3.13145,2.45337,1.91688,1.71857,3.92322,3.20703,2.41026,1.9939,1.79447,1.78778,15.96,1.26,2.49,3.68,2.93,3.41,3.04,4.49,3.77,3.31,2.43,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-02,4.86586,4.24995,3.14731,2.43366,1.86285,1.66169,3.93396,3.21408,2.38987,1.95111,1.74592,1.74835,15.96,1.26,2.49,3.68,2.97,3.38,2.84,4.45,3.8,3.31,2.44,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-03,4.86586,4.24995,3.14731,2.43366,1.86285,1.66169,3.93396,3.21408,2.38987,1.95111,1.74592,1.74835,15.96,1.26,2.49,3.68,2.97,3.38,2.84,4.45,3.8,3.31,2.44,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-04,4.86586,4.24995,3.14731,2.43366,1.86285,1.66169,3.93396,3.21408,2.38987,1.95111,1.74592,1.74835,15.96,1.26,2.49,3.68,2.97,3.38,2.84,4.45,3.8,3.31,2.44,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-05,4.91925,4.2568,3.12237,2.39963,1.85158,1.66637,3.9422,3.1999,2.36672,1.93611,1.7415,1.74164,15.96,1.26,2.49,3.72,3.0,3.48,2.96,4.47,3.81,3.32,2.55,286127.5,10321.0,25629.0,212368.8,1185.0,1917.0,5937.8,90.71,90.73,91.1,91.67,61.66,84.9,88.32,89.5,88.5,90.8,93.02,100.63,100.59,81.8,95.51,,78.69,,88.95,87.14,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,


In [51]:
# Inspect the last 5 rows (the pandas default for tail()) of the DBnomics frame.
dbnomics_data.tail(n=5)

Unnamed: 0_level_0,all_euro_forward_yields_Y10,all_euro_forward_yields_Y5,all_euro_forward_yields_Y2,all_euro_forward_yields_Y1,all_euro_forward_yields_M6,all_euro_forward_yields_M3,all_euro_spot_yields_Y10,all_euro_spot_yields_Y5,all_euro_spot_yields_Y2,all_euro_spot_yields_Y1,all_euro_spot_yields_M6,all_euro_spot_yields_M3,turkey_government_10_yr_bonds,japan_government_10_yr_bonds,us_government_10_yr_bonds,euro_area_emu_convergence_bond_yield,germany_emu_convergence_bond_yield,france_emu_convergence_bond_yield,uk_emu_convergence_bond_yield,italy_emu_convergence_bond_yield,belgium_emu_convergence_bond_yield,denmark_emu_convergence_bond_yield,sweden_emu_convergence_bond_yield,euro_19_gov_debt,germany_gov_debt,france_gov_debt,italy_gov_debt,belgium_gov_debt,denmark_gov_debt,sweden_gov_debt,euro_area_harmonised_index_consumer_prices,euro_19_harmonised_index_consumer_prices,germany_harmonised_index_consumer_prices,france_harmonised_index_consumer_prices,turkey_harmonised_index_consumer_prices,uk_harmonised_index_consumer_prices,us_harmonised_index_consumer_prices,italy_harmonised_index_consumer_prices,belgium_harmonised_index_consumer_prices,denmark_harmonised_index_consumer_prices,sweden_harmonised_index_consumer_prices,euro_area_house_price_index,euro_19_house_price_index,germany_house_price_index,france_house_price_index,turkey_house_price_index,uk_house_price_index,italy_house_price_index,belgium_house_price_index,denmark_house_price_index,...,italy_retail_business_sales_confidence,belgium_retail_business_sales_confidence,denmark_retail_business_sales_confidence,sweden_retail_business_sales_confidence,euro_19_services_business_3m_price_expectation,germany_services_business_3m_price_expectation,france_services_business_3m_price_expectation,turkey_services_business_3m_price_expectation,uk_services_business_3m_price_expectation,italy_services_business_3m_price_expectation,belgium_services_business_3m_price_expectation,denmark_services_business_3m_price_expectation,swe
den_services_business_3m_price_expectation,euro_19_business_employment_expectation,germany_business_employment_expectation,france_business_employment_expectation,turkey_business_employment_expectation,uk_business_employment_expectation,italy_business_employment_expectation,belgium_business_employment_expectation,denmark_business_employment_expectation,sweden_business_employment_expectation,germany_percent_housholds_w_internet,france_percent_housholds_w_internet,turkey_percent_housholds_w_internet,uk_percent_housholds_w_internet,italy_percent_housholds_w_internet,belgium_percent_housholds_w_internet,denmark_percent_housholds_w_internet,sweden_percent_housholds_w_internet,denmark_gov_debt_all,sweden_gov_debt_all,euro_area_percent_afford_unexpected_expenses,euro_19_percent_afford_unexpected_expenses,germany_percent_afford_unexpected_expenses,france_percent_afford_unexpected_expenses,turkey_percent_afford_unexpected_expenses,uk_percent_afford_unexpected_expenses,italy_percent_afford_unexpected_expenses,belgium_percent_afford_unexpected_expenses,denmark_percent_afford_unexpected_expenses,sweden_percent_afford_unexpected_expenses,germany_trust_in_legal_system,france_trust_in_legal_system,turkey_trust_in_legal_system,uk_trust_in_legal_system,italy_trust_in_legal_system,belgium_trust_in_legal_system,denmark_trust_in_legal_system,sweden_trust_in_legal_system
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2021-06-04,1.21205,0.22965,-0.47677,-0.58022,-0.60152,-0.60899,0.2349,-0.31875,-0.56844,-0.60064,-0.60924,-0.61321,18.3,0.08,1.61,0.19,-0.25,0.16,0.79,0.92,0.15,0.14,0.43,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.43,107.14,108.4,107.4,204.02,108.9,110.67,105.1,110.62,104.1,109.54,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.9,5.7,10.3,1.0,7.6,18.5,-0.3,22.7,-11.5,3.6,4.9,-2.6,5.0,10.0,2.7,17.3,-10.6,-11.9,9.8,13.0,6.5,16.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-06-05,1.21205,0.22965,-0.47677,-0.58022,-0.60152,-0.60899,0.2349,-0.31875,-0.56844,-0.60064,-0.60924,-0.61321,18.3,0.08,1.61,0.19,-0.25,0.16,0.79,0.92,0.15,0.14,0.43,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.43,107.14,108.4,107.4,204.02,108.9,110.67,105.1,110.62,104.1,109.54,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.9,5.7,10.3,1.0,7.6,18.5,-0.3,22.7,-11.5,3.6,4.9,-2.6,5.0,10.0,2.7,17.3,-10.6,-11.9,9.8,13.0,6.5,16.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-06-06,1.21205,0.22965,-0.47677,-0.58022,-0.60152,-0.60899,0.2349,-0.31875,-0.56844,-0.60064,-0.60924,-0.61321,18.3,0.08,1.61,0.19,-0.25,0.16,0.79,0.92,0.15,0.14,0.43,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.43,107.14,108.4,107.4,204.02,108.9,110.67,105.1,110.62,104.1,109.54,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.9,5.7,10.3,1.0,7.6,18.5,-0.3,22.7,-11.5,3.6,4.9,-2.6,5.0,10.0,2.7,17.3,-10.6,-11.9,9.8,13.0,6.5,16.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-06-07,1.21205,0.22965,-0.47677,-0.58022,-0.60152,-0.60899,0.2349,-0.31875,-0.56844,-0.60064,-0.60924,-0.61321,18.3,0.08,1.61,0.19,-0.25,0.16,0.79,0.92,0.15,0.14,0.43,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.43,107.14,108.4,107.4,204.02,108.9,110.67,105.1,110.62,104.1,109.54,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.9,5.7,10.3,1.0,7.6,18.5,-0.3,22.7,-11.5,3.6,4.9,-2.6,5.0,10.0,2.7,17.3,-10.6,-11.9,9.8,13.0,6.5,16.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-06-08,1.21205,0.22965,-0.47677,-0.58022,-0.60152,-0.60899,0.2349,-0.31875,-0.56844,-0.60064,-0.60924,-0.61321,18.3,0.08,1.61,0.19,-0.25,0.16,0.79,0.92,0.15,0.14,0.43,365392.2,14768.0,45938.0,228928.7,1465.0,2792.7,7485.5,107.43,107.14,108.4,107.4,204.02,108.9,110.67,105.1,110.62,104.1,109.54,127.23,127.23,143.5,119.49,191.9,121.15,100.1,122.25,127.84,...,11.9,5.7,10.3,1.0,7.6,18.5,-0.3,22.7,-11.5,3.6,4.9,-2.6,5.0,10.0,2.7,17.3,-10.6,-11.9,9.8,13.0,6.5,16.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7


### Merge these economic datasets together

In [52]:
# Build one master economic dataframe that carries the columns of both
# data sources (FRED and DBnomics).
list_of_economic_dfs = [
    fred_df,
    dbnomics_data,
]
merged_economic_df = dc.merge_dfs_on_index(list_of_economic_dfs)

In [53]:
# Display the merged frame; pandas truncates the rendering according to the
# display.max_rows / display.max_columns options set at the top of the notebook.
merged_economic_df

Unnamed: 0_level_0,us_gdp,us_real_GDP,us_real_pot_gdp,us_cpi_all,us_cpi_no_food_and_energy,us_cpi_no_food_and_energy_perc_change,us_median_cpi,us_gdp_ipd,us_monetart_base,us_m1_money,us_m2_money,us_velocity_m1_money,us_velocity_m2_money,us_fed_funds_rate,us_3_month_secondary_tbill,us_5_year_treasury_const_mat_rate,us_10_year_treasury_const_mat_rate,us_30_year_treasury_const_mat_rate,us_5_year_be_inflation_rate,us_10_year_be_inflation_rate,us_20_year_be_inflation_rate,us_30_year_be_inflation_rate,us_5_yr_5_yr_fwd_inflatn_expectation,us_ted_spread,us_prime_loan_rate,us_unemployment_rate,us_long_natural_unemployment_rate,us_short_natural_unemployment_rate,us_labour_force_employ_rate,us_pop_employ_rate,us_num_unemployed,us_nonfarm_num_employed,us_num_employed_in_manufactoring,us_num_filing_for_unemployment,us_median_house_income,us_total_real_disposable_income,us_tot_personal_consumption_spend,us_tot_personal_consumption_spend_dg,us_percent_personal_saving_rate,us_real_retail_and_food_sales,us_total_disposble_income,us_industry_production_index,us_capacity_utilisation,us_new_housing_devs_started,us_gross_private_domestic_investment,us_corporate_profit_aftr_tax,us_financial_stress_index,west_texas_crude_oil_price,us_leading_index,us_currency_trade_weighted_dollar_index,...,italy_retail_business_sales_confidence,belgium_retail_business_sales_confidence,denmark_retail_business_sales_confidence,sweden_retail_business_sales_confidence,euro_19_services_business_3m_price_expectation,germany_services_business_3m_price_expectation,france_services_business_3m_price_expectation,turkey_services_business_3m_price_expectation,uk_services_business_3m_price_expectation,italy_services_business_3m_price_expectation,belgium_services_business_3m_price_expectation,denmark_services_business_3m_price_expectation,sweden_services_business_3m_price_expectation,euro_19_business_employment_expectation,germany_business_employment_expectation,france_business_employment_expectation,turkey_business_em
ployment_expectation,uk_business_employment_expectation,italy_business_employment_expectation,belgium_business_employment_expectation,denmark_business_employment_expectation,sweden_business_employment_expectation,germany_percent_housholds_w_internet,france_percent_housholds_w_internet,turkey_percent_housholds_w_internet,uk_percent_housholds_w_internet,italy_percent_housholds_w_internet,belgium_percent_housholds_w_internet,denmark_percent_housholds_w_internet,sweden_percent_housholds_w_internet,denmark_gov_debt_all,sweden_gov_debt_all,euro_area_percent_afford_unexpected_expenses,euro_19_percent_afford_unexpected_expenses,germany_percent_afford_unexpected_expenses,france_percent_afford_unexpected_expenses,turkey_percent_afford_unexpected_expenses,uk_percent_afford_unexpected_expenses,italy_percent_afford_unexpected_expenses,belgium_percent_afford_unexpected_expenses,denmark_percent_afford_unexpected_expenses,sweden_percent_afford_unexpected_expenses,germany_trust_in_legal_system,france_trust_in_legal_system,turkey_trust_in_legal_system,uk_trust_in_legal_system,italy_trust_in_legal_system,belgium_trust_in_legal_system,denmark_trust_in_legal_system,sweden_trust_in_legal_system
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2009-01-01,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.14,0.11,1.55,2.25,2.69,-0.26,0.11,1.00,,0.48,1.32,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,533000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.5830,69.8190,490.0,2014.878,1082.708,3.308,44.60,-2.32,79.5258,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-02,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.00,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,533000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.5830,69.8190,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.2570,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-03,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.00,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.5830,69.8190,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.2570,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-04,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1597.3,8229.5,9.132,1.731,0.08,0.08,1.72,2.46,2.83,-0.25,0.17,1.00,,0.59,1.33,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.5830,69.8190,490.0,2014.878,1082.708,3.246,46.17,-2.32,79.2570,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
2009-01-05,14394.547,15155.940,15971.43506,211.933,217.346,2.552248,2.403641,94.976,1690.829,1629.7,8261.5,9.132,1.731,0.11,0.14,1.67,2.49,3.00,-0.19,0.15,1.00,,0.49,1.28,3.25,7.8,4.875262,5.212292,65.7,60.6,12058.0,134055.0,12561.0,503000.0,59458.0,11646.5,9783.8,1023.0,6.2,158979.0,10851.4,88.5830,69.8190,490.0,2014.878,1082.708,3.246,48.61,-2.32,80.0914,...,5.3,16.0,,30.9,-0.8,2.2,-8.3,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2021-06-04,22061.025,19088.064,19594.22000,266.832,273.700,2.272549,2.895341,115.575,3382.800,18136.0,19417.7,1.198,1.122,0.06,0.02,0.78,1.56,2.24,2.54,2.40,2.38,2.25,2.26,0.12,3.25,5.8,4.463892,4.463892,61.6,58.0,9316.0,144894.0,12290.0,385000.0,68703.0,16521.1,15560.3,2097.2,14.9,232327.0,18844.5,99.0122,74.6203,1569.0,3906.781,2177.942,-0.070,67.80,1.72,90.8221,...,11.9,5.7,10.3,1.0,7.6,18.5,-0.3,22.7,-11.5,3.6,4.9,-2.6,5.0,10.0,2.7,17.3,-10.6,-11.9,9.8,13.0,6.5,16.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-06-05,22061.025,19088.064,19594.22000,266.832,273.700,2.272549,2.895341,115.575,3382.800,18136.0,19417.7,1.198,1.122,0.06,0.02,0.78,1.56,2.24,2.54,2.40,2.38,2.25,2.26,0.12,3.25,5.8,4.463892,4.463892,61.6,58.0,9316.0,144894.0,12290.0,385000.0,68703.0,16521.1,15560.3,2097.2,14.9,232327.0,18844.5,99.0122,74.6203,1569.0,3906.781,2177.942,-0.070,67.80,1.72,90.8221,...,11.9,5.7,10.3,1.0,7.6,18.5,-0.3,22.7,-11.5,3.6,4.9,-2.6,5.0,10.0,2.7,17.3,-10.6,-11.9,9.8,13.0,6.5,16.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-06-06,22061.025,19088.064,19594.22000,266.832,273.700,2.272549,2.895341,115.575,3382.800,18136.0,19417.7,1.198,1.122,0.06,0.02,0.78,1.56,2.24,2.54,2.40,2.38,2.25,2.26,0.12,3.25,5.8,4.463892,4.463892,61.6,58.0,9316.0,144894.0,12290.0,385000.0,68703.0,16521.1,15560.3,2097.2,14.9,232327.0,18844.5,99.0122,74.6203,1569.0,3906.781,2177.942,-0.070,67.80,1.72,90.8221,...,11.9,5.7,10.3,1.0,7.6,18.5,-0.3,22.7,-11.5,3.6,4.9,-2.6,5.0,10.0,2.7,17.3,-10.6,-11.9,9.8,13.0,6.5,16.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7
2021-06-07,22061.025,19088.064,19594.22000,266.832,273.700,2.272549,2.895341,115.575,3382.800,18136.0,19417.7,1.198,1.122,0.06,0.02,0.78,1.56,2.24,2.51,2.39,2.38,2.25,2.27,0.12,3.25,5.8,4.463892,4.463892,61.6,58.0,9316.0,144894.0,12290.0,385000.0,68703.0,16521.1,15560.3,2097.2,14.9,232327.0,18844.5,99.0122,74.6203,1569.0,3906.781,2177.942,-0.070,67.80,1.72,90.8221,...,11.9,5.7,10.3,1.0,7.6,18.5,-0.3,22.7,-11.5,3.6,4.9,-2.6,5.0,10.0,2.7,17.3,-10.6,-11.9,9.8,13.0,6.5,16.6,96.0,90.0,91.0,97.0,88.0,91.0,95.0,94.0,0.0,0.0,28.7,28.7,23.5,27.6,26.6,31.1,33.1,26.0,25.6,20.9,5.3,4.5,5.4,5.5,3.6,5.0,7.5,6.7


#### Check for NaNs

In [54]:
# Scan the merged economic frame for missing values. With output=True the
# helper prints the names of the columns that contain a NaN.
# NOTE(review): the returned object presumably holds the NaN date ranges per
# column (going by the variable name) - confirm in data_collection_functions.
merged_economic_nan_col_dates = dc.find_col_nan_ranges(
    merged_economic_df,
    output=True,
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=24.0), HTML(value='')))


---------------------------------------------------------------------
24 columns had a 'NaN' value in them:
['us_30_year_be_inflation_rate',
 'turkey_house_price_index',
 'italy_house_price_index',
 'turkey_secondary_house_purchases',
 'italy_secondary_house_purchases',
 'turkey_housing_price_index',
 'italy_housing_price_index',
 'germany_job_vacancy_rate',
 'turkey_retail_business_sales_confidence',
 'denmark_retail_business_sales_confidence',
 'turkey_services_business_3m_price_expectation',
 'denmark_services_business_3m_price_expectation',
 'turkey_business_employment_expectation',
 'denmark_gov_debt_all',
 'sweden_gov_debt_all',
 'euro_area_percent_afford_unexpected_expenses',
 'germany_trust_in_legal_system',
 'france_trust_in_legal_system',
 'turkey_trust_in_legal_system',
 'uk_trust_in_legal_system',
 'italy_trust_in_legal_system',
 'belgium_trust_in_legal_system',
 'denmark_trust_in_legal_system',
 'sweden_trust_in_legal_system']
---------------------------------------------

### Output an economic data file

In [55]:
# Persist the merged economic frame as a CSV file inside the scrape directory.
merged_economic_df.to_csv(
    os.path.join(scrape_directory, "all_economic_data.csv")
)

# ----------------------------------------------------------------------------------------------------------------

# Get social media data

### Get sentiment analysis data on all bitcoin related tweets

This can't currently be run because of the rate limits.

In [56]:
# Query words passed to the Twitter scraping cells below, read from the YAML
# configuration loaded at the top of the notebook.
bitcoin_query_words = config_variables["twitter_bitcoin_query_words"]

In [336]:
# Scrape tweets matching the configured bitcoin query words and derive their
# sentiment, starting from the configured start date.
# NOTE(review): the recorded run of this cell ended with `SystemExit: 0`, and
# its result is not part of the social media table written out further down.
twitter_bitcoin_sentiment_df = dc.scrape_tweets_sentiment(
    bitcoin_query_words,
    start_date,
    config_variables,
)

SystemExit: 0

### Get tweet count data on tweets by specific influencers

##### Bitcoin crypto influencers on Twitter

1. CZ Binance: Changpeng Zhao is the founder and CEO of Binance, an exchange that moves more than 3 billion.
2. Justin Sun: a Chinese entrepreneur and the founder of the cryptocurrency TRON (TRX), which started out as an ERC-20 token and later launched its own mainnet.
3. John McAfee: a prominent developer, businessman and founder of the McAfee antivirus company. He is known for commenting on important and promising crypto projects.
4. Charlie Lee: a computer scientist and the creator of Litecoin (LTC). Also known for dumping his own cryptocurrency, saying that he had already sold nearly all of his LTC holdings.
5. Jackson Palmer: a computer programmer, writer and co-founder of Dogecoin (DOGE), one of the best-known meme coins in the current market.
6. Star Xu: CEO and founder of OKCoin, which raised more than 10 million in investment. Xu was suspected of fraud after investors accused him.

    * https://www.tweetbinder.com/blog/bitcoin-twitter/
    
---

https://consensys.net/blog/trending/i-read-crypto-twitter-for-hours-daily-here-are-the-40-accounts-that-really-matter/

https://coinbound.io/best-crypto-influencers-on-twitter/

https://blog.makerdao.com/the-top-10-crypto-and-defi-influencers-on-twitter/

In [182]:
# Influencer entries to scrape, read from the YAML configuration; they are
# handed to dc.scrape_influencer_tweets in the next cell.
bitcoin_influencers = config_variables["twitter_bitcoin_influencers"]

In [12]:
# Scrape the tweets of each configured influencer from the start date onwards.
# The query argument is `bitcoin_query_words` (loaded from the config in this
# section), the same variable the other Twitter cells pass to their helpers.
# NOTE(review): this call previously passed `bitcoin_query`, a name this
# section never defines - confirm the helper expects the query-word list.
# NOTE(review): the recorded run logged twint errors and was stopped with a
# KeyboardInterrupt, so expect this cell to be slow and possibly failing.
twitter_influencer_df = dc.scrape_influencer_tweets(
    bitcoin_influencers,
    bitcoin_query_words,
    start_date,
    config_variables,
)

HBox(children=(HTML(value=''), FloatProgress(value=0.0, max=4484.0), HTML(value='')))

CRITICAL:root:twint.get:User:'NoneType' object is not subscriptable
CRITICAL:root:twint.run:Twint:Feed:noDataExpecting value: line 1 column 1 (char 0)
sleeping for 1.0 secs
CRITICAL:root:twint.run:Twint:Feed:noDataExpecting value: line 1 column 1 (char 0)
sleeping for 8.0 secs





KeyboardInterrupt: 

### Get sentiment of Elon Musk bitcoin related tweets

In [57]:
# Build the date-indexed tweet count / sentiment features for Elon Musk's
# bitcoin-related tweets, using the configured query words and start date.
musk_bitcoin_tweet_sentiment_features = dc.get_musk_bitcoin_sentiment_data(
    scrape_directory,
    bitcoin_query_words,
    start_date,
)

Scraping tweets from '2010-06-04' --> '2021-03-22'
  - 12339 tweets scraped


#### Get breakdown of tweets

In [58]:
# Preview the first 5 rows (the pandas default for head()) of the Musk features.
musk_bitcoin_tweet_sentiment_features.head(n=5)

Unnamed: 0_level_0,musk_num_tweets,musk_num_pos_tweets,musk_num_neg_tweets,musk_num_neut_tweets,musk_percent_pos,musk_percent_neg,musk_percent_neut
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2009-01-01,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-02,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-03,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-04,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-05,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [59]:
# Count how often each distinct combination of feature values occurs
# (one output row per unique row of the frame, most frequent first).
(
    musk_bitcoin_tweet_sentiment_features
    .value_counts()
    .to_frame()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,0
musk_num_tweets,musk_num_pos_tweets,musk_num_neg_tweets,musk_num_neut_tweets,musk_percent_pos,musk_percent_neg,musk_percent_neut,Unnamed: 7_level_1
0.0,0.0,0.0,0.0,0.0,0.0,0.0,4502
1.0,0.0,0.0,1.0,0.0,0.0,100.0,8
1.0,1.0,0.0,0.0,100.0,0.0,0.0,7
2.0,0.0,0.0,2.0,0.0,0.0,100.0,5
1.0,0.0,1.0,0.0,0.0,100.0,0.0,4
2.0,2.0,0.0,0.0,100.0,0.0,0.0,2
5.0,5.0,0.0,0.0,100.0,0.0,0.0,2
4.0,2.0,0.0,2.0,50.0,0.0,50.0,2
2.0,0.0,1.0,1.0,0.0,50.0,50.0,1
2.0,0.0,2.0,0.0,0.0,100.0,0.0,1


### Merge all the social media data into one table

In [60]:
# The Musk sentiment features are the only social media source used here, so
# the "merged" table is simply that frame (the same object, not a copy).
# NOTE(review): the two other Twitter scraping cells above did not complete in
# the recorded run; merge their frames in here once they can be produced.
merged_social_media = musk_bitcoin_tweet_sentiment_features

### Output all the social media data

In [61]:
# Persist the social media frame as a CSV file inside the scrape directory.
merged_social_media.to_csv(
    os.path.join(scrape_directory, "all_social_media_data.csv")
)

# ----------------------------------------------------------------------------------------------------------------

# Read back in all the scraped data from the different sources and merge it into one df

In [62]:
# CSV files (one per data source) to read back in and merge, in this order.
list_of_files = [
    "all_internal_data.csv",
    "all_stock_data.csv",
    "all_economic_data.csv",
    "all_social_media_data.csv",
]

In [63]:
# Read the listed CSV files back in from the scrape directory and combine
# them into a single dataframe, passing the configured start date through.
merged_df = dc.read_in_all_data_and_merge_it(
    scrape_directory,
    list_of_files,
    start_date,
)

In [64]:
# Preview the first 5 rows (the pandas default for head()) of the fully merged frame.
merged_df.head(n=5)

Unnamed: 0_level_0,trading_volume,trades_per_minute,volatility,bid_and_ask_spread,bid_and_ask_sum_asks,bid_and_ask_sum_bids,time_between_blocks,block_size_votes,rank_bit_x,rank_bitfinex,rank_bitstamp,rank_btce,rank_coinbase,rank_itbit,rank_kraken,rank_mtgox,rank_okcoin,rank_others,marketcap3sma,marketcap7sma,marketcap14sma,marketcap30sma,marketcap90sma,marketcap3ema,marketcap7ema,marketcap14ema,marketcap30ema,marketcap90ema,marketcap3wma,marketcap7wma,marketcap14wma,marketcap30wma,marketcap90wma,marketcap3trx,marketcap7trx,marketcap14trx,marketcap30trx,marketcap90trx,marketcap3mom,marketcap7mom,marketcap14mom,marketcap30mom,marketcap90mom,marketcap3std,marketcap7std,marketcap14std,marketcap30std,marketcap90std,marketcap3var,marketcap7var,...,turkey_services_business_3m_price_expectation,uk_services_business_3m_price_expectation,italy_services_business_3m_price_expectation,belgium_services_business_3m_price_expectation,denmark_services_business_3m_price_expectation,sweden_services_business_3m_price_expectation,euro_19_business_employment_expectation,germany_business_employment_expectation,france_business_employment_expectation,turkey_business_employment_expectation,uk_business_employment_expectation,italy_business_employment_expectation,belgium_business_employment_expectation,denmark_business_employment_expectation,sweden_business_employment_expectation,germany_percent_housholds_w_internet,france_percent_housholds_w_internet,turkey_percent_housholds_w_internet,uk_percent_housholds_w_internet,italy_percent_housholds_w_internet,belgium_percent_housholds_w_internet,denmark_percent_housholds_w_internet,sweden_percent_housholds_w_internet,denmark_gov_debt_all,sweden_gov_debt_all,euro_area_percent_afford_unexpected_expenses,euro_19_percent_afford_unexpected_expenses,germany_percent_afford_unexpected_expenses,france_percent_afford_unexpected_expenses,turkey_percent_afford_unexpected_expenses,uk_percent_afford_unexpected_expenses,italy_percent_afford_unexpected_expenses,belg
ium_percent_afford_unexpected_expenses,denmark_percent_afford_unexpected_expenses,sweden_percent_afford_unexpected_expenses,germany_trust_in_legal_system,france_trust_in_legal_system,turkey_trust_in_legal_system,uk_trust_in_legal_system,italy_trust_in_legal_system,belgium_trust_in_legal_system,denmark_trust_in_legal_system,sweden_trust_in_legal_system,musk_num_tweets,musk_num_pos_tweets,musk_num_neg_tweets,musk_num_neut_tweets,musk_percent_pos,musk_percent_neg,musk_percent_neut
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1,Unnamed: 64_level_1,Unnamed: 65_level_1,Unnamed: 66_level_1,Unnamed: 67_level_1,Unnamed: 68_level_1,Unnamed: 69_level_1,Unnamed: 70_level_1,Unnamed: 71_level_1,Unnamed: 72_level_1,Unnamed: 73_level_1,Unnamed: 74_level_1,Unnamed: 75_level_1,Unnamed: 76_level_1,Unnamed: 77_level_1,Unnamed: 78_level_1,Unnamed: 79_level_1,Unnamed: 80_level_1,Unnamed: 81_level_1,Unnamed: 82_level_1,Unnamed: 83_level_1,Unnamed: 84_level_1,Unnamed: 85_level_1,Unnamed: 86_level_1,Unnamed: 87_level_1,Unnamed: 88_level_1,Unnamed: 89_level_1,Unnamed: 90_level_1,Unnamed: 91_level_1,Unnamed: 92_level_1,Unnamed: 93_level_1,Unnamed: 94_level_1,Unnamed: 95_level_1,Unnamed: 96_level_1,Unnamed: 97_level_1,Unnamed: 98_level_1,Unnamed: 99_level_1,Unnamed: 
100_level_1,Unnamed: 101_level_1
2009-01-01,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-02,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-03,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-04,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2009-01-05,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,...,,-18.8,-4.2,17.0,,-7.7,-25.8,-18.8,-18.7,,-56.1,-22.0,-10.4,-30.6,-51.0,79.0,69.0,30.0,77.0,53.0,67.0,83.0,86.0,,,,32.4,32.7,33.5,56.5,30.2,31.7,25.3,32.2,25.1,,,,,,,,,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [65]:
# Persist the merged dataset as "all_scraped_data.csv" in the configured data directory.
# to_csv is called with its defaults, so the frame's index is written as the first column.
merged_csv_path = os.path.join(data_directory, "all_scraped_data.csv")
merged_df.to_csv(merged_csv_path)