In [1]:
import pandas as pd
import numpy as np
import re
import tensorflow as tf
import tensorflow_hub as hub

In [2]:
from train_and_eval import *
from utilities import *

In [3]:
btc_daily = pd.read_csv('./time_series_data/btc_daily_all.csv')
btc_daily = clean_data(btc_daily,'btc')

In [4]:
eth_daily = pd.read_csv('./time_series_data/eth_daily_all.csv')
eth_daily = clean_data(eth_daily,'eth')

In [5]:
xrp_daily = pd.read_csv('./time_series_data/xrp_daily_all.csv')
xrp_daily = clean_data(xrp_daily,'xrp')

In [6]:
ltc_daily = pd.read_csv('./time_series_data/ltc_daily_all.csv')
ltc_daily = clean_data(ltc_daily,'ltc')

In [7]:
xmr_daily = pd.read_csv('./time_series_data/xmr_daily_all.csv')
xmr_daily = clean_data(xmr_daily,'xmr')

In [8]:
dash_daily = pd.read_csv('./time_series_data/dash_daily_all.csv')
dash_daily = clean_data(dash_daily,'dash')

In [9]:
xem_daily = pd.read_csv('./time_series_data/xem_daily_all.csv')
xem_daily = clean_data(xem_daily,'xem')

In [10]:
bcn_daily = pd.read_csv('./time_series_data/bcn_daily_all.csv')
bcn_daily = clean_data(bcn_daily,'bcn')

In [11]:
coin_total = btc_daily.join([eth_daily,xrp_daily,ltc_daily,xmr_daily,dash_daily,xem_daily,bcn_daily],how='left')

In [12]:
coin_complete = coin_total.dropna(axis=0)       # complete daily prices and volumes for 8 cryptocurrencies
                                                # could do some time series analysis on this part
                                                # add other indices later

In [13]:
sp500_daily = pd.read_csv('./time_series_data/^GSPC.csv')
sp500_daily = clean_data(sp500_daily,'sp500')

In [14]:
n225_daily = pd.read_csv('./time_series_data/^N225.csv')
n225_daily = clean_data(n225_daily,'n225')

In [15]:
sxxp_daily = pd.read_csv('./time_series_data/^SXXP.csv')
sxxp_daily = clean_data(sxxp_daily,'sxxp',True,False)

In [16]:
vix_daily = pd.read_csv('./time_series_data/^VIX.csv')
vix_daily = clean_data(vix_daily,'vix')

In [17]:
dxsq_daily = pd.read_csv('./time_series_data/DXSQ.F.csv')
dxsq_daily = clean_data(dxsq_daily,'dxsq')

In [18]:
metal_daily = pd.read_csv('./time_series_data/metal.csv')
metal_daily = metal_daily.iloc[:,[0,1,3]]
metal_daily['Date'] = pd.to_datetime(metal_daily['Date'],format="%Y-%m-%d")
metal_daily = metal_daily.set_index('Date')

In [19]:
int1mo_daily = pd.read_csv('./time_series_data/ustreasuryrates_1mo.csv')
int1mo_daily = clean_data(int1mo_daily,'',False,False)

In [20]:
int10y_daily = pd.read_csv('./time_series_data/ustreasuryrates_10y.csv')
int10y_daily = clean_data(int10y_daily,'',False,False)

In [21]:
eur_usd_daily = pd.read_csv('./time_series_data/EUR_USD.csv')
eur_usd_daily = clean_data(eur_usd_daily,'eur_usd',True,False)

In [22]:
usd_jpy_daily = pd.read_csv('./time_series_data/USD_JPY.csv')
usd_jpy_daily = clean_data(usd_jpy_daily,'usd_jpy',True,False)

In [23]:
features_total = btc_daily.join([sp500_daily,n225_daily,sxxp_daily,vix_daily,dxsq_daily,metal_daily,int1mo_daily,\
                                int10y_daily,eur_usd_daily,usd_jpy_daily],how='left')

In [24]:
features_complete = features_total.fillna(method='ffill').dropna(axis=0)

In [25]:
features_complete['n225_close'] = features_complete['n225_close']/features_complete['usd_jpy_close']
features_complete['sxxp_close'] = features_complete['sxxp_close']*features_complete['eur_usd_close']
features_complete['dxsq_close'] = features_complete['dxsq_close']*features_complete['eur_usd_close']
features_complete.drop(['eur_usd_close','usd_jpy_close'],axis=1,inplace=True)
for col in features_complete.columns:
    features_complete[col] = np.log(features_complete[col]+1).diff()
features_complete.dropna(axis=0,inplace=True)
features_complete['y'] = features_complete['btc_close'].apply(lambda x: 1 if x>0 else 0)

In [26]:
bitcoin_news1 = pd.read_csv('./texts_data/bitcoin_news.csv')
bitcoin_news1 = clean_texts(bitcoin_news1,['Summary','Title'])

In [27]:
bitcoin_news2 = pd.read_csv('./texts_data/bitcoinist_news_cleaned.csv')
bitcoin_news2 = clean_texts(bitcoin_news2,['Summary','Title'])

In [28]:
news_complete = pd.concat([bitcoin_news1[['Date','Title','Summary']],bitcoin_news2[['Date','Summary','Title']]]).reset_index(drop=True)

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


In [29]:
news_complete['Text'] = news_complete['Title']+' '+news_complete['Summary']

In [30]:
news_groupby = news_complete.groupby('Date')

In [31]:
new_dict = {'Date':[],'Text':[]}
for date, df in news_groupby:
    s = ''
    for t in df['Text']:
        s = s+' '+t
    s = s.strip()
    new_dict['Date'].append(date)
    new_dict['Text'].append(s)

In [32]:
news_complete2 = pd.DataFrame(new_dict).sort_values(by='Date').reset_index(drop=True)

In [33]:
news_complete2 = news_complete2.set_index('Date')

In [34]:
features_complete = features_complete.join(news_complete2,how='left').fillna('')

In [35]:
btc_hourly = pd.read_csv('./time_series_data/btc_hourly.txt')

btc_hourly = btc_hourly[['Date','Volume_Currency','Weighted_Price']]
btc_hourly.columns = ['Date','Volume','Price']

btc_hourly['Date'] = pd.to_datetime(btc_hourly['Date'],format="%Y-%m-%d")

In [36]:
price_picture=[]
volume_picture = []
timelist = []
btc_h_groupbydate = btc_hourly.groupby('Date')


In [37]:
for date,df in btc_h_groupbydate:
    timelist.append(date)
    vec_price = np.log(df['Price']).diff().iloc[1:]
    vec_vol = np.log(df['Volume']).diff().iloc[1:]
    price_picture.append(create2d(vec_price,23,12,12))
    volume_picture.append(create2d(vec_vol,23,12,12))

In [38]:
timelist = timelist[:-1]
price_picture = price_picture[:-1]
volume_picture = volume_picture[:-1]

In [39]:
price_picture = np.concatenate([np.zeros((1774,12,12)),np.array(price_picture)],axis = 0)
volume_picture = np.concatenate([np.zeros((1774,12,12)),np.array(volume_picture)],axis = 0)

In [40]:
total_picture = np.stack([price_picture,volume_picture],axis=3)

In [41]:
train(10,features_complete,total_picture)

INFO:tensorflow:Using C:\Users\shif3\AppData\Local\Temp\tfhub_modules to cache modules.
INFO:tensorflow:Initialize variable module/embeddings/part_0:0 from checkpoint b'C:\\Users\\shif3\\AppData\\Local\\Temp\\tfhub_modules\\7f07056e3a4c9f125d5bd920ef3883605d8556a8\\variables\\variables' with embeddings
Training Epoch : 0
Initialize variables!
Training Loss at step 20 : 0.6484193205833435
Training Loss at step 21 : 0.7469345927238464
Training Loss at step 22 : 0.6596718430519104
Training Loss at step 23 : 0.6418533325195312
Training Loss at step 24 : 0.6714564561843872
Initialize variables!
Test Loss at step : 0.7135860174894333
Training Epoch : 1
Training Loss at step 20 : 0.6466519832611084
Training Loss at step 21 : 0.7391294836997986
Training Loss at step 22 : 0.6585600972175598
Training Loss at step 23 : 0.6504451036453247
Training Loss at step 24 : 0.6742250919342041
Test Loss at step : 0.719560980796814
Training Epoch : 2
Training Loss at step 20 : 0.6520911455154419
Training Los

Exception ignored in: <bound method TF_Input.<lambda> of <tensorflow.python.pywrap_tensorflow_internal.TF_Input; proxy of <Swig Object of type 'TF_Input *' at 0x0000029E6860AAE0> >>
Traceback (most recent call last):
  File "D:\Users\shif3\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\pywrap_tensorflow_internal.py", line 845, in <lambda>
    __del__ = lambda self: None
KeyboardInterrupt: 


Training Loss at step 22 : 0.6622067093849182


Exception ignored in: <bound method TF_Output.<lambda> of <tensorflow.python.pywrap_tensorflow_internal.TF_Output; proxy of <Swig Object of type 'TF_Output *' at 0x0000029E64F3DAE0> >>
Traceback (most recent call last):
  File "D:\Users\shif3\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\pywrap_tensorflow_internal.py", line 871, in <lambda>
    __del__ = lambda self: None
KeyboardInterrupt


Training Loss at step 23 : 0.65553879737854


ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "D:\Users\shif3\Anaconda3\envs\tensorflow\lib\site-packages\IPython\core\interactiveshell.py", line 2961, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-41-7e53638cd128>", line 1, in <module>
    train(10,features_complete,total_picture)
  File "C:\Users\shif3\Desktop\capstone\train_and_eval.py", line 138, in train
    _,train_loss,state_f = sess.run([training_op,loss,states],feed_dict=feed_dict)
  File "D:\Users\shif3\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py", line 900, in run
    run_metadata_ptr)
  File "D:\Users\shif3\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py", line 1135, in _run
    feed_dict_tensor, options, run_metadata)
  File "D:\Users\shif3\Anaconda3\envs\tensorflow\lib\site-packages\tensorflow\python\client\session.py", line 1316, in _do_run
    run_metadata)
  File "D:\Users\shif3\Anaconda3\envs\tensorflow\lib\site-

KeyboardInterrupt: 