In [1]:
%matplotlib inline

from blockchain import statistics
import requests
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
import matplotlib
from sklearn import datasets, linear_model
#import xgboost as xgb
from sklearn.metrics import mean_squared_error
from sklearn.datasets import make_friedman1
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import train_test_split

In [2]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)

<h3> Glossary </h3> 
<p> DT = DAILY TRANSACTIONS</p>
<P> DUA = DAILY UNIQUE ADRESSES </P>
<P> DTV = DAILY TRANSACTION VOLUME </P>
<P> DDTV = DAILY DOLLAR TRANSACTION VOLUME </P>
<p> BIC = BITCOIN IN CIRCULATION </p>
<p> MP = MARKET PRICE </p>

In [3]:
DT_get = requests.get("https://blockchain.info/charts/n-transactions?timespan=all&format=json")
DUA_get = requests.get("https://blockchain.info/charts/n-unique-addresses?timespan=all&format=json")
DTV_get = requests.get("https://blockchain.info/charts/estimated-transaction-volume?timespan=all&format=json")
DDTV_get = requests.get("https://blockchain.info/charts/estimated-transaction-volume-usd?timespan=all&format=json")
BIC_get = requests.get("https://blockchain.info/charts/total-bitcoins?timespan=all&format=json")
MP_get = requests.get("https://blockchain.info/charts/market-price?timespan=all&format=json")

In [4]:
DT_json = DT_get.json()
DUA_json = DUA_get.json()
DTV_json = DTV_get.json()
DDTV_json = DDTV_get.json()
BIC_json = BIC_get.json()
MP_json = MP_get.json()

In [5]:
DT = pd.DataFrame(list(DT_json.values())[1])
DT.columns = ['Date', 'Transactions']

DUA = pd.DataFrame(list(DUA_json.values())[1])
DUA.columns = ['Date', 'UniqueAdresses']
DUA2 = DUA.drop('Date', axis=1)

DTV = pd.DataFrame(list(DTV_json.values())[1])
DTV.columns = ['Date', 'TransactionVolume']
DTV2 = DTV.drop('Date', axis=1)

DDTV = pd.DataFrame(list(DDTV_json.values())[1])
DDTV.columns = ['Date', 'DailyDollarTransactionVolume']
DDTV2 = DDTV.drop('Date', axis=1)

BIC = pd.DataFrame(list(BIC_json.values())[1])
BIC.columns = ['Date', 'BitCoinInCirculation']
BIC2 = BIC.drop('Date', axis=1)

MP = pd.DataFrame(list(MP_json.values())[1])
MP.columns = ['Date', 'MarketPrice']
MP2 = MP.drop('Date', axis=1)
MP['Date'] = pd.to_datetime(MP['Date'],unit='s')

In [6]:
DataSet = pd.concat([DT, DUA2], axis=1)
DataSet = pd.concat([DataSet, DTV2], axis=1)
DataSet = pd.concat([DataSet, DDTV2], axis=1)
DataSet = pd.concat([DataSet, BIC2], axis=1)
DataSet = pd.concat([DataSet, MP2], axis=1)
DataSet['Date'] = pd.to_datetime(DataSet['Date'],unit='s')
DataSet

Unnamed: 0,Date,Transactions,UniqueAdresses,TransactionVolume,DailyDollarTransactionVolume,BitCoinInCirculation,MarketPrice
0,2009-01-03,1,1,0.000,0.000,50.000,0.000
1,2009-01-05,0,0,0.000,0.000,50.000,0.000
2,2009-01-07,0,0,0.000,0.000,50.000,0.000
3,2009-01-09,14,14,0.000,0.000,750.000,0.000
4,2009-01-11,106,106,0.000,0.000,7600.000,0.000
5,2009-01-13,116,117,10.000,0.000,17800.000,0.000
6,2009-01-15,136,136,414.000,0.000,30450.000,0.000
7,2009-01-17,109,109,100.000,0.000,41650.000,0.000
8,2009-01-19,120,120,150.000,0.000,52650.000,0.000
9,2009-01-21,115,115,500.000,0.000,64100.000,0.000


In [7]:
DataSet = DataSet.drop('DailyDollarTransactionVolume', axis = 1)

In [8]:
DataSet.describe()

Unnamed: 0,Transactions,UniqueAdresses,TransactionVolume,BitCoinInCirculation,MarketPrice
count,1546.0,1546.0,1546.0,1546.0,1546.0
mean,75565.382,137424.393,173187.967,9695627.579,263.395
std,90722.379,168296.823,245321.945,5127033.777,392.925
min,0.0,0.0,0.0,50.0,0.0
25%,1869.25,2472.5,49823.5,5409487.5,0.79
50%,46869.5,47559.5,143228.5,10961262.5,73.835
75%,106323.0,224307.5,241245.0,14143631.25,427.947
max,367710.0,687290.0,5313161.0,16402262.5,2883.31


In [9]:
DataSet.corr()

Unnamed: 0,Transactions,UniqueAdresses,TransactionVolume,BitCoinInCirculation,MarketPrice
Transactions,1.0,0.987,0.294,0.798,0.79
UniqueAdresses,0.987,1.0,0.28,0.801,0.818
TransactionVolume,0.294,0.28,1.0,0.352,0.179
BitCoinInCirculation,0.798,0.801,0.352,1.0,0.65
MarketPrice,0.79,0.818,0.179,0.65,1.0


<h1> mmm, outliers could be removed</h1>
<h3> Transactions vs MarketPrice </h3>

In [None]:
plt.plot(DataSet.Transactions, DataSet.MarketPrice, 'ro')
plt.ylabel('Price (USD)')
plt.xlabel('Transactions Per DAY')
plt.show()

<h3> UniqueAdresses vs MarketPrice</h3>

In [None]:
plt.plot(DataSet.UniqueAdresses,DataSet.MarketPrice, 'ro')
plt.ylabel('Price (USD)')
plt.xlabel('UniqueAdresses')
plt.show()

<h3> TransactionVolume vs MarketPrice</h3>

In [None]:
plt.plot(DataSet.TransactionVolume, DataSet.MarketPrice, 'ro')
plt.ylabel('Price (USD)')
plt.xlabel('TransactionVolume')
plt.show()

<h3> BitCoinInCirculation vs MarketPrice</h3>

In [None]:
plt.plot(DataSet.BitCoinInCirculation, DataSet.MarketPrice, 'ro')
plt.ylabel('Price (USD)')
plt.xlabel('BitCoinInCirculation')
plt.show()

In [12]:
pre_train = DataSet
train_x = DataSet[['Transactions', 'UniqueAdresses','TransactionVolume','BitCoinInCirculation']]
train_y = DataSet['MarketPrice']

In [10]:
train_x = DataSet[['Transactions', 'UniqueAdresses','TransactionVolume','BitCoinInCirculation']]
train_y = DataSet['MarketPrice']

lm1 = linear_model.LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=1)
lm1.fit(train_x,train_y)
yhat = pd.DataFrame(lm1.predict(train_x))
yhat.columns = ['yhat']
lm1.score(train_x, train_y, sample_weight=None)




0.68230257076947054

In [None]:
plotter =  pd.concat([train_y, yhat], axis=1)
plotter
plt.plot(plotter.MarketPrice, plotter.yhat, 'ro')
plt.ylabel('Predicted')
plt.xlabel('Actual')
plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
plt.show()

<h3>7 day forecast</h3>

In [None]:
train_y_7 = train_y.shift(periods=-7, freq=None, axis=0)
pre_train = pd.concat([train_x,train_y_7], axis = 1).dropna()
train_x = pre_train[['Transactions', 'UniqueAdresses','TransactionVolume','BitCoinInCirculation']]
train_y = pre_train['MarketPrice']

lm1 = linear_model.LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=1)
lm1.fit(train_x,train_y)
yhat = pd.DataFrame(lm1.predict(train_x))
yhat.columns = ['yhat']
lm1.score(train_x, train_y, sample_weight=None)

plotter =  pd.concat([train_y, yhat], axis=1)
plotter
plt.plot(plotter.MarketPrice, plotter.yhat, 'ro')
plt.ylabel('Predicted')
plt.xlabel('Actual')
plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
lm1.score(train_x, train_y, sample_weight=None),plt.show()

<h3> 30 day forecast </h3>

In [None]:
train_y_30 = train_y.shift(periods=-30, freq=None, axis=0)
pre_train = pd.concat([train_x,train_y_30], axis = 1).dropna()
train_x = pre_train[['Transactions', 'UniqueAdresses','TransactionVolume','BitCoinInCirculation']]
train_y = pre_train['MarketPrice']

lm1 = linear_model.LinearRegression(fit_intercept=True, normalize=False, copy_X=True, n_jobs=1)
lm1.fit(train_x,train_y)
yhat = pd.DataFrame(lm1.predict(train_x))
yhat.columns = ['yhat']
lm1.score(train_x, train_y, sample_weight=None)

plotter =  pd.concat([train_y, yhat], axis=1)
plotter
plt.plot(plotter.MarketPrice, plotter.yhat, 'ro')
plt.ylabel('Predicted')
plt.xlabel('Actual')
plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
lm1.score(train_x, train_y, sample_weight=None),plt.show()

In [None]:
list_y = []
for i in range(0,len(train_y)-30):
    print(train_y[i],train_y[i+30])

<h3> Same but testing other algorithms</h3>

In [None]:
X_train, X_test, y_train, y_test = train_test_split( train_x, train_y, test_size=0.20, random_state=42)

In [None]:
"""
Boosting Tree Regressor - Note Looking at the analysis this doesn't really look healthy to me. 
Besides the fact that it can now predict higher than 1300 dollar

"""


BTR = GradientBoostingRegressor(n_estimators=100, learning_rate=0.1,max_depth=6, random_state=0, loss='ls').fit(X_train, y_train)
mean_squared_error(y_test, BTR.predict(X_test)) 
BTR.score(X_train, y_train, sample_weight=None),BTR.score(X_test, y_test, sample_weight=None)




In [None]:
yhat = pd.DataFrame(BTR.predict(X_train))
yhat.columns = ['yhat']
lm1.score(train_x, train_y, sample_weight=None)

plotter =  pd.concat([train_y, yhat], axis=1)
plotter
plt.plot(plotter.MarketPrice, plotter.yhat, 'ro')
plt.ylabel('Predicted')
plt.xlabel('Actual')
plt.figure(num=None, figsize=(8, 6), dpi=80, facecolor='w', edgecolor='k')
lm1.score(X_test, y_test, sample_weight=None)

<h1> start here </h1>

In [105]:

pre_train = DataSet
train_x = DataSet[['Transactions', 'UniqueAdresses','TransactionVolume','BitCoinInCirculation']]
train_y = DataSet['MarketPrice']

In [106]:
train_y_30 = train_y.shift(periods=-5, freq=None, axis=0)
pre_train = pd.concat([train_x,train_y,train_y_30], axis = 1).dropna()

In [108]:
pre_train.columns = ['Transactions','UniqueAdresses','TransactionVolume','BitCoinInCirculation','y_now','y_30']

In [109]:
pre_train['y'] = 0
#df['hour'] = df.apply(lambda x: find_hour(x['Dates']), axis=1)

In [110]:
len(pre_train)

1541

In [111]:
for i in range(0,len(pre_train)):
    if pre_train['y_30'][i] > pre_train['y_now'][i]:
        print(i)
        #print(pre_train['y_30'][i],pre_train['y_now'][i])
        pre_train['y'][i] = 1

291
292
293


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


294
295
305
317
318
319
320
321
324
325
326
327
328
329
330
331
332
333
334
335
338
339
351
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
390
401
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
432
433
434
435
436
437
438
439
440
441
442
448
449
450
462
463
472
473
475
476
490
493
509
510
511
512
513
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
561
562
569
570
571
572
576
578
579
580
581
587
588
589
590
594
595
596
597
598
599
600
606
607
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
631
632
635
636
637
638
639
640
641
642
643
644
645
648
649
650
651
652
653
654
655
656
663
665
667
668
669
670
671
672
673
674
675
676
677
679
680
681
697
698
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
719
723
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745


In [112]:
pre_train.describe()

Unnamed: 0,Transactions,UniqueAdresses,TransactionVolume,BitCoinInCirculation,y_now,y_30,y
count,1541.0,1541.0,1541.0,1541.0,1541.0,1541.0,1541.0
mean,74963.404,136138.731,172877.582,9673890.518,255.922,264.249,0.46
std,90237.571,167017.14,245648.987,5121096.478,370.902,393.275,0.499
min,0.0,0.0,0.0,50.0,0.0,0.0,0.0
25%,1842.0,2388.0,49554.0,5384450.0,0.79,0.85,0.0
50%,46764.0,46413.0,142755.0,10940875.0,67.85,76.48,0.0
75%,105878.0,222639.0,240583.0,14116950.0,426.06,428.0,1.0
max,367710.0,687290.0,5313161.0,16384087.5,2883.31,2883.31,1.0


In [113]:
pre_train_x = pre_train[['Transactions','UniqueAdresses','TransactionVolume','BitCoinInCirculation']]
pre_train_y = pre_train['y']

In [114]:
pre_train_x.shape,pre_train_y.shape

((1541, 4), (1541,))

In [115]:
X_train, X_test, y_train, y_test = train_test_split( pre_train_x, pre_train_y, test_size=0.20, random_state=42)

In [116]:
from sklearn.ensemble import RandomForestClassifier

In [119]:
I = [5,10,15,20,30,40,50,60,70,80,90,100]
for i in I:
    clf = RandomForestClassifier(n_estimators=i)
    clf.fit(X_train, y_train)
    print(i,clf.score(X_train, y_train),clf.score(X_test, y_test))

5 0.966720779221 0.744336569579
10 0.982142857143 0.766990291262
15 0.993506493506 0.750809061489
20 0.995941558442 0.783171521036
30 0.999188311688 0.766990291262
40 1.0 0.73786407767
50 1.0 0.754045307443
60 1.0 0.763754045307
70 0.999188311688 0.776699029126
80 1.0 0.766990291262
90 1.0 0.750809061489
100 1.0 0.760517799353


In [120]:

clf = RandomForestClassifier(n_estimators=50)
clf.fit(X_train, y_train)
print(i,clf.score(X_train, y_train),clf.score(X_test, y_test))

100 1.0 0.779935275081


In [121]:
for_scoring = DataSet[['Transactions','UniqueAdresses','TransactionVolume','BitCoinInCirculation']]
Price = DataSet[['MarketPrice']]
Date = DataSet[['Date']]

In [122]:
x = pd.DataFrame(clf.predict_proba(for_scoring)	)

In [123]:
x.columns = ['sell','buy']

In [129]:
simul = pd.concat([Date,Price,x],axis = 1)

In [130]:
simul
simul = simul[simul['Date'] >= '2017-04-01']

In [131]:
simul

Unnamed: 0,Date,MarketPrice,sell,buy
1505,2017-04-01,1086.92,0.02,0.98
1506,2017-04-03,1141.81,0.04,0.96
1507,2017-04-05,1133.07,0.0,1.0
1508,2017-04-07,1190.45,0.0,1.0
1509,2017-04-09,1208.8,0.0,1.0
1510,2017-04-11,1226.61,0.1,0.9
1511,2017-04-13,1180.02,0.0,1.0
1512,2017-04-15,1184.88,0.08,0.92
1513,2017-04-17,1205.63,0.02,0.98
1514,2017-04-19,1217.93,0.02,0.98


In [127]:
hold_init = 1000
hold_coins = 1000/simul.MarketPrice[1170]
final = hold_coins*simul.MarketPrice[1545]
final_strat = 14.31*simul.MarketPrice[1545]

In [128]:
print(final,final_strat)

12387.455036 39423.6207


In [139]:
starter = 1505
iterator = 5
btc_wallet = 0.0
rand_wallet = 1000.0

for index,row in simul.iterrows():
    print(index,starter,row['Date'],round(btc_wallet,2),round(rand_wallet,2), row['MarketPrice'],row['buy'])
    if index == starter:
        if row['buy'] > 0.5 and rand_wallet != 0:
            btc_wallet = rand_wallet/row['MarketPrice']
            rand_wallet = 0
        if row['buy'] < 0.5 and rand_wallet == 0:
            rand_wallet = btc_wallet*row['MarketPrice']
            btc_wallet = 0
            
        starter = starter + iterator
        
    

1505 1505 2017-04-01 00:00:00 0.0 1000.0 1086.92 0.98
1506 1510 2017-04-03 00:00:00 0.92 0 1141.81 0.96
1507 1510 2017-04-05 00:00:00 0.92 0 1133.07 1.0
1508 1510 2017-04-07 00:00:00 0.92 0 1190.45 1.0
1509 1510 2017-04-09 00:00:00 0.92 0 1208.8 1.0
1510 1510 2017-04-11 00:00:00 0.92 0 1226.61 0.9
1511 1515 2017-04-13 00:00:00 0.92 0 1180.02 1.0
1512 1515 2017-04-15 00:00:00 0.92 0 1184.88 0.92
1513 1515 2017-04-17 00:00:00 0.92 0 1205.63 0.98
1514 1515 2017-04-19 00:00:00 0.92 0 1217.93 0.98
1515 1515 2017-04-21 00:00:00 0.92 0 1258.36 1.0
1516 1520 2017-04-23 00:00:00 0.92 0 1257.98 0.98
1517 1520 2017-04-25 00:00:00 0.92 0 1279.41 0.98
1518 1520 2017-04-27 00:00:00 0.92 0 1345.35 0.88
1519 1520 2017-04-29 00:00:00 0.92 0 1334.97 0.98
1520 1520 2017-05-01 00:00:00 0.92 0 1417.17 1.0
1521 1525 2017-05-03 00:00:00 0.92 0 1507.57 1.0
1522 1525 2017-05-05 00:00:00 0.92 0 1533.33 0.94
1523 1525 2017-05-07 00:00:00 0.92 0 1535.86 1.0
1524 1525 2017-05-09 00:00:00 0.92 0 1721.28 0.98
1525 1

In [137]:
hold_init = 1000
hold_coins = 1000/simul.MarketPrice[1505]
final = hold_coins*simul.MarketPrice[1545]
final_strat = 14.31*simul.MarketPrice[1545]

78.0

In [138]:
print("new:   :","rand:  ",rand_wallet,"btc(in rand:)  " ,btc_wallet*simul.MarketPrice[1545],"hold:   ", final)

new:   : rand:   2617.8283590328633 btc(in rand:)   0.0 hold:    2534.65756449
