In [112]:
import pandas as pd
import altair as alt
import datetime as datetime
import pandas_datareader as DataReader
import scipy
import sklearn
from sklearn.linear_model import LinearRegression

In [113]:
# Get the Bitcoin Historical Pricing Data
df = pd.read_csv("bitstampUSD.csv")
df.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,,,,,,,
2,1325318040,,,,,,,
3,1325318100,,,,,,,
4,1325318160,,,,,,,


In [114]:
# Problems with the data set
# NaN values early on, data is by the second which doesn't correspond
# with most technical indicators and other financial instruments
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4363457 entries, 0 to 4363456
Data columns (total 8 columns):
 #   Column             Dtype  
---  ------             -----  
 0   Timestamp          int64  
 1   Open               float64
 2   High               float64
 3   Low                float64
 4   Close              float64
 5   Volume_(BTC)       float64
 6   Volume_(Currency)  float64
 7   Weighted_Price     float64
dtypes: float64(7), int64(1)
memory usage: 266.3 MB


In [115]:
df.describe()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
count,4363457.0,3126480.0,3126480.0,3126480.0,3126480.0,3126480.0,3126480.0,3126480.0
mean,1456469000.0,3674.656,3677.366,3671.73,3674.595,9.85504,28844.59,3674.57
std,75732960.0,3935.578,3939.077,3931.713,3935.49,32.29272,101027.7,3935.458
min,1325318000.0,3.8,3.8,1.5,1.5,0.0,0.0,3.8
25%,1390770000.0,410.0,410.24,409.83,410.0,0.398812,350.3759,409.9998
50%,1456610000.0,1175.14,1175.77,1174.825,1175.14,1.99,2620.491,1175.2
75%,1522062000.0,6931.175,6935.78,6926.79,6931.225,7.639098,17600.57,6931.18
max,1587514000.0,19665.76,19666.0,19649.96,19665.75,5853.852,7569437.0,19663.3


In [116]:
# what percent of set is null?
print(df.isnull().sum()*100/df.isnull().count())

Timestamp             0.000000
Open                 28.348555
High                 28.348555
Low                  28.348555
Close                28.348555
Volume_(BTC)         28.348555
Volume_(Currency)    28.348555
Weighted_Price       28.348555
dtype: float64


In [117]:
# we will ignore earilier dates with NaN values
bitcoin = df.dropna()
bitcoin

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.000000,4.390000
478,1325346600,4.39,4.39,4.39,4.39,48.000000,210.720000,4.390000
547,1325350740,4.50,4.57,4.50,4.57,37.862297,171.380338,4.526411
548,1325350800,4.58,4.58,4.58,4.58,9.000000,41.220000,4.580000
1224,1325391360,4.58,4.58,4.58,4.58,1.502000,6.879160,4.580000
...,...,...,...,...,...,...,...,...
4363452,1587513360,6847.97,6856.35,6847.97,6856.35,0.125174,858.128697,6855.498790
4363453,1587513420,6850.23,6856.13,6850.23,6850.89,1.224777,8396.781459,6855.763449
4363454,1587513480,6846.50,6857.45,6846.02,6857.45,7.089168,48533.089069,6846.090966
4363455,1587513540,6854.18,6854.98,6854.18,6854.98,0.012231,83.831604,6854.195090


In [118]:
# convert the timestamp variable to a datetime object
bitcoin['Timestamp']=pd.to_datetime(bitcoin['Timestamp'],unit='s')
# index by second
bitcoin.set_index('Timestamp', inplace = True)
# aggregate each group of seconds with by day for prediction, month for graphing
bitcoin_daily = bitcoin.resample('1D').mean()
bitcoin_monthly = bitcoin.resample('1M').mean()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [119]:
bitcoin_daily.head()

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-12-31,4.465,4.4825,4.465,4.4825,23.82947,106.330084,4.471603
2012-01-01,4.806667,4.806667,4.806667,4.806667,7.200667,35.25972,4.806667
2012-01-02,5.0,5.0,5.0,5.0,19.048,95.24,5.0
2012-01-03,5.2525,5.2525,5.2525,5.2525,11.00466,58.100651,5.2525
2012-01-04,5.2,5.223333,5.2,5.223333,11.914807,63.119577,5.208159


In [120]:
bitcoin_monthly

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2011-12-31,4.465000,4.482500,4.465000,4.482500,23.829470,106.330084,4.471603
2012-01-31,6.345389,6.348982,6.341218,6.346148,4.031777,25.168238,6.345955
2012-02-29,5.230208,5.231646,5.227036,5.228510,8.313993,42.239422,5.228443
2012-03-31,4.985481,4.986695,4.982580,4.983828,15.197791,76.509751,4.984397
2012-04-30,4.995171,4.996447,4.993763,4.995079,21.683913,108.218094,4.995091
...,...,...,...,...,...,...,...
2019-12-31,7240.451387,7244.225846,7236.783347,7240.441817,2.930000,21126.441888,7240.856314
2020-01-31,8324.445998,8329.409153,8320.019888,8324.543275,4.565741,38436.780169,8325.103326
2020-02-29,9646.123398,9653.150847,9640.673863,9646.404206,4.086245,39434.591482,9647.502249
2020-03-31,6935.356257,6942.307238,6928.244283,6935.202732,10.880775,67722.297847,6935.509451


In [121]:
# Set Index to column so it can be referenced in altair
bitcoin_monthly["Month"] = bitcoin_monthly.index
# Graph Bitcoin Price over time
alt.Chart(bitcoin_monthly).mark_line().encode(
    x = alt.X('Month', title='Year'),
    y = alt.Y('Close', axis=alt.Axis(format='$', title='Closing Price (USD)')),
    color=alt.value('orange'),
).properties(
    title='Bitcoin Historical Price'
).configure_title(
    fontSize=20,
    font='Courier',
    anchor='start',
    color='gray'
)

In [122]:
# Often we use basic market indicators to predict stock performance
# Is Bitcoin Coorelated with other instruments as a whole
start = bitcoin_monthly["Month"].iloc[0]
end = bitcoin_monthly["Month"].iloc[100]
bitcoin_monthly["S&P500"] = DataReader.DataReader(['sp500'], 'fred', start, end)
bitcoin_monthly["Nikkiei"] = DataReader.DataReader(['NIKKEI225'], 'fred', start, end)
#bitcoin_monthly["GDP"] = DataReader.DataReader('A191RL1Q225SBEA', 'fred', start, end)
bitcoin_monthly

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,Month,S&P500,Nikkiei
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2011-12-31,4.465000,4.482500,4.465000,4.482500,23.829470,106.330084,4.471603,2011-12-31,,
2012-01-31,6.345389,6.348982,6.341218,6.346148,4.031777,25.168238,6.345955,2012-01-31,1312.41,8802.51
2012-02-29,5.230208,5.231646,5.227036,5.228510,8.313993,42.239422,5.228443,2012-02-29,1365.68,9723.24
2012-03-31,4.985481,4.986695,4.982580,4.983828,15.197791,76.509751,4.984397,2012-03-31,,
2012-04-30,4.995171,4.996447,4.993763,4.995079,21.683913,108.218094,4.995091,2012-04-30,1397.91,
...,...,...,...,...,...,...,...,...,...,...
2019-12-31,7240.451387,7244.225846,7236.783347,7240.441817,2.930000,21126.441888,7240.856314,2019-12-31,3230.78,
2020-01-31,8324.445998,8329.409153,8320.019888,8324.543275,4.565741,38436.780169,8325.103326,2020-01-31,3225.52,23205.18
2020-02-29,9646.123398,9653.150847,9640.673863,9646.404206,4.086245,39434.591482,9647.502249,2020-02-29,,
2020-03-31,6935.356257,6942.307238,6928.244283,6935.202732,10.880775,67722.297847,6935.509451,2020-03-31,2584.59,18917.01


In [123]:
# is bitcoin coorelated with these indicators
bitcoin_monthly.corr()['Close'].iloc[7:]

S&P500     0.841347
Nikkiei    0.705686
Name: Close, dtype: float64

In [127]:
# Nicely visualized with chart below
bitcoin_sp = alt.Chart(bitcoin_monthly).mark_point().encode(
    x = alt.X('Close', axis=alt.Axis(format='$', title='Bitcoin (USD)')),
    y = alt.Y('S&P500',axis=alt.Axis(format='$', title='S&P (USD)')),
    color=alt.value('green'),
).mark_circle(size=30)
(bitcoin_sp + bitcoin_sp.transform_regression('Close', 'S&P500').mark_line()).configure_title(
    fontSize=20,
    font='Courier',
    anchor='start',
    color='gray'
).properties(
    title = "Coorelation Between Bitcoin, S&P 500"
)

In [130]:
bitcoin_sp = alt.Chart(bitcoin_monthly).mark_point().encode(
    x = alt.X('Close', axis=alt.Axis(format='$', title='Bitcoin (USD)')),
    y = alt.Y('Nikkiei',axis=alt.Axis(format='$', title='Nikkiei (Tokyo) (USD)')),
    color=alt.value('red'),
).mark_circle(size=30)
(bitcoin_sp + bitcoin_sp.transform_regression('Close', 'Nikkiei').mark_line()).configure_title(
    fontSize=20,
    font='Courier',
    anchor='start',
    color='gray'
).properties(
    title = "Coorelation Between Bitcoin, Nikkie Index"
)

In [137]:
# It seems like it is, so lets fit a model with these values
# first add data to day by day value
bitcoin_daily["day"] = bitcoin_daily.index


start = bitcoin_daily["day"].iloc[0]
end = bitcoin_daily["day"].iloc[3035]
bitcoin_daily["S&P500"] = DataReader.DataReader(['sp500'], 'fred', start, end)
bitcoin_daily["Nikkiei"] = DataReader.DataReader(['NIKKEI225'], 'fred', start, end)

In [138]:
print(bitcoin_daily.isnull().sum()*100/bitcoin_daily.isnull().count())

Open                  0.098814
High                  0.098814
Low                   0.098814
Close                 0.098814
Volume_(BTC)          0.098814
Volume_(Currency)     0.098814
Weighted_Price        0.098814
day                   0.000000
S&P500               31.192358
Nikkiei              33.036891
dtype: float64


In [142]:
# NaNs correspond to weekends/holidays, so we drop them
bitcoin_daily.dropna(inplace = True)

In [143]:
bitcoin_daily

Unnamed: 0_level_0,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price,day,S&P500,Nikkiei
Timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
2012-01-04,5.200000,5.223333,5.200000,5.223333,11.914807,63.119577,5.208159,2012-01-04,1277.30,8560.11
2012-01-05,6.281429,6.286190,6.281429,6.286190,4.514373,27.987370,6.284127,2012-01-05,1281.06,8488.71
2012-01-06,6.435000,6.445000,6.435000,6.445000,2.420196,15.914659,6.438999,2012-01-06,1277.81,8390.35
2012-01-10,6.603000,6.603000,6.603000,6.603000,2.076333,13.773469,6.603000,2012-01-10,1292.08,8422.26
2012-01-11,7.119583,7.121875,7.119583,7.121875,2.194978,15.479409,7.120685,2012-01-11,1292.48,8447.88
...,...,...,...,...,...,...,...,...,...,...
2020-04-16,6914.045839,6919.369179,6909.210340,6914.345924,11.130005,77496.548247,6914.666410,2020-04-16,2799.55,19290.20
2020-04-17,7071.958939,7075.071649,7068.929333,7072.234602,6.121590,43287.908348,7071.901788,2020-04-17,2874.56,19897.26
2020-04-20,7050.524520,7054.468401,7046.525416,7050.542267,8.077476,56419.883448,7050.526233,2020-04-20,2823.16,19669.12
2020-04-21,6867.094397,6869.864491,6864.215868,6867.164541,3.996382,27413.523629,6867.155476,2020-04-21,2736.56,19280.78


In [None]:
# start to make model