In [29]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime as dt
import plotly.offline as py
import plotly.express as px
import plotly.graph_objects as go

data= pd.read_csv('bitstampUSD_1-min_data_2012-01-01_to_2021-03-31.csv')

## Starting data

In [30]:
data.head()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,1325317920,4.39,4.39,4.39,4.39,0.455581,2.0,4.39
1,1325317980,,,,,,,
2,1325318040,,,,,,,
3,1325318100,,,,,,,
4,1325318160,,,,,,,


In [31]:
data.columns

Index(['Timestamp', 'Open', 'High', 'Low', 'Close', 'Volume_(BTC)',
       'Volume_(Currency)', 'Weighted_Price'],
      dtype='object')

In [32]:
data.describe()

Unnamed: 0,Timestamp,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
count,4857377.0,3613769.0,3613769.0,3613769.0,3613769.0,3613769.0,3613769.0,3613769.0
mean,1471301000.0,6009.024,6013.357,6004.488,6009.014,9.323249,41762.84,6008.935
std,84280190.0,8996.247,9003.521,8988.778,8996.36,30.54989,151824.8,8995.992
min,1325318000.0,3.8,3.8,1.5,1.5,0.0,0.0,3.8
25%,1398179000.0,443.86,444.0,443.52,443.86,0.4097759,452.1422,443.8306
50%,1471428000.0,3596.97,3598.19,3595.62,3597.0,1.979811,3810.124,3596.804
75%,1544288000.0,8627.27,8632.98,8621.09,8627.16,7.278216,25698.21,8627.637
max,1617149000.0,61763.56,61781.83,61673.55,61781.8,5853.852,13900670.0,61716.21


In [33]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4857377 entries, 0 to 4857376
Data columns (total 8 columns):
 #   Column             Dtype  
---  ------             -----  
 0   Timestamp          int64  
 1   Open               float64
 2   High               float64
 3   Low                float64
 4   Close              float64
 5   Volume_(BTC)       float64
 6   Volume_(Currency)  float64
 7   Weighted_Price     float64
dtypes: float64(7), int64(1)
memory usage: 296.5 MB


##  Data cleaning

In [34]:
data=data.dropna()

In [35]:
data.isna().sum()

Timestamp            0
Open                 0
High                 0
Low                  0
Close                0
Volume_(BTC)         0
Volume_(Currency)    0
Weighted_Price       0
dtype: int64

## Timestamp->Datetime and set index

In [36]:
data.Timestamp=pd.to_datetime(data.Timestamp,unit='s').apply(lambda x: x.date())
data=data.rename(columns={'Timestamp':'Date'})

Group rows accordingly for each column

In [37]:
group=pd.DataFrame()
group['High']=data[['High']].groupby(data.Date).max()
group['Low']=data[['Low']].groupby(data.Date).min()
group['Open']=data[['Open']].groupby(data.Date).first()
group['Close']=data[['Close']].groupby(data.Date).last()
group['Volume_(BTC)']=data[['Volume_(BTC)']].groupby(data.Date).sum()
group['Volume_(Currency)']=data[['Volume_(Currency)']].groupby(data.Date).sum()

In [38]:
data= data.groupby(data.Date).mean()
data.reset_index(inplace=True)

data['High']=np.array(group['High'])
data['Low']=np.array(group['Low'])
data['Open']=np.array(group['Open'])
data['Close']=np.array(group['Close'])
data['Volume_(BTC)']=np.array(group['Volume_(BTC)'])
data['Volume_(Currency)']=np.array(group['Volume_(Currency)'])

data

Unnamed: 0,Date,Open,High,Low,Close,Volume_(BTC),Volume_(Currency),Weighted_Price
0,2011-12-31,4.39,4.58,4.39,4.58,95.317878,4.253203e+02,4.471603
1,2012-01-01,4.58,5.00,4.58,5.00,21.602000,1.057792e+02,4.806667
2,2012-01-02,5.00,5.00,5.00,5.00,19.048000,9.524000e+01,5.000000
3,2012-01-03,5.32,5.32,5.14,5.29,88.037281,4.648052e+02,5.252500
4,2012-01-04,4.93,5.57,4.93,5.57,107.233260,5.680762e+02,5.208159
...,...,...,...,...,...,...,...,...
3371,2021-03-27,55081.26,56686.15,53948.35,55839.42,2609.967580,1.443661e+08,55193.357260
3372,2021-03-28,55817.85,56573.04,54677.51,55790.92,2073.449191,1.154652e+08,55832.958824
3373,2021-03-29,55790.28,58402.68,54892.42,57600.10,5360.425663,3.069515e+08,56913.993819
3374,2021-03-30,57623.66,59388.66,57011.00,58760.59,3392.338619,1.983618e+08,58346.912268


## Plotting

In [39]:
px.line(data,
        x='Date',
        y=['Open','Close','High','Low','Weighted_Price'],
        title='Price')

In [40]:
px.line(data,
        x='Date',
        y='Volume_(BTC)',
        title='Volume of BTC')

In [41]:
px.line(data,
        x='Date',
        y='Volume_(Currency)',
        title='Currency volume')

In [42]:
px.violin(data[['Open','Close','High','Low','Weighted_Price','Volume_(BTC)']])

In [43]:
px.violin(data[['Volume_(Currency)']])

## FB Prophet

In [44]:
data_vol_btc=data[['Date','High']]
data_vol_btc=data_vol_btc.rename(columns={'Date':'ds', 'High':'y'})
data_vol_btc

Unnamed: 0,ds,y
0,2011-12-31,4.58
1,2012-01-01,5.00
2,2012-01-02,5.00
3,2012-01-03,5.32
4,2012-01-04,5.57
...,...,...
3371,2021-03-27,56686.15
3372,2021-03-28,56573.04
3373,2021-03-29,58402.68
3374,2021-03-30,59388.66


In [45]:
data_vol_btc_test=data_vol_btc[len(data_vol_btc)-20:]
data_vol_btc_train=data_vol_btc[:-20]

In [46]:
from fbprophet import Prophet
fbp=Prophet(daily_seasonality=True)
fbp.fit(data_vol_btc_train)
future = fbp.make_future_dataframe(periods=20)
forecast =fbp.predict(future)


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



Initial log joint probability = -385.093
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
      99       7214.91     0.0394721       3140.19           1           1      125   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     199        7326.3    0.00350069       480.133      0.1363      0.6755      247   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     299       7337.18     0.0042112       302.705           1           1      369   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     328       7339.83   8.38018e-05       132.946   2.012e-07       0.001      444  LS failed, Hessian reset 
     399       7341.88   3.26564e-05       57.3261           1           1      541   
    Iter      log prob        ||dx||      ||grad||       alpha      alpha0  # evals  Notes 
     409       7341.96    4.1979e-05    


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.


The frame.append method is deprecated and will be removed from pandas in a future version. Use pandas.concat instead.



In [47]:
from fbprophet.plot import plot_plotly
fig= plot_plotly(fbp, forecast)
py.iplot(fig)

## Data Standardization function

In [48]:
def data_standardization(column):
        mean = column.mean()
        std = column.std()
        return (column-mean)/std

In [49]:
#Standardization of data
data_standardized=pd.DataFrame()
data_standardized['Close']=data_standardization(data[['Close']])
data_standardized['Open']=data_standardization(data[['Open']])
data_standardized['High']=data_standardization(data[['Low']])
data_standardized['Low']=data_standardization(data[['Low']])
data_standardized['Volume_(BTC)']=data_standardization(data[['Volume_(BTC)']])
data_standardized['Volume_(Currency)']=data_standardization(data[['Volume_(Currency)']])
data_standardized['Weighted_Price']=data_standardization(data[['Weighted_Price']])

## Neural Networks

Graph function

In [50]:
import torch
@torch.no_grad()
def graphPredictions(model, x, y, original):
        model.eval()
        predictions=[]
        actual=[]
        x.to('cpu')
        y.to('cpu')
        model.to('cpu')

        mean=original.mean()
        std=original.std()

        for j in range(len(x)):
                pred=model(x[j])
                pred=(pred*std)+mean
                act= (y[j]*std)+mean

                predictions.append(pred.tolist())
                actual.append(act.item())

        predictions=np.array(predictions)
        actual=np.array(actual)

        df=pd.DataFrame()

        df['Actual']=actual
        df['Predictions']=predictions

        px.line(df,y=['Actual','Predictions'], labels={'x':'Day', 'y':'Price'}).show()


Linear model

In [51]:

import torch.optim as optim
from torch import nn
def linear(inputs, outputs):
        x_train = torch.tensor(data_standardized[inputs].head(data.shape[0]-20).values , dtype=torch.float , device='cpu')
        x_test = torch.tensor(data_standardized[inputs].tail(20).values , dtype=torch.float , device='cpu')

        y_train = torch.tensor(data_standardized[outputs].head(data.shape[0]-20).values, dtype=torch.float, device='cpu')
        y_test = torch.tensor(data_standardized[outputs].tail(20).values, dtype=torch.float, device='cpu')

        model = nn.Sequential(
        nn.Linear(len(inputs),100),
        nn.ReLU(),
        nn.Linear(100,1)
        )
        model.to('cpu')

        criterion = torch.nn.MSELoss()
        optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.6)

        for epoch in range(7):
                totalLoss=0
                for i in range(len(x_train)):
                        ypred = model(x_train[i])
                        loss = criterion(ypred, y_train[i])
                        totalLoss+=loss.item()

                        optimizer.zero_grad()
                        loss.backward()
                        optimizer.step()
                print('Total loss is: ', totalLoss)
        print('Train:')
        graphPredictions(model, x_train, y_train, data[outputs])
        print('Test:')
        graphPredictions(model, x_test, y_test, data[outputs])

In [52]:
linear(['Open'], ['Close'])

Total loss is:  15.450080336299326
Total loss is:  14.484769754699943
Total loss is:  11.793854461466303
Total loss is:  11.860988725449555
Total loss is:  11.78487909887236
Total loss is:  11.700396513062124
Total loss is:  11.629327671807893
Train:


Test:


In [53]:
linear(['Open','Weighted_Price'], ['High'])

Total loss is:  12.36171542361743
Total loss is:  7.419442602552726
Total loss is:  6.1424724602120016
Total loss is:  5.629299505570575
Total loss is:  5.230993239988411
Total loss is:  4.933228739698569
Total loss is:  4.698394896387226
Train:


Test:


In [54]:
linear(['High'],['Low'])

Total loss is:  4.704677812858328
Total loss is:  2.6555144377302704
Total loss is:  0.6589495863702666
Total loss is:  0.5104328304781112
Total loss is:  0.37049024415634635
Total loss is:  0.27694658130734573
Total loss is:  0.21629383271717267
Train:


Test:


Epoch 1/10


2022-09-30 11:05:05.229805: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-09-30 11:05:05.230424: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-09-30 11:05:05.231289: I tensorflow/core/common_runtime/process_util.cc:146] Creating new thread pool with default inter op setting: 2. Tune using inter_op_parallelism_threads for best performance.
2022-09-30 11:05:05.272837: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-09-30 11:05:05.286743: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3601665000 Hz


ValueError: in user code:

    /home/giwrgos/anaconda3/envs/workspace/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:805 train_function  *
        return step_function(self, iterator)
    /home/giwrgos/anaconda3/envs/workspace/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:795 step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    /home/giwrgos/anaconda3/envs/workspace/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:1259 run
        return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
    /home/giwrgos/anaconda3/envs/workspace/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:2730 call_for_each_replica
        return self._call_for_each_replica(fn, args, kwargs)
    /home/giwrgos/anaconda3/envs/workspace/lib/python3.8/site-packages/tensorflow/python/distribute/distribute_lib.py:3417 _call_for_each_replica
        return fn(*args, **kwargs)
    /home/giwrgos/anaconda3/envs/workspace/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:788 run_step  **
        outputs = model.train_step(data)
    /home/giwrgos/anaconda3/envs/workspace/lib/python3.8/site-packages/tensorflow/python/keras/engine/training.py:754 train_step
        y_pred = self(x, training=True)
    /home/giwrgos/anaconda3/envs/workspace/lib/python3.8/site-packages/tensorflow/python/keras/engine/base_layer.py:998 __call__
        input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
    /home/giwrgos/anaconda3/envs/workspace/lib/python3.8/site-packages/tensorflow/python/keras/engine/input_spec.py:219 assert_input_compatibility
        raise ValueError('Input ' + str(input_index) + ' of layer ' +

    ValueError: Input 0 of layer sequential is incompatible with the layer: expected ndim=3, found ndim=2. Full shape received: (None, 1)


## Conclusion

ML can be helpful for predicting Bitcoin/ Stock values, at the very minimum they can give hints for oncoming trends and also help identify the past ones. These models though cannot act as a decision maker stand-alone, more data should be taken into consideration like other cryptos/ stocks, political and technological news and even randomness.