# Part 1. Feature Engineering

Indicators are tools that help an investor or a trader to make a decision whether to buy stock or sell. Technical indicators (which can be called features in this context) constructed from stock data, such as price or volume. In this part we will create following features: Bollinger Bands, RSI, MACD, Moving Average, Return, Momentum, Change and Volatility.
Return will serve as a target or dependent variable. Other features will serve as independent variables.

### Importing Libraries 

In [5]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from importlib import reload
import datetime
from features_engineering import ma7, ma21, bollinger_bands

warnings.filterwarnings('ignore')

### Original data


In [7]:
#getting historical data for bitcoin
bit_data=pd.read_csv('coin_Bitcoin.csv',header=0, parse_dates=[0])

In [8]:
bit_data.head()

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,1,Bitcoin,BTC,2013-04-29 23:59:59,147.488007,134.0,134.444,144.539993,0.0,1603769000.0
1,2,Bitcoin,BTC,2013-04-30 23:59:59,146.929993,134.050003,144.0,139.0,0.0,1542813000.0
2,3,Bitcoin,BTC,2013-05-01 23:59:59,139.889999,107.720001,139.0,116.989998,0.0,1298955000.0
3,4,Bitcoin,BTC,2013-05-02 23:59:59,125.599998,92.281898,116.379997,105.209999,0.0,1168517000.0
4,5,Bitcoin,BTC,2013-05-03 23:59:59,108.127998,79.099998,106.25,97.75,0.0,1085995000.0


In [9]:
bit_data.describe()

Unnamed: 0,High,Low,Open,Close,Volume,Marketcap
count,2862.0,2862.0,2862.0,2862.0,2862.0,2862.0
mean,4974.040239,4695.103027,4836.306834,4852.092547,8978475000.0,85916220000.0
std,7188.836678,6667.197596,6933.573446,6975.105869,16581350000.0,128741400000.0
min,74.561096,65.526001,68.504997,68.431,0.0,778411200.0
25%,426.047752,415.675751,421.204506,420.989243,27862500.0,5988997000.0
50%,1197.334961,1164.174988,1180.100037,1182.809998,330195000.0,19242380000.0
75%,8138.046589,7703.3575,7924.612338,7926.696939,12967430000.0,138765800000.0
max,58330.572142,55672.609513,57532.738864,57539.943668,350967900000.0,1072263000000.0


In [10]:
bit_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2862 entries, 0 to 2861
Data columns (total 10 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   SNo        2862 non-null   object 
 1   Name       2862 non-null   object 
 2   Symbol     2862 non-null   object 
 3   Date       2862 non-null   object 
 4   High       2862 non-null   float64
 5   Low        2862 non-null   float64
 6   Open       2862 non-null   float64
 7   Close      2862 non-null   float64
 8   Volume     2862 non-null   float64
 9   Marketcap  2862 non-null   float64
dtypes: float64(6), object(4)
memory usage: 223.7+ KB


In [11]:
bit_data = bit_data.set_index(pd.DatetimeIndex(bit_data['Date'])).drop('Date', axis=1).drop('SNo', axis=1)

In [12]:
bit_data.head()

Unnamed: 0_level_0,Name,Symbol,High,Low,Open,Close,Volume,Marketcap
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2013-04-29 23:59:59,Bitcoin,BTC,147.488007,134.0,134.444,144.539993,0.0,1603769000.0
2013-04-30 23:59:59,Bitcoin,BTC,146.929993,134.050003,144.0,139.0,0.0,1542813000.0
2013-05-01 23:59:59,Bitcoin,BTC,139.889999,107.720001,139.0,116.989998,0.0,1298955000.0
2013-05-02 23:59:59,Bitcoin,BTC,125.599998,92.281898,116.379997,105.209999,0.0,1168517000.0
2013-05-03 23:59:59,Bitcoin,BTC,108.127998,79.099998,106.25,97.75,0.0,1085995000.0


### Cheaking for missing values


In [13]:
print('No missing data') if sum(bit_data.isna().sum()) == 0 else bit_data.isna().sum()

No missing data


### Generating features

In [14]:
def momentum(data, n_days):
    m = [None for i in range(n_days)]    
    for i in range(len(data) - n_days):
        end = i + n_days
        m.append(data[i] - n_days)
    return m

In [15]:
# TECHNICAL INDICATORS
# Return Feature
bit_data['Return'] = round(bit_data['Close'] / bit_data['Open'] - 1, 3)
# Change Feature
# Change of the price from previous day, absolute value
bit_data['Change'] = (bit_data.Close - bit_data.Close.shift(1)).fillna(0)
# Volatility Feature
bit_data['Volatility'] = bit_data.Close.ewm(21).std()
# Moving Average, 7 days
bit_data['MA7'] = ma7(bit_data)
# Moving Average, 21 days
bit_data['MA21'] = ma21(bit_data)
# Momentum
bit_data['Momentum'] = momentum(bit_data.Close, 3)
# RSI (Relative Strength Index)
#bit_data['RSI'] =  relativeStrengthIndex(bit_data.Close, 14)
# MACD - (Moving Average Convergence/Divergence)
bit_data['MACD'] = bit_data.Close.ewm(span=12, adjust=False).mean() - bit_data.Close.ewm(span=26, adjust=False).mean()
bit_data['Signal'] = bit_data['MACD'].ewm(span=9, adjust=False).mean()
# Upper Band and Lower Band for Bollinger Bands
bit_data['Upper_band'], bit_data['Lower_band'] = bollinger_bands(bit_data)
bit_data.dropna(inplace=True)

# Saving
bit_data.to_csv('/Users/dashavasileva/Desktop/spring 2021/ML/project-altcoinpriceprediction/project-altcoinpriceprediction/VasilevaDO/bit_data.csv')

Mostly we will rely on historical data and technical indicators. Additionally, we will use news headlines of Bitcoin to check hypothesis if news affect price movement.

In [16]:
bit_data.head()

Unnamed: 0_level_0,Name,Symbol,High,Low,Open,Close,Volume,Marketcap,Return,Change,Volatility,MA7,MA21,Momentum,MACD,Signal,Upper_band,Lower_band
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2013-05-19 23:59:59,Bitcoin,BTC,124.5,119.570999,123.210999,121.989998,0.0,1363205000.0,-0.01,-1.508003,8.471155,118.709001,117.159143,115.760002,-3.645462,-5.658709,137.200466,97.117819
2013-05-20 23:59:59,Bitcoin,BTC,123.621002,120.120003,122.5,122.0,0.0,1363710000.0,-0.004,0.010002,8.256357,119.283286,116.08581,120.014999,-3.142245,-5.155416,131.946473,100.225146
2013-05-21 23:59:59,Bitcoin,BTC,123.0,121.209999,122.019997,122.879997,0.0,1374013000.0,0.007,0.879997,8.080808,120.909,115.31819,120.498001,-2.641979,-4.652728,127.699871,102.93651
2013-05-22 23:59:59,Bitcoin,BTC,124.000999,122.0,122.889999,123.889,0.0,1385779000.0,0.008,1.009003,7.948854,122.290285,115.646714,118.989998,-2.139434,-4.150069,128.569,102.724428
2013-05-23 23:59:59,Bitcoin,BTC,126.933998,123.099998,123.800003,126.699997,0.0,1417770000.0,0.023,2.810997,7.969404,123.42457,116.670048,119.0,-1.497082,-3.619472,129.52451,103.815585


In [167]:
bit_data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2842 entries, 2013-05-19 23:59:59 to 2021-02-27 23:59:59
Data columns (total 18 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   Name        2842 non-null   object 
 1   Symbol      2842 non-null   object 
 2   High        2842 non-null   float64
 3   Low         2842 non-null   float64
 4   Open        2842 non-null   float64
 5   Close       2842 non-null   float64
 6   Volume      2842 non-null   float64
 7   Marketcap   2842 non-null   float64
 8   Return      2842 non-null   float64
 9   Change      2842 non-null   float64
 10  Volatility  2842 non-null   float64
 11  MA7         2842 non-null   float64
 12  MA21        2842 non-null   float64
 13  Momentum    2842 non-null   float64
 14  MACD        2842 non-null   float64
 15  Signal      2842 non-null   float64
 16  Upper_band  2842 non-null   float64
 17  Lower_band  2842 non-null   float64
dtypes: float64(16), object(2)
memory