# Part 1. Feature Engineering

Indicators are tools that help an investor or a trader to make a decision whether to buy stock or sell. Technical indicators (which can be called features in this context) constructed from stock data, such as price or volume. In this part we will create following features: Bollinger Bands, RSI, MACD, Moving Average, Return, Momentum, Change and Volatility.
Return will serve as a target or dependent variable. Other features will serve as independent variables.

### Importing Libraries 

In [55]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from importlib import reload
import datetime

from bs4 import BeautifulSoup
import requests
from nltk.sentiment.vader import SentimentIntensityAnalyzer
warnings.filterwarnings('ignore')

### Original data


In [139]:
#getting historical data for bitcoin
bit_data=pd.read_csv('coin_Bitcoin.csv',header=0, parse_dates=[0])

In [140]:
bit_data.head()

Unnamed: 0,SNo,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,1,Bitcoin,BTC,2013-04-29 23:59:59,147.488007,134.0,134.444,144.539993,0.0,1603769000.0
1,2,Bitcoin,BTC,2013-04-30 23:59:59,146.929993,134.050003,144.0,139.0,0.0,1542813000.0
2,3,Bitcoin,BTC,2013-05-01 23:59:59,139.889999,107.720001,139.0,116.989998,0.0,1298955000.0
3,4,Bitcoin,BTC,2013-05-02 23:59:59,125.599998,92.281898,116.379997,105.209999,0.0,1168517000.0
4,5,Bitcoin,BTC,2013-05-03 23:59:59,108.127998,79.099998,106.25,97.75,0.0,1085995000.0


In [141]:
bit_data.describe()

Unnamed: 0,High,Low,Open,Close,Volume,Marketcap
count,2862.0,2862.0,2862.0,2862.0,2862.0,2862.0
mean,4974.040239,4695.103027,4836.306834,4852.092547,8978475000.0,85916220000.0
std,7188.836678,6667.197596,6933.573446,6975.105869,16581350000.0,128741400000.0
min,74.561096,65.526001,68.504997,68.431,0.0,778411200.0
25%,426.047752,415.675751,421.204506,420.989243,27862500.0,5988997000.0
50%,1197.334961,1164.174988,1180.100037,1182.809998,330195000.0,19242380000.0
75%,8138.046589,7703.3575,7924.612338,7926.696939,12967430000.0,138765800000.0
max,58330.572142,55672.609513,57532.738864,57539.943668,350967900000.0,1072263000000.0


### Cheaking for missing values


In [142]:
print('No missing data') if sum(bit_data.isna().sum()) == 0 else bit_data.isna().sum()

No missing data


In [143]:
bit_data = bit_data.drop('SNo', axis=1)
bit_data.head(10)

Unnamed: 0,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap
0,Bitcoin,BTC,2013-04-29 23:59:59,147.488007,134.0,134.444,144.539993,0.0,1603769000.0
1,Bitcoin,BTC,2013-04-30 23:59:59,146.929993,134.050003,144.0,139.0,0.0,1542813000.0
2,Bitcoin,BTC,2013-05-01 23:59:59,139.889999,107.720001,139.0,116.989998,0.0,1298955000.0
3,Bitcoin,BTC,2013-05-02 23:59:59,125.599998,92.281898,116.379997,105.209999,0.0,1168517000.0
4,Bitcoin,BTC,2013-05-03 23:59:59,108.127998,79.099998,106.25,97.75,0.0,1085995000.0
5,Bitcoin,BTC,2013-05-04 23:59:59,115.0,92.5,98.099998,112.5,0.0,1250317000.0
6,Bitcoin,BTC,2013-05-05 23:59:59,118.800003,107.142998,112.900002,115.910004,0.0,1288693000.0
7,Bitcoin,BTC,2013-05-06 23:59:59,124.663002,106.639999,115.980003,112.300003,0.0,1249023000.0
8,Bitcoin,BTC,2013-05-07 23:59:59,113.444,97.699997,112.25,111.5,0.0,1240594000.0
9,Bitcoin,BTC,2013-05-08 23:59:59,115.779999,109.599998,109.599998,113.566002,0.0,1264049000.0


### Generating features

In [144]:
num_training_days = int(bit_data.shape[0]*.7)
print('Number of training days: {}. Number of test days: {}.'.format(num_training_days, bit_data.shape[0]-num_training_days))


Number of training days: 2003. Number of test days: 859.


In [146]:
# TECHNICAL INDICATORS
# Return Feature
bit_data['Return'] = round(bit_data['Close'] / bit_data['Open'] - 1, 3)
# Change Feature
# Change of the price from previous day, absolute value
bit_data['Change'] = (bit_data.Close - bit_data.Close.shift(1)).fillna(0)
# Date Feature
bit_data['Date'] = bit_data.set_index(pd.DatetimeIndex(bit_data.Date)).drop('Date', axis=1)
#crypto[name].set_index('Date', inplace=True)
#df = df.set_index(pd.DatetimeIndex(df['Date'])).drop('Date', axis=1).drop('SNo', axis=1)

# Volatility Feature
bit_data['Volatility'] = bit_data.Close.ewm(21).std()
# Moving Average, 7 days
bit_data['MA7'] = bit_data.Close.rolling(window=7).mean()
# Moving Average, 21 days
bit_data['MA21'] = bit_data.Close.rolling(window=21).mean()
# Momentum
bit_data['Momentum'] = bit_data.Close-1
# Upper Band and Lower Band for Bollinger Bands
bit_data['20sd'] = bit_data.Close.rolling(window=20).std() 
bit_data['Upper_band'] = bit_data['MA21']+(bit_data['20sd']*2)
bit_data['Lower_band'] = bit_data['MA21']-(bit_data['20sd']*2)

# Saving
#bit_data.to_csv('/Users/dashavasileva/Desktop/spring 2021/ML/project-altcoinpriceprediction/VasilevaDO/data/'+name+'.csv')

Mostly we will rely on historical data and technical indicators. Additionally, we will use news headlines of Bitcoin to check hypothesis if news affect price movement.

In [147]:
bit_data.head()

Unnamed: 0,Name,Symbol,Date,High,Low,Open,Close,Volume,Marketcap,Return,Change,Volatility,MA7,MA21,Momentum,20sd,Upper_band,Lower_band
0,Bitcoin,BTC,,147.488007,134.0,134.444,144.539993,0.0,1603769000.0,0.075,0.0,,,,143.539993,,,
1,Bitcoin,BTC,,146.929993,134.050003,144.0,139.0,0.0,1542813000.0,-0.035,-5.539993,3.917367,,,138.0,,,
2,Bitcoin,BTC,,139.889999,107.720001,139.0,116.989998,0.0,1298955000.0,-0.158,-22.010002,14.690548,,,115.989998,,,
3,Bitcoin,BTC,,125.599998,92.281898,116.379997,105.209999,0.0,1168517000.0,-0.096,-11.779999,18.539924,,,104.209999,,,
4,Bitcoin,BTC,,108.127998,79.099998,106.25,97.75,0.0,1085995000.0,-0.08,-7.459999,20.444247,,,96.75,,,
