# Imports

In [24]:
import numpy as np
import pandas as pd
import prep

# Dataset

In [25]:
df = pd.read_csv('../data/btc_hist.csv')
df_fng = pd.read_csv('../data/fng_hist.csv')
df.head(3)

Unnamed: 0,Date,Open,High,Low,Close,Volume
0,2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800
1,2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200
2,2014-09-19,424.102997,427.834991,384.532013,394.79599,37919700


In [26]:
df.set_index('Date', inplace=True)

# Processing Bitcoin history data

## Checkpoint 1

In [27]:
df_feat = df.copy()

## Adding window

In [28]:
WINDOW = 37
HORIZON = 1
for i in range(WINDOW):
    df_feat[f'Close + {i+1}'] = df_feat['Close'].shift(i+1)
df_feat.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Close + 1,Close + 2,Close + 3,Close + 4,Close + 5,...,Close + 28,Close + 29,Close + 30,Close + 31,Close + 32,Close + 33,Close + 34,Close + 35,Close + 36,Close + 37
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800,,,,,,...,,,,,,,,,,
2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,457.334015,,,,,...,,,,,,,,,,
2014-09-19,424.102997,427.834991,384.532013,394.79599,37919700,424.440002,457.334015,,,,...,,,,,,,,,,


## Adding technical indicators

Adding MA 13 and 21 commonly used by many professional traders

In [29]:
df_feat['MA_13'] = df_feat['Close'].rolling(window=13).mean()
df_feat['MA_21'] = df_feat['Close'].rolling(window=21).mean()

Adding RSI of 3 with K of 5 and D of 3

In [30]:
df_feat['RSI_3'] = prep.calculate_rsi(df_feat['Close'], 3)

In [31]:
df_feat['%K'], df_feat['%D'] = prep.calculate_stochastic_oscillator(df_feat['Close'], 5, 3)

In [32]:
df_feat.head(3)

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Close + 1,Close + 2,Close + 3,Close + 4,Close + 5,...,Close + 33,Close + 34,Close + 35,Close + 36,Close + 37,MA_13,MA_21,RSI_3,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-09-17,465.864014,468.174011,452.421997,457.334015,21056800,,,,,,...,,,,,,,,,,
2014-09-18,456.859985,456.859985,413.104004,424.440002,34483200,457.334015,,,,,...,,,,,,,,,,
2014-09-19,424.102997,427.834991,384.532013,394.79599,37919700,424.440002,457.334015,,,,...,,,,,,,,0.0,,


dropping na values

In [33]:
df_feat.dropna(inplace=True)

In [34]:
df_feat

Unnamed: 0_level_0,Open,High,Low,Close,Volume,Close + 1,Close + 2,Close + 3,Close + 4,Close + 5,...,Close + 33,Close + 34,Close + 35,Close + 36,Close + 37,MA_13,MA_21,RSI_3,%K,%D
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2014-10-24,358.591003,364.345001,353.304993,358.345001,15585700,358.416992,383.157990,386.475006,382.845001,389.545990,...,398.821014,408.903992,394.795990,424.440002,457.334015,383.165229,368.505573,0.000000,0.000000,1.213558
2014-10-25,358.610992,359.860992,342.877014,347.270996,18127500,358.345001,358.416992,383.157990,386.475006,382.845001,...,402.152008,398.821014,408.903992,394.795990,424.440002,380.759228,369.382002,0.000000,0.000000,0.000000
2014-10-26,347.487000,359.221008,343.931000,354.704010,11272500,347.270996,358.345001,358.416992,383.157990,386.475006,...,435.790985,402.152008,398.821014,408.903992,394.795990,378.012306,371.010287,40.007589,20.712278,6.904093
2014-10-27,354.777008,358.631989,349.808990,352.989014,13033000,354.704010,347.270996,358.345001,358.416992,383.157990,...,423.204987,435.790985,402.152008,398.821014,408.903992,374.329153,372.101240,36.757038,51.301091,24.004456
2014-10-28,353.214996,359.984009,352.678986,357.618011,7845880,352.989014,354.704010,347.270996,358.345001,358.416992,...,411.574005,423.204987,435.790985,402.152008,398.821014,371.471076,373.121764,87.551750,93.435169,55.149513
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-16,66256.109375,66712.429688,64613.054688,65231.582031,31573077994,66267.492188,61552.789062,62901.449219,61448.394531,60793.710938,...,63821.472656,67195.867188,70060.609375,70587.882812,69139.015625,62818.882212,62460.877046,66.411065,78.504064,71.515421
2024-05-17,65231.296875,67459.460938,65119.316406,67051.875000,28031279310,65231.582031,66267.492188,61552.789062,62901.449219,61448.394531,...,65738.726562,63821.472656,67195.867188,70060.609375,70587.882812,63061.990084,62617.855841,86.317224,100.000000,92.834688
2024-05-18,67066.210938,67387.328125,66663.500000,66940.804688,16712277406,67051.875000,65231.582031,66267.492188,61552.789062,62901.449219,...,63426.210938,65738.726562,63821.472656,67195.867188,70060.609375,63285.810998,62785.554129,61.345643,97.980204,92.161423
2024-05-19,66937.929688,67694.296875,65937.179688,66278.367188,19249094538,66940.804688,67051.875000,65231.582031,66267.492188,61552.789062,...,63811.863281,63426.210938,65738.726562,63821.472656,67195.867188,63525.535457,62936.274926,70.178596,57.506411,85.162205


In [35]:
df_feat.to_csv('../data/btc_hist_processed.csv')

# Processing Fear and Greed data

## Encoding

In [36]:
df_fng['value_classification'].unique()

array(['Greed', 'Neutral', 'Fear', 'Extreme Greed', 'Extreme Fear'],
      dtype=object)

In [37]:
df_fng['value_classification'].replace({'Fear':1, 'Extreme Fear':2, 'Neutral':3, 'Greed':4, 'Extreme Greed':5}, inplace=True)

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df_fng['value_classification'].replace({'Fear':1, 'Extreme Fear':2, 'Neutral':3, 'Greed':4, 'Extreme Greed':5}, inplace=True)
  df_fng['value_classification'].replace({'Fear':1, 'Extreme Fear':2, 'Neutral':3, 'Greed':4, 'Extreme Greed':5}, inplace=True)


In [38]:
df_fng.to_csv('../data/fng_hist_processed.csv', index=False)