In [2]:
# Import libraries
import os
import sys
import requests

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import RandomizedSearchCV

from sklearn.metrics import accuracy_score, classification_report,precision_score

In [4]:
df=pd.read_csv('ETH-USD.csv')

In [5]:
df

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
0,2017-11-09,308.644989,329.451996,307.056000,320.884003,320.884003,893249984
1,2017-11-10,320.670990,324.717987,294.541992,299.252991,299.252991,885985984
2,2017-11-11,298.585999,319.453003,298.191986,314.681000,314.681000,842300992
3,2017-11-12,314.690002,319.153015,298.513000,307.907990,307.907990,1613479936
4,2017-11-13,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984
...,...,...,...,...,...,...,...
2336,2024-04-02,3504.818359,3506.962891,3215.985107,3277.234619,3277.234619,22076539151
2337,2024-04-03,3277.324219,3368.111572,3205.649170,3311.441895,3311.441895,16010734587
2338,2024-04-04,3311.495361,3443.207520,3253.319336,3330.040527,3330.040527,14476330517
2339,2024-04-05,3330.005859,3345.666504,3214.244141,3318.885254,3318.885254,15214447092


In [6]:
df = df[['Date','Open','High','Low','Close','Volume']]
# sort the values by symbol and then date
df.sort_values(by = ['Date'], inplace = True)

# calculate the change in price
df['change_in_price'] =df['Close'].diff()
df

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df.sort_values(by = ['Date'], inplace = True)


Unnamed: 0,Date,Open,High,Low,Close,Volume,change_in_price
0,2017-11-09,308.644989,329.451996,307.056000,320.884003,893249984,
1,2017-11-10,320.670990,324.717987,294.541992,299.252991,885985984,-21.631012
2,2017-11-11,298.585999,319.453003,298.191986,314.681000,842300992,15.428009
3,2017-11-12,314.690002,319.153015,298.513000,307.907990,1613479936,-6.773010
4,2017-11-13,307.024994,328.415009,307.024994,316.716003,1041889984,8.808013
...,...,...,...,...,...,...,...
2336,2024-04-02,3504.818359,3506.962891,3215.985107,3277.234619,22076539151,-227.795410
2337,2024-04-03,3277.324219,3368.111572,3205.649170,3311.441895,16010734587,34.207276
2338,2024-04-04,3311.495361,3443.207520,3253.319336,3330.040527,14476330517,18.598632
2339,2024-04-05,3330.005859,3345.666504,3214.244141,3318.885254,15214447092,-11.155273


In [7]:
n = 14

# First make a copy of the data frame twice
up_df, down_df = df[['Date','change_in_price']].copy(), df[['Date','change_in_price']].copy()

# For up days, if the change is less than 0 set to 0.
up_df.loc['change_in_price'] = up_df.loc[(up_df['change_in_price'] < 0), 'change_in_price'] = 0

# For down days, if the change is greater than 0 set to 0.
down_df.loc['change_in_price'] = down_df.loc[(down_df['change_in_price'] > 0), 'change_in_price'] = 0

# We need change in price to be absolute.
down_df['change_in_price'] = down_df['change_in_price'].abs()

# Calculate the EWMA (Exponential Weighted Moving Average), meaning older values are given less weight compared to newer values.
ewma_up = up_df['change_in_price'].transform(lambda x: x.ewm(span = n).mean())
ewma_down = down_df['change_in_price'].transform(lambda x: x.ewm(span = n).mean())

# Calculate the Relative Strength
relative_strength = ewma_up / ewma_down

# Calculate the Relative Strength Index
relative_strength_index = 100.0 - (100.0 / (1.0 + relative_strength))

# Add the info to the data frame.
df['down_days'] = down_df['change_in_price']
df['up_days'] = up_df['change_in_price']
df['RSI'] = relative_strength_index

# Display the head.
df.head(30)

Unnamed: 0,Date,Open,High,Low,Close,Volume,change_in_price,down_days,up_days,RSI
0,2017-11-09,308.644989,329.451996,307.056,320.884003,893249984,,,,
1,2017-11-10,320.67099,324.717987,294.541992,299.252991,885985984,-21.631012,21.631012,0.0,0.0
2,2017-11-11,298.585999,319.453003,298.191986,314.681,842300992,15.428009,0.0,15.428009,45.144288
3,2017-11-12,314.690002,319.153015,298.513,307.90799,1613479936,-6.77301,6.77301,0.0,36.742192
4,2017-11-13,307.024994,328.415009,307.024994,316.716003,1041889984,8.808013,0.0,8.808013,50.551753
5,2017-11-14,316.763,340.177002,316.763,337.631012,1069680000,20.915009,0.0,20.915009,69.058633
6,2017-11-15,337.963989,340.911987,329.812988,333.356995,722665984,-4.274017,4.274017,0.0,63.458502
7,2017-11-16,333.442993,336.158997,323.605988,330.924011,797254016,-2.432984,2.432984,0.0,60.249397
8,2017-11-17,330.166992,334.963989,327.52301,332.394012,621732992,1.470001,0.0,1.470001,61.603079
9,2017-11-18,331.980011,349.615997,327.687012,347.612,649638976,15.217988,0.0,15.217988,72.705827


In [10]:
# Calculate the Stochastic Oscillator
n = 14

# Make a copy of the high and low column.
low_14, high_14 = df[['Date','Low']].copy(), df[['Date','High']].copy()

# Group by symbol, then apply the rolling function and grab the Min and Max.
low_14 = low_14['Low'].transform(lambda x: x.rolling(window = n).min())
high_14 = high_14['High'].transform(lambda x: x.rolling(window = n).max())

# Calculate the Stochastic Oscillator.
k_percent = 100 * ((df['Close'] - low_14) / (high_14 - low_14))

# Add the info to the data frame.
df['low_14'] = low_14
df['high_14'] = high_14
df['k_percent'] = k_percent

# Display the head.
df.head(30)

Unnamed: 0,Date,Open,High,Low,Close,Volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent
0,2017-11-09,308.644989,329.451996,307.056,320.884003,893249984,,,,,,,
1,2017-11-10,320.67099,324.717987,294.541992,299.252991,885985984,-21.631012,21.631012,0.0,0.0,,,
2,2017-11-11,298.585999,319.453003,298.191986,314.681,842300992,15.428009,0.0,15.428009,45.144288,,,
3,2017-11-12,314.690002,319.153015,298.513,307.90799,1613479936,-6.77301,6.77301,0.0,36.742192,,,
4,2017-11-13,307.024994,328.415009,307.024994,316.716003,1041889984,8.808013,0.0,8.808013,50.551753,,,
5,2017-11-14,316.763,340.177002,316.763,337.631012,1069680000,20.915009,0.0,20.915009,69.058633,,,
6,2017-11-15,337.963989,340.911987,329.812988,333.356995,722665984,-4.274017,4.274017,0.0,63.458502,,,
7,2017-11-16,333.442993,336.158997,323.605988,330.924011,797254016,-2.432984,2.432984,0.0,60.249397,,,
8,2017-11-17,330.166992,334.963989,327.52301,332.394012,621732992,1.470001,0.0,1.470001,61.603079,,,
9,2017-11-18,331.980011,349.615997,327.687012,347.612,649638976,15.217988,0.0,15.217988,72.705827,,,


In [11]:
# Calculate the MACD
ema_26 = df['Close'].transform(lambda x: x.ewm(span = 26).mean())
ema_12 = df['Close'].transform(lambda x: x.ewm(span = 12).mean())
macd = ema_12 - ema_26

# Calculate the EMA
ema_9_macd = macd.ewm(span = 9).mean()

# Store the data in the data frame.
df['MACD'] = macd
df['MACD_EMA'] = ema_9_macd

# Print the head.
df.head(30)

Unnamed: 0,Date,Open,High,Low,Close,Volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,MACD,MACD_EMA
0,2017-11-09,308.644989,329.451996,307.056,320.884003,893249984,,,,,,,,0.0,0.0
1,2017-11-10,320.67099,324.717987,294.541992,299.252991,885985984,-21.631012,21.631012,0.0,0.0,,,,-0.485311,-0.269617
2,2017-11-11,298.585999,319.453003,298.191986,314.681,842300992,15.428009,0.0,15.428009,45.144288,,,,-0.139961,-0.21648
3,2017-11-12,314.690002,319.153015,298.513,307.90799,1613479936,-6.77301,6.77301,0.0,36.742192,,,,-0.223996,-0.219026
4,2017-11-13,307.024994,328.415009,307.024994,316.716003,1041889984,8.808013,0.0,8.808013,50.551753,,,,0.085547,-0.128422
5,2017-11-14,316.763,340.177002,316.763,337.631012,1069680000,20.915009,0.0,20.915009,69.058633,,,,1.164355,0.221992
6,2017-11-15,337.963989,340.911987,329.812988,333.356995,722665984,-4.274017,4.274017,0.0,63.458502,,,,1.642529,0.581492
7,2017-11-16,333.442993,336.158997,323.605988,330.924011,797254016,-2.432984,2.432984,0.0,60.249397,,,,1.821038,0.879378
8,2017-11-17,330.166992,334.963989,327.52301,332.394012,621732992,1.470001,0.0,1.470001,61.603079,,,,1.989876,1.135909
9,2017-11-18,331.980011,349.615997,327.687012,347.612,649638976,15.217988,0.0,15.217988,72.705827,,,,2.864563,1.523228


In [12]:
# Calculate the Price Rate of Change
n = 9

# Calculate the Rate of Change in the Price, and store it in the Data Frame.
df['Price_Rate_Of_Change'] = df['Close'].transform(lambda x: x.pct_change(periods = n))

# Print the first 30 rows
df.head(30)

Unnamed: 0,Date,Open,High,Low,Close,Volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,MACD,MACD_EMA,Price_Rate_Of_Change
0,2017-11-09,308.644989,329.451996,307.056,320.884003,893249984,,,,,,,,0.0,0.0,
1,2017-11-10,320.67099,324.717987,294.541992,299.252991,885985984,-21.631012,21.631012,0.0,0.0,,,,-0.485311,-0.269617,
2,2017-11-11,298.585999,319.453003,298.191986,314.681,842300992,15.428009,0.0,15.428009,45.144288,,,,-0.139961,-0.21648,
3,2017-11-12,314.690002,319.153015,298.513,307.90799,1613479936,-6.77301,6.77301,0.0,36.742192,,,,-0.223996,-0.219026,
4,2017-11-13,307.024994,328.415009,307.024994,316.716003,1041889984,8.808013,0.0,8.808013,50.551753,,,,0.085547,-0.128422,
5,2017-11-14,316.763,340.177002,316.763,337.631012,1069680000,20.915009,0.0,20.915009,69.058633,,,,1.164355,0.221992,
6,2017-11-15,337.963989,340.911987,329.812988,333.356995,722665984,-4.274017,4.274017,0.0,63.458502,,,,1.642529,0.581492,
7,2017-11-16,333.442993,336.158997,323.605988,330.924011,797254016,-2.432984,2.432984,0.0,60.249397,,,,1.821038,0.879378,
8,2017-11-17,330.166992,334.963989,327.52301,332.394012,621732992,1.470001,0.0,1.470001,61.603079,,,,1.989876,1.135909,
9,2017-11-18,331.980011,349.615997,327.687012,347.612,649638976,15.217988,0.0,15.217988,72.705827,,,,2.864563,1.523228,0.083295


In [13]:
df['Tomorow']=df['Close'].shift(-1)

In [14]:
df

Unnamed: 0,Date,Open,High,Low,Close,Volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,MACD,MACD_EMA,Price_Rate_Of_Change,Tomorow
0,2017-11-09,308.644989,329.451996,307.056000,320.884003,893249984,,,,,,,,0.000000,0.000000,,299.252991
1,2017-11-10,320.670990,324.717987,294.541992,299.252991,885985984,-21.631012,21.631012,0.000000,0.000000,,,,-0.485311,-0.269617,,314.681000
2,2017-11-11,298.585999,319.453003,298.191986,314.681000,842300992,15.428009,0.000000,15.428009,45.144288,,,,-0.139961,-0.216480,,307.907990
3,2017-11-12,314.690002,319.153015,298.513000,307.907990,1613479936,-6.773010,6.773010,0.000000,36.742192,,,,-0.223996,-0.219026,,316.716003
4,2017-11-13,307.024994,328.415009,307.024994,316.716003,1041889984,8.808013,0.000000,8.808013,50.551753,,,,0.085547,-0.128422,,337.631012
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2336,2024-04-02,3504.818359,3506.962891,3215.985107,3277.234619,22076539151,-227.795410,227.795410,0.000000,34.980634,3059.654785,3678.789795,35.142551,3.667934,34.094274,-0.051352,3311.441895
2337,2024-04-03,3277.324219,3368.111572,3205.649170,3311.441895,16010734587,34.207276,0.000000,34.207276,37.800799,3205.649170,3678.789795,22.359679,-11.440679,24.987283,-0.077820,3330.040527
2338,2024-04-04,3311.495361,3443.207520,3253.319336,3330.040527,14476330517,18.598632,0.000000,18.598632,39.448459,3205.649170,3678.789795,26.290568,-21.663882,15.657050,-0.071767,3318.885254
2339,2024-04-05,3330.005859,3345.666504,3214.244141,3318.885254,15214447092,-11.155273,11.155273,0.000000,38.738276,3205.649170,3678.789795,23.932860,-30.316511,6.462338,-0.051778,3336.524414


In [15]:
df['prediction']=(df['Tomorow']>df['Close']).astype(int)

In [16]:
df['prediction'].value_counts()

1    1199
0    1142
Name: prediction, dtype: int64

In [17]:
# We need to remove all rows that have an NaN value.
print('Before NaN Drop we have {} rows and {} columns'.format(df.shape[0], df.shape[1]))

# Any row that has a `NaN` value will be dropped.
df = df.dropna()

# Display how much we have left now.
print('After NaN Drop we have {} rows and {} columns'.format(df.shape[0], df.shape[1]))

# Print the head.
df.head()

Before NaN Drop we have 2341 rows and 18 columns
After NaN Drop we have 2327 rows and 18 columns


Unnamed: 0,Date,Open,High,Low,Close,Volume,change_in_price,down_days,up_days,RSI,low_14,high_14,k_percent,MACD,MACD_EMA,Price_Rate_Of_Change,Tomorow,prediction
13,2017-11-22,360.312012,381.420013,360.147003,380.652008,800819008,20.251007,0.0,20.251007,80.336586,294.541992,381.420013,99.115996,7.215027,4.143,0.201872,410.165985,1
14,2017-11-23,381.438995,425.548004,376.088013,410.165985,1845680000,29.513977,0.0,29.513977,86.827537,294.541992,425.548004,88.25854,10.017925,5.360834,0.214835,474.911011,1
15,2017-11-24,412.501007,480.972992,402.757996,474.911011,2292829952,64.745026,0.0,64.745026,92.823723,298.191986,480.972992,96.683473,15.945597,7.539099,0.424632,466.276001,0
16,2017-11-25,475.675995,485.191986,461.053009,466.276001,1422080000,-8.63501,8.63501,0.0,86.747033,298.513,485.191986,89.867105,19.734016,10.034269,0.409012,471.329987,1
17,2017-11-26,465.973999,472.722992,451.605988,471.329987,1197779968,5.053986,0.0,5.053986,87.308148,307.024994,485.191986,92.219659,22.711727,12.616274,0.417986,480.355011,1


In [18]:
model=RandomForestClassifier(n_estimators=100,min_samples_split=50,random_state=1)
train=df.iloc[:-200]
test=df[-200:]
predictors=["RSI","k_percent","MACD","MACD_EMA"]
model.fit(train[predictors],train['prediction'])

In [19]:
preds=model.predict(test[predictors])
preds=pd.Series(preds,index=test.index)
precision_score(test["prediction"],preds)

0.5959595959595959