In [1]:
! pip install yfinance



In [2]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
# Fetch 5-minute ETH-USD bars from Yahoo Finance through yfinance
raw_bars = yf.download('ETH-USD', start="2022-12-01", end="2022-12-31", interval='5m')

# Wrap the downloaded table in a pandas DataFrame
eth = pd.DataFrame(raw_bars)

eth.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-11-30 22:00:00+00:00,1295.510254,1295.809082,1295.049683,1295.049683,1295.049683,0
2022-11-30 22:05:00+00:00,1295.099365,1295.763306,1295.093018,1295.763306,1295.763306,12383232
2022-11-30 22:10:00+00:00,1295.908325,1295.908325,1295.14856,1295.14856,1295.14856,3593216
2022-11-30 22:15:00+00:00,1295.0271,1295.0271,1294.24939,1294.24939,1294.24939,0
2022-11-30 22:20:00+00:00,1294.17749,1294.395996,1294.007812,1294.395996,1294.395996,11730944


In [4]:
# Replace every column with its bar-over-bar fractional change.
# The first row has no predecessor, so it becomes NaN.
eth = eth.pct_change(periods=1)

eth.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-11-30 22:00:00+00:00,,,,,,
2022-11-30 22:05:00+00:00,-0.000317,-3.5e-05,3.3e-05,0.000551,0.000551,inf
2022-11-30 22:10:00+00:00,0.000625,0.000112,4.3e-05,-0.000474,-0.000474,-0.709832
2022-11-30 22:15:00+00:00,-0.00068,-0.00068,-0.000694,-0.000694,-0.000694,-1.0
2022-11-30 22:20:00+00:00,-0.000656,-0.000487,-0.000187,0.000113,0.000113,inf


In [5]:
# A change computed from a zero base comes out as +/-inf; map both to 0.
# NaN values are not touched by this step.
eth = eth.replace(to_replace=[np.inf, -np.inf], value=0)

eth.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-11-30 22:00:00+00:00,,,,,,
2022-11-30 22:05:00+00:00,-0.000317,-3.5e-05,3.3e-05,0.000551,0.000551,0.0
2022-11-30 22:10:00+00:00,0.000625,0.000112,4.3e-05,-0.000474,-0.000474,-0.709832
2022-11-30 22:15:00+00:00,-0.00068,-0.00068,-0.000694,-0.000694,-0.000694,-1.0
2022-11-30 22:20:00+00:00,-0.000656,-0.000487,-0.000187,0.000113,0.000113,0.0


In [6]:
# Remove the 'Close' column, then attach to each bar the *following* bar's
# 'Adj Close' change as `target` (shift(-1) pulls the next row's value up,
# which leaves the last row's target as NaN).
eth = (
    eth
    .drop(columns=['Close'])
    .assign(target=lambda frame: frame['Adj Close'].shift(-1))
)

eth.head()

Unnamed: 0_level_0,Open,High,Low,Adj Close,Volume,target
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-11-30 22:00:00+00:00,,,,,,0.000551
2022-11-30 22:05:00+00:00,-0.000317,-3.5e-05,3.3e-05,0.000551,0.0,-0.000474
2022-11-30 22:10:00+00:00,0.000625,0.000112,4.3e-05,-0.000474,-0.709832,-0.000694
2022-11-30 22:15:00+00:00,-0.00068,-0.00068,-0.000694,-0.000694,-1.0,0.000113
2022-11-30 22:20:00+00:00,-0.000656,-0.000487,-0.000187,0.000113,0.0,0.000282


In [7]:
# Turn the next-bar return into a binary label:
#   1 -> the next bar's 'Adj Close' change is strictly positive
#   0 -> it is zero or negative
# The comparison is vectorised instead of a per-element apply/lambda.
# NaN compares False, so the final row (which has no next bar) is labelled 0,
# exactly as the element-wise version did.
eth['target'] = (eth['target'] > 0).astype('int64')

eth.head()

Unnamed: 0_level_0,Open,High,Low,Adj Close,Volume,target
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2022-11-30 22:00:00+00:00,,,,,,1
2022-11-30 22:05:00+00:00,-0.000317,-3.5e-05,3.3e-05,0.000551,0.0,0
2022-11-30 22:10:00+00:00,0.000625,0.000112,4.3e-05,-0.000474,-0.709832,0
2022-11-30 22:15:00+00:00,-0.00068,-0.00068,-0.000694,-0.000694,-1.0,1
2022-11-30 22:20:00+00:00,-0.000656,-0.000487,-0.000187,0.000113,0.0,1


In [8]:
# Class balance of the binary label (0 = not up, 1 = up)
label_counts = eth['target'].value_counts()
label_counts

0    4327
1    4256
Name: target, dtype: int64

In [9]:
# split into train and test using built-in function
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# The final row has no following bar: shift(-1) left its target as NaN before
# it was coerced to a class, so its label is not a real observation. Drop it.
labelled = eth.iloc[:-1]

# pct_change leaves NaN in the first row, and also wherever a value and its
# predecessor are both 0 (0/0, e.g. Volume). MinMaxScaler passes NaN through
# unchanged, and NaN inputs turn the network's loss into NaN, so neutralise
# them the same way the inf values were handled earlier: set them to 0.
X = labelled.drop(['target'], axis=1).fillna(0)
y = labelled['target']

# shuffle=False keeps chronological order: train on the earlier 80 %,
# test on the most recent 20 %.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# scale the data; the scaler is fitted on the training window only so that
# no statistics from the test period leak into training
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [10]:
# Show the dimensions of the hold-out features and labels, one per line
for held_out in (X_test, y_test):
    print(held_out.shape)

(1717, 5)
(1717,)


In [None]:
# LSTM model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV, TimeSeriesSplit
# NOTE(review): this wrapper module is deprecated in recent TensorFlow
# releases; confirm it exists in the installed version.
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor

def create_model(optimizer='adam', dropout_rate=0.2, num_neurons=50):
    """Build and compile a two-layer LSTM binary classifier.

    Parameters
    ----------
    optimizer : str
        Name of the Keras optimizer handed to ``model.compile``.
    dropout_rate : float
        Dropout fraction applied after each LSTM layer.
    num_neurons : int
        Number of units in each of the two LSTM layers.

    Returns
    -------
    A compiled ``Sequential`` model whose single sigmoid output is the
    predicted probability of class 1.
    """
    model = Sequential()
    model.add(LSTM(num_neurons, return_sequences=True))
    model.add(Dropout(dropout_rate))
    model.add(LSTM(num_neurons, return_sequences=False))
    model.add(Dropout(dropout_rate))
    model.add(Dense(25))
    model.add(Dense(1, activation='sigmoid'))
    # One sigmoid unit + 0/1 labels is a binary problem, so the loss must be
    # binary cross-entropy. Categorical cross-entropy over a single output is
    # degenerate and gives no useful training signal. `metrics` is passed as a
    # list, and 'accuracy' is what the scikit-learn wrapper reports as its score.
    model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Create the model. The target is a class label, so wrap the network as a
# classifier: its score is accuracy and predict() returns class labels,
# which is what the classification metrics used later expect.
model = KerasClassifier(build_fn=create_model, verbose=0)

# Define the grid search parameters
# (2 * 4 * 3 * 4 * 3 = 288 combinations, each fitted once per CV split)
optimizer = ['Adam', 'RMSprop']
dropout_rate = [0.0, 0.1, 0.2, 0.3]
num_neurons = [50, 100, 150]
batch_size = [8, 16, 32, 64]
epochs = [10, 20, 30]
param_grid = dict(optimizer=optimizer, dropout_rate=dropout_rate, num_neurons=num_neurons, batch_size=batch_size, epochs=epochs)

# reshape the data to (samples, timesteps=1, features) as the LSTM expects.
# Using -1 for the last axis keeps this cell safe to re-run on arrays that
# have already been reshaped.
X_train = X_train.reshape(X_train.shape[0], 1, -1)
X_test = X_test.reshape(X_test.shape[0], 1, -1)

# Perform the grid search.
# The rows are time-ordered, so use forward-chaining splits: every validation
# fold lies strictly after the data it was trained on.
# The hold-out test set is deliberately NOT passed into fit(); it is reserved
# for the final evaluation of the selected model.
# NOTE(review): n_jobs=-1 runs the fits in parallel worker processes; if
# TensorFlow misbehaves under that, fall back to n_jobs=1.
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=-1, cv=TimeSeriesSplit(n_splits=3))
grid_result = grid.fit(X_train, y_train)

# Print the best parameters and the corresponding score
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

In [None]:
# Evaluate the best model found by the grid search on the hold-out set
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# predict using best parameters.
# The estimator's output is flattened and thresholded at 0.5 so that the
# metrics below always receive a 1-D vector of hard 0/1 labels, whether
# predict() returned probabilities, a column vector, or labels already.
y_pred = (np.ravel(grid_result.predict(X_test)) > 0.5).astype(int)

# get accuracy score
print("Accuracy score: ", accuracy_score(y_test, y_pred))

# get classification report (per-class precision / recall / F1)
print(classification_report(y_test, y_pred))

# get confusion matrix (rows = true class, columns = predicted class)
print(confusion_matrix(y_test, y_pred))