In [16]:
! pip install numpy --upgrade

Requirement already up-to-date: numpy in /home/olli/.local/lib/python3.8/site-packages (1.24.1)


In [17]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler
import pandas_ta as ta

In [18]:
# download ethereum data from yfinance
eth = yf.download('ETH-USD', start="2022-12-01", end="2022-12-31", interval='1h')

# convert to pandas dataframe
eth = pd.DataFrame(eth)

eth.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0,Open,High,Low,Close,Adj Close,Volume
2022-11-30 22:00:00+00:00,1295.510254,1295.908325,1293.83728,1294.780273,1294.780273,0
2022-11-30 23:00:00+00:00,1297.411255,1302.038696,1295.544067,1295.79187,1295.79187,333292544
2022-12-01 00:00:00+00:00,1295.769531,1295.999878,1288.971924,1290.010742,1290.010742,0
2022-12-01 01:00:00+00:00,1290.012085,1290.132202,1286.562256,1286.674072,1286.674072,0
2022-12-01 02:00:00+00:00,1286.557861,1288.012085,1285.000366,1286.933594,1286.933594,0


In [19]:
# change everything to pct change
eth = eth.pct_change()

# drop the first row
eth = eth.drop(eth.index[0])

# make the target to be 1 if the adj close is higher than the previous day, 0 if not
eth['target'] = np.where(eth['Adj Close'] > 0, 1, 0)

# shift target by 1
eth['target'] = eth['target'].shift(-1)

# drop the last row
eth = eth.drop(eth.index[-1])

In [20]:
eth.head

<bound method NDFrame.head of                                Open      High       Low     Close  Adj Close  \
2022-11-30 23:00:00+00:00  0.001467  0.004731  0.001319  0.000781   0.000781   
2022-12-01 00:00:00+00:00 -0.001265 -0.004638 -0.005073 -0.004461  -0.004461   
2022-12-01 01:00:00+00:00 -0.004443 -0.004528 -0.001869 -0.002587  -0.002587   
2022-12-01 02:00:00+00:00 -0.002678 -0.001643 -0.001214  0.000202   0.000202   
2022-12-01 03:00:00+00:00  0.000280  0.001553  0.001473  0.001406   0.001406   
...                             ...       ...       ...       ...        ...   
2022-12-30 16:00:00+00:00  0.005607 -0.000186  0.004348  0.000228   0.000228   
2022-12-30 17:00:00+00:00  0.000199 -0.000227  0.000569 -0.000621  -0.000621   
2022-12-30 18:00:00+00:00 -0.000692  0.001521  0.000191  0.002207   0.002207   
2022-12-30 19:00:00+00:00  0.002361  0.000163  0.000813 -0.001467  -0.001467   
2022-12-30 20:00:00+00:00 -0.001577 -0.000434 -0.000033  0.001286   0.001286   

         

In [21]:
# change inf to 0
eth = eth.replace([np.inf, -np.inf], 0)

eth.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,Volume,target
2022-11-30 23:00:00+00:00,0.001467,0.004731,0.001319,0.000781,0.000781,0.0,0.0
2022-12-01 00:00:00+00:00,-0.001265,-0.004638,-0.005073,-0.004461,-0.004461,-1.0,0.0
2022-12-01 01:00:00+00:00,-0.004443,-0.004528,-0.001869,-0.002587,-0.002587,,1.0
2022-12-01 02:00:00+00:00,-0.002678,-0.001643,-0.001214,0.000202,0.000202,,1.0
2022-12-01 03:00:00+00:00,0.00028,0.001553,0.001473,0.001406,0.001406,,0.0


In [22]:
# drop volume
eth = eth.drop(['Volume'], axis=1)

eth.head()

Unnamed: 0,Open,High,Low,Close,Adj Close,target
2022-11-30 23:00:00+00:00,0.001467,0.004731,0.001319,0.000781,0.000781,0.0
2022-12-01 00:00:00+00:00,-0.001265,-0.004638,-0.005073,-0.004461,-0.004461,0.0
2022-12-01 01:00:00+00:00,-0.004443,-0.004528,-0.001869,-0.002587,-0.002587,1.0
2022-12-01 02:00:00+00:00,-0.002678,-0.001643,-0.001214,0.000202,0.000202,1.0
2022-12-01 03:00:00+00:00,0.00028,0.001553,0.001473,0.001406,0.001406,0.0


In [23]:
# LSTM
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM

# split the data into train and test using built in function
from sklearn.model_selection import train_test_split

# split the data into train and test
X = eth.drop(['target'], axis=1)
y = eth['target']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, shuffle=False)

# scale the data
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# reshape the data
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# build the model
model = Sequential()

model.add(LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(25))
model.add(Dropout(0.2))
model.add(Dense(1))

# compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# train the model
model.fit(X_train, y_train, batch_size=64, epochs=1)

# evaluate the model
score = model.evaluate(X_test, y_test, verbose=0)
print(score)

[0.9692497253417969, 0.5]
