In [10]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [11]:
# Download the full daily ETH-USD price history from Yahoo Finance.
# yf.download already returns a pandas DataFrame, so no extra conversion is
# needed.
# auto_adjust=False is passed explicitly because the cells below read the
# 'Adj Close' column; newer yfinance releases default to auto_adjust=True,
# which omits that column.
eth = yf.download('ETH-USD', period='max', interval='1d', auto_adjust=False)

# Newer yfinance releases return a two-level (field, ticker) column header even
# for a single ticker. Keep only the field level so that eth['Open'] etc. are
# plain Series; this is a no-op when the columns are already flat.
if isinstance(eth.columns, pd.MultiIndex):
    eth.columns = eth.columns.get_level_values(0)

eth.head()

[*********************100%***********************]  1 of 1 completed


Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2017-11-09,308.644989,329.451996,307.056,320.884003,320.884003,893249984
2017-11-10,320.67099,324.717987,294.541992,299.252991,299.252991,885985984
2017-11-11,298.585999,319.453003,298.191986,314.681,314.681,842300992
2017-11-12,314.690002,319.153015,298.513,307.90799,307.90799,1613479936
2017-11-13,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984


In [12]:
# Append a 'day_of_week' column derived from the date index
# (pandas encodes Monday as 0 through Sunday as 6).
eth = eth.assign(day_of_week=eth.index.dayofweek)

eth

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,day_of_week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-11-09,308.644989,329.451996,307.056000,320.884003,320.884003,893249984,3
2017-11-10,320.670990,324.717987,294.541992,299.252991,299.252991,885985984,4
2017-11-11,298.585999,319.453003,298.191986,314.681000,314.681000,842300992,5
2017-11-12,314.690002,319.153015,298.513000,307.907990,307.907990,1613479936,6
2017-11-13,307.024994,328.415009,307.024994,316.716003,316.716003,1041889984,0
...,...,...,...,...,...,...,...
2023-01-18,1567.698975,1602.106689,1509.422852,1515.506958,1515.506958,10354880595,2
2023-01-19,1515.249634,1557.970337,1514.380005,1552.556519,1552.556519,6432638856,3
2023-01-20,1552.373657,1659.885742,1544.917847,1659.754150,1659.754150,8528894754,4
2023-01-21,1659.706055,1674.179321,1626.812988,1627.118164,1627.118164,8859250310,5


In [13]:
# Replace each price/volume column with its row-over-row fractional change.
# The first row has no predecessor and therefore becomes NaN; 'day_of_week'
# is left untouched.
for column in ['Open', 'High', 'Low', 'Close', 'Volume', 'Adj Close']:
    eth[column] = eth[column].pct_change()

eth.head()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,day_of_week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-11-09,,,,,,,3
2017-11-10,0.038964,-0.014369,-0.040755,-0.067411,-0.067411,-0.008132,4
2017-11-11,-0.068871,-0.016214,0.012392,0.051555,0.051555,-0.049307,5
2017-11-12,0.053934,-0.000939,0.001077,-0.021523,-0.021523,0.915562,6
2017-11-13,-0.024357,0.029021,0.028515,0.028606,0.028606,-0.354259,0


In [9]:
# Move every 'Adj Close' value up by one row, so each date carries the value
# that belonged to the following date; the last row becomes NaN.
# NOTE: this cell is not idempotent - running it again shifts the column a
# second time.
next_row_adj_close = eth['Adj Close'].shift(-1)
eth['Adj Close'] = next_row_adj_close

eth

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume,day_of_week
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-11-09,,,,,,,3
2017-11-10,0.038964,-0.014369,-0.040755,-0.067411,320.884003,-0.008132,4
2017-11-11,-0.068871,-0.016214,0.012392,0.051555,299.252991,-0.049307,5
2017-11-12,0.053934,-0.000939,0.001077,-0.021523,314.681000,0.915562,6
2017-11-13,-0.024357,0.029021,0.028515,0.028606,307.907990,-0.354259,0
...,...,...,...,...,...,...,...
2023-01-18,-0.005966,0.005085,-0.028270,-0.033383,1567.846069,0.362581,2
2023-01-19,-0.033456,-0.027549,0.003284,0.024447,1515.506958,-0.378782,3
2023-01-20,0.024500,0.065415,0.020165,0.069046,1552.556519,0.325878,4
2023-01-21,0.069141,0.008611,0.053009,-0.019663,1659.754150,0.038734,5


In [4]:
# Remove the 'Close' column ('Adj Close' is kept), then discard every row that
# still contains a NaN value.
eth = eth.drop(columns=['Close']).dropna()

In [5]:
# Count the infinite entries (+inf or -inf) in each column.
infinities = [np.inf, -np.inf]
is_infinite = eth.isin(infinities)
is_infinite.sum()

Open         0
High         0
Low          0
Close        0
Volume    2383
dtype: int64

In [6]:
# Overwrite every +inf / -inf entry with 0.
infinities = [np.inf, -np.inf]
eth = eth.replace(to_replace=infinities, value=0)

In [7]:
# Separate features from the target, split into train/test sets, then scale
# the features.
from sklearn.model_selection import train_test_split

target_column = 'Adj Close'
x = eth.drop(columns=[target_column])
y = eth[target_column]

# shuffle=False keeps the rows in their existing order, so the test set is the
# trailing 20% of the frame.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
    shuffle=False,
)

# scale the data
from sklearn.preprocessing import MinMaxScaler

# The scaler is fitted on the training rows only and then applied to both
# sets; the transformed results are NumPy arrays.
scaler = MinMaxScaler()
scaler.fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [9]:
# LSTM model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, LSTM
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.wrappers.scikit_learn import KerasRegressor
from tensorflow.keras.optimizers import Adam

def create_model(optimizer='adam', dropout_rate=0.2, num_neurons=50):
    """Build and compile a stacked two-layer LSTM regression network.

    Parameters
    ----------
    optimizer
        Forwarded unchanged to ``compile``.
    dropout_rate
        Rate used by the Dropout layer placed after each LSTM layer.
    num_neurons
        Number of units in each of the two LSTM layers.

    Returns
    -------
    A compiled ``Sequential`` model with a single-unit output layer and
    mean-absolute-error loss.
    """
    layer_stack = [
        # First LSTM emits the full sequence so the second LSTM can consume it.
        LSTM(num_neurons, activation="relu", return_sequences=True),
        Dropout(dropout_rate),
        # Second LSTM emits only its final output.
        LSTM(num_neurons, activation="relu", return_sequences=False),
        Dropout(dropout_rate),
        Dense(25),
        Dense(1),
    ]
    network = Sequential(layer_stack)
    network.compile(optimizer=optimizer, loss='mean_absolute_error')
    return network

# Reshape the data
# Keras LSTM layers expect 3-D input (samples, timesteps, features); every row
# becomes a sequence with a single timestep.
# NOTE: this rebinds x_train / x_test, so the cell cannot be re-run without
# re-running the scaling cell first.
x_train = x_train.reshape(x_train.shape[0], 1, x_train.shape[1])
x_test = x_test.reshape(x_test.shape[0], 1, x_test.shape[1])

optimizer = Adam(learning_rate=0.001)

# Pass the configured Adam instance to the model. Previously the string 'adam'
# was passed, so the optimizer object above (and any learning-rate change made
# to it) was silently ignored.
model = create_model(optimizer=optimizer, dropout_rate=0.0, num_neurons=100)

# shuffle=False keeps the batches in row order. The test split doubles as the
# validation set here, so 'val_loss' is measured on the same rows that are
# scored below.
model.fit(x_train, y_train, epochs=10, batch_size=64, validation_data=(x_test, y_test), shuffle=False)

# predict the values
y_pred = model.predict(x_test)

# print the mean absolute error
print(mean_absolute_error(y_test, y_pred))

2023-01-21 16:06:37.636495: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-01-21 16:06:37.636554: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


(8034, 288, 1)


  model = KerasRegressor(build_fn=create_model, verbose=0)
2023-01-21 16:06:44.610546: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-01-21 16:06:44.611376: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-01-21 16:06:44.611479: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2023-01-21 16:06:44.611499: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2023-01-21 16:06:44.620682: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so

KeyboardInterrupt: 

In [None]:
    # Draw the per-epoch training and validation loss curves recorded by the
    # most recent fit() call on `model`.
    training_log = model.history.history
    for metric_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
        plt.plot(training_log[metric_key], label=curve_label)
    plt.legend()
    plt.show()

In [None]:
# Overlay the model's predictions on the actual test-set targets, both drawn
# against the test set's index.
test_index = y_test.index
plt.figure(figsize=(20, 10))
for values, curve_label in ((y_test, 'Test Set'), (y_pred, 'Prediction')):
    plt.plot(test_index, values, label=curve_label)
plt.legend()
plt.show()