## Imports

In [23]:
import os
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import LSTM, RNN, Dense, Activation, Dropout
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from dotenv import load_dotenv
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR

from eth import get_crypto_data


## Extraction

In [63]:
# Load Eth data
load_dotenv()
eth = get_crypto_data(auth_token=os.environ['api_key'])
data = eth['data']

In [65]:
df = pd.DataFrame(data)

In [108]:
# Save data as a csv file
df.to_csv('eth_data.csv')

In [66]:
df.head(10)

Unnamed: 0,priceUsd,time,date
0,12.666745158814132,1458000000000,2016-03-15T00:00:00.000Z
1,13.229743315323399,1458086400000,2016-03-16T00:00:00.000Z
2,11.523188458991475,1458172800000,2016-03-17T00:00:00.000Z
3,10.01013315193628,1458259200000,2016-03-18T00:00:00.000Z
4,10.5683854304196,1458345600000,2016-03-19T00:00:00.000Z
5,10.241410589804469,1458432000000,2016-03-20T00:00:00.000Z
6,11.13453807462231,1458518400000,2016-03-21T00:00:00.000Z
7,11.573329221004572,1458604800000,2016-03-22T00:00:00.000Z
8,11.958019662066508,1458691200000,2016-03-23T00:00:00.000Z
9,11.386694160507036,1458777600000,2016-03-24T00:00:00.000Z


In [93]:
df['date'] = pd.to_datetime(df['date'])

In [94]:
df = df.set_index(pd.DatetimeIndex(df['date'].values))

In [109]:
# Set Forecast  
future_days = 5

In [110]:
# New column for forecast price
df[str(future_days)+'_Day_Price_Forecast'] = df[['priceUsd']].shift(-future_days)

In [101]:
df[['date',str(future_days)+'_Day_Price_Forecast']]

Unnamed: 0,date,5_Day_Price_Forecast
1970-01-01 00:00:01.458000000,1970-01-01 00:00:01.458000000,10.2414105898044677
1970-01-01 00:00:01.458086400,1970-01-01 00:00:01.458086400,11.1345380746223103
1970-01-01 00:00:01.458172800,1970-01-01 00:00:01.458172800,11.5733292210045721
1970-01-01 00:00:01.458259200,1970-01-01 00:00:01.458259200,11.9580196620665088
1970-01-01 00:00:01.458345600,1970-01-01 00:00:01.458345600,11.3866941605070363
...,...,...
1970-01-01 00:00:01.685923200,1970-01-01 00:00:01.685923200,
1970-01-01 00:00:01.686009600,1970-01-01 00:00:01.686009600,
1970-01-01 00:00:01.686096000,1970-01-01 00:00:01.686096000,
1970-01-01 00:00:01.686182400,1970-01-01 00:00:01.686182400,


In [102]:
# Independent dataset
X = np.array(df[['priceUsd']])
X = X[:df.shape[0] - future_days]

In [103]:
# Dependent dataset
y = np.array(df[str(future_days)+'_Day_Price_Forecast'])
y = y[:-future_days]

In [111]:
# Split the data
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size= 0.2)

In [112]:
# Train SVR model
svr_rbf = SVR(kernel='rbf', C=1e3, gamma= 0.00001)
svr_rbf.fit(x_train, y_train)

In [113]:
# Predict on test samples
svr_rbf_confidence = svr_rbf.score(x_test, y_test)
print('svr_rbf accuracy:', svr_rbf_confidence)

svr_rbf accuracy: 0.9853785909335603
