In [1]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import datetime
import quantstats as qs
import yfinance as yf
import warnings

from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.layers.recurrent import LSTM, GRU

from csv import writer

from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.linear_model import LogisticRegression

warnings.filterwarnings("ignore", category=RuntimeWarning)
warnings.filterwarnings("ignore", category=FutureWarning)
warnings.filterwarnings("ignore", category=UserWarning)


In [2]:
def computeClassification(actual):
    if (actual > 0):
        return 1
    else:
        return -1

def append_list_as_row(file_name, list_of_elem):
    # Open file in append mode
    with open(file_name, 'a+', newline='') as write_obj:
        # Create a writer object from csv module
        csv_writer = writer(write_obj)
        # Add contents of list as last row in the csv file
        csv_writer.writerow(list_of_elem)


In [3]:
symbols = ['BTC-USD']
start = "2014-01-29"
start_entry = "2020-03-28"
end = datetime.datetime.now()
interval = '1d'
start_interval = '1m' # IMPORTANT: ALWAYS leave this as 1m and do not change this. as it is for pulling last price on 1m


forward_prediction_period = 1 # how many forward days one wants to predict price, I would leave this alone at 1
lookback = 1


In [4]:
# set signals.csv header
header = ['Date','Symbol','Last Price','Cumulative Return','Sharpe Ratio',
          'Value at Risk(VaR)','Max Drawdown','Volatility',
          'Machine Learning AI Accuracy','Current Machine Learning Signal']

# Append a list as new line to an old csv file
append_list_as_row('Signals.csv', header)


In [5]:
symbol = symbols[0]

df = yf.download(symbol, start=start, end=end, interval=interval)
entry_df = yf.download(symbol, start=start_entry, end=end, interval=start_interval)


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed

1 Failed download:
- BTC-USD: 1m data not available for startTime=1585342800 and endTime=1649236138. Only 7 days worth of 1m granularity data are allowed to be fetched per request.


In [6]:
data_open = df['Open']
data_open = pd.DataFrame(data_open)
data_open = data_open.reset_index()
del data_open['Date']
print(data_open)

data_high = df['High']
data_high = pd.DataFrame(data_high)
data_high = data_high.reset_index()
del data_high['Date']
print(data_high)

data_low = df['Low']
data_low = pd.DataFrame(data_low)
data_low = data_low.reset_index()
del data_low['Date']
print(data_low)

data_close = df['Close']
data_close = pd.DataFrame(data_close)
data_close = data_close.reset_index()
del data_close['Date']
print(data_close)

data_volume = df['Volume']
data_volume = pd.DataFrame(data_volume)
data_volume = data_volume.reset_index()
del data_volume['Date']
print(data_volume)

data_sma_20 = df['Close'].rolling(window=20).mean()
data_sma_20 = pd.DataFrame(data_sma_20)
data_sma_20 = data_sma_20.reset_index()
del data_sma_20['Date']
data_sma_20 = data_sma_20.fillna(0)
data_sma_20.columns = ['sma20']
print(data_sma_20)

data_sma_50 = df['Close'].rolling(window=50).mean()
data_sma_50 = pd.DataFrame(data_sma_50)
data_sma_50 = data_sma_50.reset_index()
del data_sma_50['Date']
data_sma_50 = data_sma_50.fillna(0)
data_sma_50.columns = ['sma50']
print(data_sma_50)

data_sma_200 = df['Close'].rolling(window=200).mean()
data_sma_200 = pd.DataFrame(data_sma_200)
data_sma_200 = data_sma_200.reset_index()
del data_sma_200['Date']
data_sma_200 = data_sma_200.fillna(0)
data_sma_200.columns = ['sma200']
print(data_sma_200)

data = pd.concat([data_open, data_high, data_low, data_close, data_volume, data_sma_20, data_sma_50, data_sma_200], axis=1)
data = pd.DataFrame(data)
data = data.reset_index()
del data['index']
data.columns = ['Open', 'High', 'Low', 'Close', 'Volume', 'sma20', 'sma50', 'sma200']
print(data)


              Open
0       465.864014
1       456.859985
2       424.102997
3       394.673004
4       408.084991
...            ...
2754  46285.500000
2755  45859.128906
2756  46445.273438
2757  46624.507812
2758  45491.375000

[2759 rows x 1 columns]
              High
0       468.174011
1       456.859985
2       427.834991
3       423.295990
4       412.425995
...            ...
2754  47028.281250
2755  47313.476562
2756  46791.089844
2757  47106.140625
2758  45499.734375

[2759 rows x 1 columns]
               Low
0       452.421997
1       413.104004
2       384.532013
3       389.882996
4       393.181000
...            ...
2754  45782.511719
2755  45634.105469
2756  45235.816406
2757  45544.808594
2758  44748.792969

[2759 rows x 1 columns]
             Close
0       457.334015
1       424.440002
2       394.795990
3       408.903992
4       398.821014
...            ...
2754  45868.949219
2755  46453.566406
2756  46622.675781
2757  45555.992188
2758  45378.992188

[2759 rows x

In [7]:
# Convert default of 1 day bars to n period bars for look forward prediction period:
data = data[data.index % forward_prediction_period == 0]
print("DATA TEST")
print(data)


DATA TEST
              Open          High           Low         Close       Volume  \
0       465.864014    468.174011    452.421997    457.334015     21056800   
1       456.859985    456.859985    413.104004    424.440002     34483200   
2       424.102997    427.834991    384.532013    394.795990     37919700   
3       394.673004    423.295990    389.882996    408.903992     36863600   
4       408.084991    412.425995    393.181000    398.821014     26580100   
...            ...           ...           ...           ...          ...   
2754  46285.500000  47028.281250  45782.511719  45868.949219  29336594194   
2755  45859.128906  47313.476562  45634.105469  46453.566406  25414397610   
2756  46445.273438  46791.089844  45235.816406  46622.675781  32499785455   
2757  46624.507812  47106.140625  45544.808594  45555.992188  29640604055   
2758  45491.375000  45499.734375  44748.792969  45378.992188  35218669568   

             sma20         sma50        sma200  
0         0.0000

In [8]:

sc1 = MinMaxScaler(feature_range=(0, 1))
sc2 = MinMaxScaler(feature_range=(0, 1))
sc3 = MinMaxScaler(feature_range=(0, 1))
sc4 = MinMaxScaler(feature_range=(0, 1))
sc5 = MinMaxScaler(feature_range=(0, 1))
sc6 = MinMaxScaler(feature_range=(0, 1))
sc7 = MinMaxScaler(feature_range=(0, 1))
sc8 = MinMaxScaler(feature_range=(0, 1))
input_feature = data[['Open', 'High', 'Low', 'Close', 'Volume', 'sma20', 'sma50', 'sma200']].values

symbol_open = input_feature[:, 0]
symbol_high = input_feature[:, 1]
symbol_low = input_feature[:, 2]
symbol_close = input_feature[:, 3]
symbol_volume = input_feature[:, 4]
symbol_sma20 = input_feature[:, 5]
symbol_sma50 = input_feature[:, 6]
symbol_sma200 = input_feature[:, 7]

input_data_1 = sc1.fit_transform(symbol_open.reshape(-1, 1))
input_data_2 = sc2.fit_transform(symbol_high.reshape(-1, 1))
input_data_3 = sc3.fit_transform(symbol_low.reshape(-1, 1))
input_data_4 = sc4.fit_transform(symbol_close.reshape(-1, 1))
input_data_5 = sc5.fit_transform(symbol_volume.reshape(-1, 1))
input_data_6 = sc6.fit_transform(symbol_sma20.reshape(-1, 1))
input_data_7 = sc7.fit_transform(symbol_sma50.reshape(-1, 1))
input_data_8 = sc8.fit_transform(symbol_sma200.reshape(-1, 1))

input_data = np.hstack((input_data_1, input_data_2, input_data_3, input_data_4,input_data_5, input_data_6, input_data_7, input_data_8))


In [20]:
test_size = int(.3 * len(data))
X = []
y = []
for i in range(len(data) - lookback - 1):
    t = []
    for j in range(0, lookback):
        t.append(input_data[[(i + j)], :])
    X.append(t)
    y.append(input_data[i + lookback, 1])

X, y = np.array(X), np.array(y)
X_test = X[test_size + lookback:]
Y_test = y[test_size + lookback:]
# X = X.reshape(X.shape[0], lookback, 8)
# X_test = X_test.reshape(X_test.shape[0], lookback, 8)
# print(X.shape)
# print(X_test.shape)


In [21]:
X.shape

(2757, 1, 1, 8)

In [None]:
# BUILD THE RNN MODEL

# We add 30 RNN cells that will be stacked one after the other in the RNN, implementing an efficient stacked RNN.
# return_sequencesis True to return the last output in the output sequence.
# input_shape will be of the 3D format of test sample size, time steps, no. of input features. output one unit.

model = Sequential()
model.add(Dropout(0.2, input_shape=(X.shape[1], 8)))  # can adjust dropout %
model.add(Activation('relu'))  # can likely replace and test with sigmoid or softmax also to see if results improve
model.add(LSTM(units=30, return_sequences=True, input_shape=(X.shape[1], 8)))
model.add(LSTM(units=30, return_sequences=True))
model.add(LSTM(units=30))
model.add(Dense(units=1))
model.summary()


In [None]:
# now compile the model using adam optimizer and loss function will be mean squared error for the regression problem
model.compile(optimizer='adam', loss='mean_squared_error')

# now fit the data to the input data using batch_size of 32 and 100 epochs
model.fit(X, y, epochs=100, batch_size=32, validation_data=(X_test, Y_test),validation_split=0.1)  # change values to test results, default at 10% testing data (0.1)


In [None]:
predicted_value = model.predict(X_test)


In [None]:

print("PREDICTED VALUE FOR:")
print(symbol)
print("AT A PERIOD OF:")
print(forward_prediction_period)
print("DAY(S) IS.........")
predicted_symbol_price = sc2.inverse_transform(predicted_value)
predicted_symbol_price = pd.DataFrame(predicted_symbol_price)
lstm_predicted_symbol_value = predicted_symbol_price.values[-1:]
print(lstm_predicted_symbol_value)
