# Here is my attempt at crypto-trading. I utilize datetime, neural nets, supervised, and unsupervised learning to code an algorithm to automatically trade cryptocurrency.

In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats
import matplotlib.pyplot as plt
from pandas_datareader import data
import datetime as dt
from datetime import datetime
import seaborn as sns
from scipy.stats import boxcox
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.metrics import classification_report
from tensorflow.keras.models import Sequential,model_from_json
from tensorflow.keras.layers import Dense, LSTM, Dropout, ConvLSTM2D, Flatten, RepeatVector,TimeDistributed
from tensorflow.keras.utils import to_categorical
import umap
import pickle

  from pandas.util.testing import assert_frame_equal


In [2]:
#Functions I will use so I'm saving them for later
def convert_data(data, step):
  X, y = [], []
  for i in range(len(data)-step):
    d = i + step
    X.append(data[i:d,:-1])
    y.append(data[d,-1])
  return np.array(X), np.array(y)

def evaluate_forecasts(actual, predicted):
	scores = list()
	# calculate an RMSE score for each day
	for i in range(actual.shape[1]):
		# calculate mse
		mse = mean_squared_error(actual[:, i], predicted[:, i])
		# calculate rmse
		rmse = sqrt(mse)
		# store
		scores.append(rmse)
	# calculate overall RMSE
	s = 0
	for row in range(actual.shape[0]):
		for col in range(actual.shape[1]):
			s += (actual[row, col] - predicted[row, col])**2
	score = sqrt(s / (actual.shape[0] * actual.shape[1]))
	return score, scores

In [3]:
# Training data is found from kaggle. Link is below. I'm using the btc_usd one
#link: https://www.kaggle.com/tencars/392-crypto-currency-pairs-at-minute-resolution



df = pd.read_csv(r"/content/drive/My Drive/Crypto/btcusd.csv")

In [4]:
df = df.groupby('time').mean()

In [5]:
#I want datetime objects. It's easier to deal with
display(df.info())
# df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2841761 entries, 1364774820000 to 1593425100000
Data columns (total 5 columns):
 #   Column  Dtype  
---  ------  -----  
 0   open    float64
 1   close   float64
 2   high    float64
 3   low     float64
 4   volume  float64
dtypes: float64(5)
memory usage: 130.1 MB


None

In [6]:
#Check for nulls to see. YAY! No nulls!
df.isna().mean()

open      0.0
close     0.0
high      0.0
low       0.0
volume    0.0
dtype: float64

In [7]:
df.info

<bound method DataFrame.info of                   open    close         high      low     volume
time                                                            
1364774820000    93.25    93.30    93.300000    93.25  93.300000
1364774880000   100.00   100.00   100.000000   100.00  93.300000
1364774940000    93.30    93.30    93.300000    93.30  33.676862
1364775060000    93.35    93.47    93.470000    93.35  20.000000
1364775120000    93.47    93.47    93.470000    93.47   2.021627
...                ...      ...          ...      ...        ...
1593424860000  9136.20  9131.00  9136.300000  9131.00   2.821190
1593424920000  9131.00  9126.80  9131.000000  9126.80   0.980000
1593424980000  9126.80  9126.90  9126.900000  9126.80   0.260999
1593425040000  9126.80  9126.80  9126.800000  9126.80   0.226378
1593425100000  9126.80  9126.80  9126.878593  9126.80   0.029865

[2841761 rows x 5 columns]>

In [8]:
# I will add various RMAs in an attempt to give the computer a better chance at understanding and predicting when to buy/sell
# Let's visualize some of these as well
df['High 8 RMA'] = df['close'].rolling(window=8).mean()
df['High 13 RMA'] = df['close'].rolling(window=13).mean()
df['High 21 RMA'] = df['close'].rolling(window=21).mean()
df['High 55 RMA'] = df['close'].rolling(window=55).mean()
df['Bollinger High'] = df['High 13 RMA'] +df['close'].rolling(2).std()
df['Bolllinger Low'] =  df['High 13 RMA'] - df['close'].rolling(2).std()
df['returns'] = df['close'].pct_change(60)

#What I'm trying to predict
#1st strategy loses me money
df['target']=(df['returns'].shift(-60)>0).astype(int)

#2nd strategy
#df['target'] = ((df['close']>df['Bolllinger Low']).shift(-60)>0).astype(int)

#3rd Strategy
#df['target']=((df['High 8 RMA']>df['High 13 RMA']>df['High 21 RMA']>df['High 55 RMA']).shift(-60)>0).astype(int)

In [9]:

#now I'm going from 2017 to June 2020
df = df.iloc[-1051200:].copy()
#Making a time variable for visualizations
df['time'] = df.index
x = pd.to_datetime(df['time'],unit = "ms")
df.drop(columns = 'time', inplace = True)
#Moving stuff to my next notebook
x.to_csv("/content/drive/My Drive/Crypto/time.csv")

In [10]:
#Much better. Now it's time to check for NAs

In [11]:
# #Cool! Now let's check out how the line goes for open close high and low
# df.isna().sum()

In [12]:
# #This makes it look like the difference isn't really there. I'm curious how much of a difference there is between all of these
# plt.figure(figsize = (12,6))
# plt.plot("time","open", data = df, linewidth = .25)
# plt.plot("time","close", data = df, linewidth = .25)
# plt.plot("time","high", data = df, linewidth = .25)
# plt.plot("time","low", data = df, linewidth = .25)
# plt.legend()
# plt.show()

In [13]:
# #That's weird. None of the volume is 0
# plt.plot("time","volume",data = df)
# plt.title("Volume Traded")
# plt.show()


In [14]:
# display((df['volume']==0.000000001).value_counts())

In [15]:
# oc_diff = df['open']-df['close']
# display(oc_diff.max())
# display(oc_diff.min())
# display(oc_diff.max()-oc_diff.min())

In [16]:
df.columns

Index(['open', 'close', 'high', 'low', 'volume', 'High 8 RMA', 'High 13 RMA',
       'High 21 RMA', 'High 55 RMA', 'Bollinger High', 'Bolllinger Low',
       'returns', 'target'],
      dtype='object')

# So there's a little difference, but it's harder to see. The PDFs in the repo "Day.pdf", and  "Hour.pdf" will show that there are multiple crosses in the Bollinger values. Now that we know there is a difference, let's see if we can predict.

# At this point, things I can tune are
 - the time window of data being taken into the model
 - Which moving average the Bollinger bands are taking into account
 - Predicing the 4 moving average split rather than future price crosses

### Let's run some classification supervised learning models to see how it works. I will try LogRegression, gradient boosted classification, neural nets 

In [17]:
scaler = StandardScaler()
x = df.drop(['returns', 'target'], axis = 1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = .2, random_state = 69, shuffle = False)
X_train= scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
y_test.to_csv('/content/drive/My Drive/Crypto/y_test.csv', index=False)  
np.savetxt("/content/drive/My Drive/Crypto/X_test.txt",X_test)


In [18]:
df.columns

Index(['open', 'close', 'high', 'low', 'volume', 'High 8 RMA', 'High 13 RMA',
       'High 21 RMA', 'High 55 RMA', 'Bollinger High', 'Bolllinger Low',
       'returns', 'target'],
      dtype='object')

In [19]:
#Due to Ram constraints, please see my second notebook for model comparison

# LGR = LogisticRegression(class_weight = 'balanced', random_state = 69)
# LGR.fit(X_train, y_train)
# predictions = LGR.predict(X_test)
# np.savetxt("/content/drive/My Drive/Crypto/LGR_predictions.txt", predictions)
# pickle.dump(LGR, open("/content/drive/My Drive/Crypto/CryptoLGR_model.sav", "wb"))

In [20]:
# Due to Ram constraints, please see my second notebook for model comparison
# XGBC = XGBClassifier(max_depth = 2, learning_rate=.5, random_state=69)
# XGBC.fit(X_train, y_train)
# predictions = XGBC.predict(X_test)
# np.savetxt("/content/drive/My Drive/Crypto/XGB_predictions.txt", predictions)
# pickle.dump(XGBC, open("/content/drive/My Drive/Crypto/XGBC_model.sav", "wb"))

In [21]:
# lstm_df = df[df.columns.drop(list(df.filter(regex='t-')))]
# lstm_df.to_csv('/content/drive/My Drive/Crypto/lstm_df.csv', index=False)
# X_train, y_train = convert_data(lstm_df[:788400].values, 60)
# X_test, y_test = convert_data(lstm_df[788401:].values, 60)


In [22]:
print(X_test.shape)

(210240, 11)


In [23]:
y_test.shape

(210240,)

In [24]:
# #LSTM network
# #for LSTM start , input_shape=(X_train.shape[1], X_train.shape[2]
# # Answer below:
# model = Sequential()
# #Throw in some LSTM
# model.add(LSTM(100, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])))
# model.add(Dropout(0.2))
# model.add(LSTM(75))
# model.add(Dropout(0.2))
# #Finish out the model
# model.add(Dense(50, activation='relu'))
# model.add(Dense(1, activation='sigmoid'))


In [25]:
# #Adam Spannbauer coming back as an optimizer
# model.compile(optimizer = "adam", loss='binary_crossentropy',
#               metrics=['accuracy'])
# model.save("/content/drive/My Drive/Crypto/lstm_model.h5")
# model.save_weights("/content/drive/My Drive/Crypto/lstm_model_weights.h5")

In [26]:
#Now let's jump over to the other notebook for supervised learning model comparisons...

# Man, neural nets are performing bad. It's not that my features are bad, I don't ever blame myself... "All this hype about neural nets for nothing"- Jakob Salomonsson

In [27]:
scaler = StandardScaler()
x = df.drop(['returns', 'target'], axis = 1)
y = df['target']
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = .2, random_state = 69, shuffle = False)
X_train= scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [28]:
#Maybe some unsupervised learning will help these models go super sayian
#Let's try PCA and UMAP

In [29]:
X_train.shape

(840960, 11)

In [30]:
# # Trying 2,3,4 features. Want to see if this feature reduction will help the models will go further beyond
# #2 components LGR
# LGR = LogisticRegression(class_weight = 'balanced', random_state = 69)
# pca = PCA(n_components=2)
# PCA2_train = pca.fit_transform(X_train)
# PCA2_test = pca.fit_transform(X_test)
# LGR.fit(PCA2_train, y_train)
# predictions = LGR.predict(PCA2_test)
# pickle.dump(LGR, open("/content/drive/My Drive/Crypto/CryptoLGR2_model.sav", "wb"))
# np.savetxt("/content/drive/My Drive/Crypto/LGR2_X.txt",predictions)

# #2 Component XGBC
# XGBC = XGBClassifier(max_depth = 2, learning_rate=.5, random_state=69)
# XGBC.fit(PCA2_train, y_train)
# predictions = XGBC.predict(PCA2_test)
# np.savetxt("/content/drive/My Drive/Crypto/XGB2_predictions.txt", predictions)
# pickle.dump(XGBC, open("/content/drive/My Drive/Crypto/XGBC2_model.sav", "wb"))


# #---------------------------------------------------


# #3 components
# LGR = LogisticRegression(class_weight = 'balanced', random_state = 69)
# pca = PCA(n_components=3)
# PCA3_train = pca.fit_transform(X_train)
# PCA3_test = pca.fit_transform(X_test)
# LGR.fit(PCA3_train, y_train)
# predictions = LGR.predict(PCA3_test)
# pickle.dump(LGR, open("/content/drive/My Drive/Crypto/CryptoLGR3_model.sav", "wb"))
# np.savetxt("/content/drive/My Drive/Crypto/LGR3_X.txt",predictions)

# # 3 component XGBC
# XGBC = XGBClassifier(max_depth = 2, learning_rate=.5, random_state=69)
# XGBC.fit(PCA3_train, y_train)
# predictions = XGBC.predict(PCA3_test)
# np.savetxt("/content/drive/My Drive/Crypto/XGB3_predictions.txt", predictions)
# pickle.dump(XGBC, open("/content/drive/My Drive/Crypto/XGBC3_model.sav", "wb"))


# #-----------------------------------------------------


# #4 components
# LGR = LogisticRegression(class_weight = 'balanced', random_state = 69)
# pca = PCA(n_components=4)
# PCA4_train = pca.fit_transform(X_train)
# PCA4_test = pca.fit_transform(X_test)
# LGR.fit(PCA4_train, y_train)
# predictions = LGR.predict(PCA4_test)
# np.savetxt("/content/drive/My Drive/Crypto/LGR4_X.txt",predictions)
# pickle.dump(LGR, open("/content/drive/My Drive/Crypto/CryptoLGR4_model.sav", "wb"))
# #4 components XGBC
# XGBC = XGBClassifier(max_depth = 2, learning_rate=.5, random_state=69)
# XGBC.fit(PCA4_train, y_train)
# predictions = XGBC.predict(PCA4_test)
# np.savetxt("/content/drive/My Drive/Crypto/XGB4_predictions.txt", predictions)
# pickle.dump(XGBC, open("/content/drive/My Drive/Crypto/XGBC4_model.sav", "wb"))

In [31]:
# XGBC = XGBClassifier(max_depth = 2, learning_rate=.5, random_state=69)
# XGBC.fit(X_train, y_train)
# predictions = XGBC.predict(X_test)
# np.savetxt("/content/drive/My Drive/Crypto/XGB_predictions.txt", predictions)
# pickle.dump(XGBC, open("/content/drive/My Drive/Crypto/XGBC_model.sav", "wb"))

In [32]:
#Please look at the image in my github linked :
#This is why I'm only doing 1
import time
time_start = time.time()
UX_train = umap.UMAP(n_neighbors=3, min_dist=0.5, metric='correlation').fit_transform(X_train)
UX_test = umap.UMAP(n_neighbors=3, min_dist=0.5, metric='correlation').fit_transform(X_test)
#umap is taking forever so I'm just going to do one iteration of it. 3 
print('UMAP done! Time elapsed: {} seconds'.format(time.time()-time_start))

  "Random Projection forest initialisation failed due to recursion"


UMAP done! Time elapsed: 7818.269542455673 seconds


# Final conclusions

creativity in features = lack of earned money. The feature reduction methods don't even perfom better than the base signal.
- The simple up/down prediction loses around 1% per day
- The Bollinger band strategy also loses X% per day
- The youtube guru strategy loses money at x% per day

The top two strategies were things that I thought of. The final strategy is linked below

guru strategy = "https://www.youtube.com/watch?v=mqqx4cGyUoY "


There is a bunch of room for improvement (obviously) but that's for another day
