In [None]:
import pandas as pd
import datetime as dt 
import numpy as np
from sklearn.naive_bayes import GaussianNB, MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn import metrics 
import time
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV, RandomizedSearchCV
from sklearn.cluster import KMeans
from sklearn.feature_selection import mutual_info_classif
import plotly.graph_objs as go
import plotly.express as px
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn import ensemble
import matplotlib.pyplot as plt
from sklearn.inspection import permutation_importance
import shap
import pandas_ta as ta


import pandas_datareader as pdr
import yfinance as yf
import datetime as dt


from sklearn.preprocessing import MinMaxScaler


In [None]:
# Request parameters for the Yahoo Finance download, kept in one place so they
# are easy to tweak.
download_options = dict(
    # One symbol here; a list of symbols is accepted as well.
    tickers="AAPL",
    # Look-back window, used instead of start/end.
    # Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max (default '1mo').
    period="6mo",
    # Bar size.
    # Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo (default '1d').
    interval="1h",
    # Group the result by ticker (default is 'column').
    group_by="ticker",
    # Adjust all OHLC values automatically (default is False).
    auto_adjust=True,
    # Include pre- and post-market bars (default is False).
    prepost=True,
    # Threaded mass downloading: True/False/integer (default is True).
    threads=True,
    # Proxy URL scheme to use for the download (default is None).
    proxy=None,
)

aapl_df = yf.download(**download_options)  # or pdr.get_data_yahoo(...)

# Expose the index as an ordinary column as well.
aapl_df["Date"] = aapl_df.index

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

sns.set_style("darkgrid")

# Line chart of the Close column; the title and both axis labels share one font size.
label_font = {"fontsize": 18}

plt.figure(figsize=(15, 9))
plt.plot(aapl_df[["Close"]])
plt.title("Apple Stock Price", fontweight="bold", **label_font)
plt.xlabel("Date", **label_font)
plt.ylabel("Close Price (USD)", **label_font)
plt.show()

In [None]:
from sklearn.preprocessing import MinMaxScaler
# Work on an independent copy of the Close column: assigning into a bare
# `aapl_df[['Close']]` selection triggers pandas' SettingWithCopyWarning and
# leaves it ambiguous whether `aapl_df` itself is modified.
price = aapl_df[['Close']].copy()

# Rescale the closing price to [-1, 1]; `scaler` is kept so that predictions
# can be mapped back to prices later with `inverse_transform`.
# NOTE(review): the scaler is fitted on the whole series, i.e. also on the
# rows that later end up in the test split. Fit on the training rows only if
# a leak-free evaluation is required.
scaler = MinMaxScaler(feature_range=(-1, 1))
price['Close'] = scaler.fit_transform(price['Close'].values.reshape(-1, 1))

In [None]:
def split_data(stock, lookback, test_size=0.2):
    """Cut a time series into overlapping windows and split them chronologically.

    Every window holds ``lookback`` consecutive rows. The first
    ``lookback - 1`` rows of a window are the model input and its last row is
    the target. The trailing ``test_size`` share of the windows (rounded to the
    nearest integer) becomes the test set; nothing is shuffled.

    Args:
        stock: DataFrame/Series (anything exposing ``to_numpy()``) or an
            array-like, shaped ``(n_rows, n_features)`` or ``(n_rows,)``.
            One-dimensional input is treated as a single feature.
        lookback: Window length in rows; must be at least 2.
        test_size: Fraction of windows reserved for testing, in ``[0, 1]``.

    Returns:
        ``[x_train, y_train, x_test, y_test]`` where the ``x`` arrays have
        shape ``(n, lookback - 1, n_features)`` and the ``y`` arrays have
        shape ``(n, n_features)``.

    Raises:
        ValueError: If ``lookback`` < 2, ``test_size`` is outside ``[0, 1]``,
            or the series has fewer than ``lookback`` rows.
    """
    data_raw = stock.to_numpy() if hasattr(stock, "to_numpy") else np.asarray(stock)
    if data_raw.ndim == 1:
        # A Series / flat array is one feature per time step.
        data_raw = data_raw.reshape(-1, 1)

    if lookback < 2:
        raise ValueError("lookback must be >= 2 (at least one input row plus the target row)")
    if not 0 <= test_size <= 1:
        raise ValueError("test_size must lie in [0, 1]")

    # The last valid window starts at len - lookback, hence the + 1
    # (the previous range(len - lookback) silently dropped that window).
    n_windows = len(data_raw) - lookback + 1
    if n_windows < 1:
        raise ValueError(
            "need at least {} rows to build one window, got {}".format(lookback, len(data_raw))
        )

    # All possible sequences of length `lookback`, oldest first.
    data = np.stack([data_raw[start:start + lookback] for start in range(n_windows)])

    test_set_size = int(np.round(test_size * n_windows))
    train_set_size = n_windows - test_set_size

    x_train = data[:train_set_size, :-1, :]
    y_train = data[:train_set_size, -1, :]

    x_test = data[train_set_size:, :-1, :]
    y_test = data[train_set_size:, -1, :]

    return [x_train, y_train, x_test, y_test]
# Sequence length: each window spans `lookback` rows, of which the last one
# is the prediction target.
lookback = 20
x_train, y_train, x_test, y_test = split_data(stock=price, lookback=lookback)

In [None]:
import torch
import torch.nn as nn
def _as_model_tensor(values):
    """Return `values` as a tensor of torch's default floating dtype.

    `torch.as_tensor` accepts NumPy arrays *and* tensors, so re-running this
    cell after `x_train` / `x_test` have already been converted no longer
    raises (``torch.from_numpy`` rejects tensors).
    """
    return torch.as_tensor(values, dtype=torch.get_default_dtype())


# Model inputs (rebinding the NumPy arrays produced by split_data).
x_train = _as_model_tensor(x_train)
x_test = _as_model_tensor(x_test)

# Targets, kept under one name per model so each model's cells stay independent.
y_train_lstm = _as_model_tensor(y_train)
y_test_lstm = _as_model_tensor(y_test)
y_train_gru = _as_model_tensor(y_train)
y_test_gru = _as_model_tensor(y_test)

In [None]:
# Network shape
input_dim = 1     # features per time step
output_dim = 1    # values predicted per sample
hidden_dim = 32   # width of each recurrent layer
num_layers = 2    # stacked recurrent layers

# Optimisation
num_epochs = 100  # full-batch training iterations

In [None]:
class GRU(nn.Module):
    """Stacked GRU followed by a linear read-out of the last time step.

    Args:
        input_dim: Number of features per time step.
        hidden_dim: Size of the hidden state in every GRU layer.
        num_layers: Number of stacked GRU layers.
        output_dim: Number of values produced per sample.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, output_dim):
        super().__init__()
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers

        # batch_first=True: inputs are laid out as (batch, seq_len, input_dim).
        self.gru = nn.GRU(input_dim, hidden_dim, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        """Map a batch of sequences (batch, seq_len, input_dim) to (batch, output_dim)."""
        # Start every sequence from a zero hidden state. It is created on the
        # input's device and in its dtype so the module also works on GPU or
        # with float64 data (a plain torch.zeros(...) is always CPU/default
        # dtype). A fresh zeros tensor carries no history, so the former
        # requires_grad_()/detach() pair was a no-op and is dropped.
        h0 = torch.zeros(
            self.num_layers, x.size(0), self.hidden_dim,
            device=x.device, dtype=x.dtype,
        )
        out, _ = self.gru(x, h0)
        # Only the final time step feeds the prediction head.
        return self.fc(out[:, -1, :])

In [None]:
# Network, loss and optimiser used by the training loop.
model = GRU(input_dim, hidden_dim, num_layers, output_dim)
criterion = torch.nn.MSELoss(reduction="mean")
optimiser = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [None]:
hist = np.zeros(num_epochs)  # training loss per epoch
gru = []                     # filled with GRU scores / timing by a later cell

start_time = time.time()
for epoch in range(num_epochs):
    # Full-batch forward pass and loss.
    y_train_pred = model(x_train)
    loss = criterion(y_train_pred, y_train_gru)

    epoch_loss = loss.item()
    print("Epoch ", epoch, "MSE: ", epoch_loss)
    hist[epoch] = epoch_loss

    # Clear stale gradients, back-propagate, update the weights.
    optimiser.zero_grad()
    loss.backward()
    optimiser.step()
training_time = time.time() - start_time

print("Training time: {}".format(training_time))

In [None]:
# Training-set predictions and the matching training targets, both mapped
# back from the scaled [-1, 1] range to prices.
# `original` must come from the *training* targets: it is plotted against the
# training predictions, so pairing them with the test targets (as before)
# compared two series of different length covering different time spans.
predict = pd.DataFrame(scaler.inverse_transform(y_train_pred.detach().numpy()))
original = pd.DataFrame(scaler.inverse_transform(y_train_gru.detach().numpy()))

In [None]:
import seaborn as sns
sns.set_style("darkgrid")

fig = plt.figure()
fig.subplots_adjust(hspace=0.2, wspace=0.2)

# Left panel: `original` against the model's training predictions.
price_ax = plt.subplot(1, 2, 1)
sns.lineplot(x=original.index, y=original[0], label="Data", color="royalblue", ax=price_ax)
sns.lineplot(
    x=predict.index,
    y=predict[0],
    label="Training Prediction (GRU)",
    color="tomato",
    ax=price_ax,
)
price_ax.set_title("Stock price", size=14, fontweight="bold")
price_ax.set_xlabel("Days", size=14)
price_ax.set_ylabel("Cost (USD)", size=14)
price_ax.set_xticklabels("", size=10)

# Right panel: loss recorded at every training epoch.
loss_ax = plt.subplot(1, 2, 2)
sns.lineplot(data=hist, color="royalblue", ax=loss_ax)
loss_ax.set_xlabel("Epoch", size=14)
loss_ax.set_ylabel("Loss", size=14)
loss_ax.set_title("Training Loss", size=14, fontweight="bold")

# Width x height in inches.
fig.set_size_inches(16, 6)

In [None]:
import math, time
from sklearn.metrics import mean_squared_error

# Predict the held-out windows. Inference needs no gradient tracking.
with torch.no_grad():
    y_test_pred = model(x_test)

# Map predictions and targets back to prices. The results get their own
# names: overwriting y_train_pred / y_train / y_test_pred / y_test with NumPy
# arrays (as before) made this cell, and the plotting cell that calls
# `.detach()` on y_train_pred, fail when re-run.
y_train_pred_usd = scaler.inverse_transform(y_train_pred.detach().numpy())
y_train_usd = scaler.inverse_transform(y_train_gru.detach().numpy())
y_test_pred_usd = scaler.inverse_transform(y_test_pred.detach().numpy())
y_test_usd = scaler.inverse_transform(y_test_gru.detach().numpy())

# Root mean squared error in price units.
trainScore = math.sqrt(mean_squared_error(y_train_usd[:, 0], y_train_pred_usd[:, 0]))
print('Train Score: %.2f RMSE' % (trainScore))
testScore = math.sqrt(mean_squared_error(y_test_usd[:, 0], y_test_pred_usd[:, 0]))
print('Test Score: %.2f RMSE' % (testScore))

# Summary for the GRU run: [train RMSE, test RMSE, training time in seconds].
# Rebuilt rather than appended to, so re-running the cell does not grow it.
gru = [trainScore, testScore, training_time]

In [None]:
# Discard the first bar and take a real copy, so that adding a column below
# does not write into a slice of the downloaded frame (SettingWithCopyWarning).
# NOTE(review): the reason for dropping the first bar is not evident from the
# notebook. Re-running this cell drops one more bar each time.
aapl_df = aapl_df.iloc[1:].copy()

# MACD = fast EMA - slow EMA of the closing price.
# The spans count bars of the downloaded interval, not calendar days.
# 12-bar (fast) EMA of the close:
ema_fast = aapl_df['Close'].ewm(span=12, adjust=False, min_periods=12).mean()
# 26-bar (slow) EMA of the close:
ema_slow = aapl_df['Close'].ewm(span=26, adjust=False, min_periods=26).mean()
# min_periods leaves the first 25 MACD values as NaN.
macd = ema_fast - ema_slow

aapl_df['Macd'] = macd

# Show only the last rows instead of dumping the whole frame.
aapl_df.tail()





In [None]:
# Keep only the rows in which every column has a value.
aapl_df = aapl_df.dropna(how="any")

In [None]:
data = aapl_df.to_numpy()

# Number of most recent bars held out for the trading simulation.
N_HOLDOUT = 100

# Select columns by name instead of by position in `data`.
# NOTE(review): with the columns this notebook builds (the downloaded OHLCV
# fields, then 'Date', then 'Macd') position 5 is 'Date', not a price change,
# so the former `row[5]` fed timestamps into the labelling step.

# Feature matrix: one [macd] row per bar.
train_data = aapl_df[['Macd']].to_numpy().tolist()

# Bar-over-bar relative change of the close. The first bar has no
# predecessor and is treated as "no change".
change = aapl_df['Close'].pct_change().fillna(0.0).tolist()

x = train_data[:-N_HOLDOUT]

xTe = train_data[-N_HOLDOUT:]

yTe = change[-N_HOLDOUT:]


def dataset(pct_change, threshold=0.001):
  """Turn relative price changes into three-way direction labels.

  Args:
    pct_change: Iterable of relative changes (0.01 means +1 %).
    threshold: Size of the dead band around zero. Changes whose magnitude
      does not exceed it are labelled 0.

  Returns:
    A list with one label per input value: 1 if the change is above
    ``threshold``, -1 if it is below ``-threshold``, otherwise 0 (NaN
    compares false both ways and therefore maps to 0).
  """
  def _label(value):
    if value > threshold:
      return 1
    if value < -threshold:
      return -1
    return 0

  return [_label(value) for value in pct_change]


# Label every bar before the final 100.
y = dataset(change[:-100])

# Sum of the labels.
label_total = sum(y)
print(label_total)

# Split in order (no shuffling): first 80% for training, last 20% for validation.
X_train, X_valid, y_train, y_valid = train_test_split(
    x,
    y,
    test_size=0.2,
    shuffle=False,
)





# K-means with one cluster per direction class (-1 / 0 / 1).
# random_state pins the centroid initialisation so a fresh run reproduces the
# same clusters; n_init is set explicitly because its default differs between
# scikit-learn releases.
model1 = KMeans(n_clusters=3, n_init=10, random_state=42)

# Earlier experiment, no longer active: GaussianNB(var_smoothing=0.8111308307896871),
# tuned with GridSearchCV over {"var_smoothing": np.logspace(0, -9, num=100)}
# using cv=TimeSeriesSplit(max_train_size=500).

# K-means is unsupervised and ignores labels, so only the features are passed.
model1.fit(X_train)

# Cluster ids (0..2) are arbitrary and are NOT the -1/0/1 labels themselves.
y_val_pred = model1.predict(X_valid)

# The Rand index is invariant to how clusters are numbered, so it can compare
# the cluster assignment with the true direction labels.
rand_score = metrics.rand_score(y_valid, y_val_pred)

print(rand_score)

# Silhouette of the clustering itself: it has to be computed from the
# assignments K-means produced (labels_), not from the ground-truth labels.
# recall / precision / accuracy are left out because they need predictions
# that share the label space of y_valid, which cluster ids do not.
metrics.silhouette_score(X_train, model1.labels_, metric='euclidean')









In [None]:
# Replay the held-out bars and trade on the averaged model signal.
SIGNAL_WINDOW = 3         # number of most recent signals that are averaged

account_balance = 20000   # cash
stock_balance = 0         # value of the shares held, at the latest close
stock_amount = 0          # number of shares held
predictions = []          # one signal (-1 / 0 / 1) per replayed bar

# model1 may return arbitrary ids (e.g. K-means cluster numbers), so translate
# each id into the direction label seen most often for it in the training data.
train_ids = np.asarray(model1.predict(np.asarray(X_train)))
train_labels = np.asarray(y_train)
id_to_signal = {}
for model_id in np.unique(train_ids):
  labels, counts = np.unique(train_labels[train_ids == model_id], return_counts=True)
  id_to_signal[int(model_id)] = int(labels[np.argmax(counts)])

# Trades are priced with the closing price of each held-out bar. xTe only
# holds the MACD feature, which the previous version mistook for the price.
# A dedicated name is used so the global `price` frame is not overwritten.
close_prices = aapl_df['Close'].to_numpy()[-len(xTe):]

for features, close in zip(xTe, close_prices):
  model_id = int(model1.predict(np.array(features).reshape(1, -1))[0])
  predictions.append(id_to_signal.get(model_id, 0))

  if len(predictions) < SIGNAL_WINDOW:
    continue  # not enough history to average yet

  # Mean of the last SIGNAL_WINDOW signals (the former `predictions[-3]`
  # picked a single element instead of the last three).
  window_prediction = sum(predictions[-SIGNAL_WINDOW:]) / SIGNAL_WINDOW

  if window_prediction > 0:
    # Buy `window_prediction` shares, limited by the available cash.
    shares = min(window_prediction, account_balance / close)
    account_balance -= shares * close
    stock_amount += shares
  elif window_prediction < 0:
    # Sell |window_prediction| shares, limited by the shares actually held.
    shares = min(-window_prediction, stock_amount)
    account_balance += shares * close
    stock_amount -= shares

  # Mark the remaining position to market.
  stock_balance = stock_amount * close


print("starting balance: 20000. Finishing balance: ", account_balance)
print("Value of shares still held: ", stock_balance)












    

