In [2]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import pandas as pd
import yfinance as yf
import os
from tensorflow.keras import layers
import random as rd

In [84]:
#predictor class
#lrs = list of Keras layers ([layers.X(...), ...]); training_data, labels = np.array(training), np.array(labels); raw_data = indicators (DataFrame)
class predictor:
    """Sequential Keras regression model with an empirical error band.

    The layers passed in are stacked into a ``tf.keras.Sequential`` model and
    compiled with the given optimizer and loss. Once trained,
    ``examine_bias`` measures the average relative under- and over-shoot of
    the model on a labelled dataset, and ``make_prediction`` uses those two
    averages to return a ``(lower, point, upper)`` triple.
    """

    def __init__(self, lrs, optimizer=None, loss=None, *args, **kwargs):
        """Build and compile the model.

        Args:
            lrs: iterable of Keras layers, added to the model in order.
            optimizer: Keras optimizer; a fresh ``Adam(1e-4)`` when omitted.
            loss: Keras loss; a fresh ``MeanSquaredError()`` when omitted
                (this is a regression model, so no accuracy metric is used).
            *args, **kwargs: accepted and ignored.
        """
        # Defaults are created per instance. An object built in the signature
        # is evaluated once and would be shared, state included, by every
        # predictor that relies on the default.
        self.lrs = lrs
        self.optimizer = tf.keras.optimizers.Adam(1e-4) if optimizer is None else optimizer
        self.loss = tf.keras.losses.MeanSquaredError() if loss is None else loss
        self.bias = None  # set by examine_bias()

        self.model = tf.keras.Sequential()
        for each in self.lrs:
            self.model.add(each)
        self.model.compile(optimizer=self.optimizer, loss=self.loss)

    def train(self, training_data, labels, epochs=100):
        """Fit the model on ``training_data``/``labels`` with shuffling enabled."""
        self.model.fit(x=training_data, y=labels, epochs=epochs, shuffle=True)

    def examine_bias(self, raw_data, training_data, labels):
        """Estimate how pessimistic/optimistic the model is on a dataset.

        For every evaluated sample the relative error
        ``(label - prediction) / prediction`` is computed. The mean of the
        negative errors and the mean of the positive errors are stored in
        ``self.bias`` as ``(lower_offset, upper_offset)``; a side with no
        samples gets an offset of 0.

        Args:
            raw_data: not used by the computation; kept so existing calls
                keep working.
            training_data: model inputs, one sample per leading index.
            labels: true target for each sample.

        Raises:
            ValueError: if fewer than two samples are supplied.
        """
        # The final sample is left out of the estimate.
        n_eval = len(labels) - 1
        if n_eval < 1:
            raise ValueError("examine_bias needs at least two labelled samples")

        # One batched predict call instead of one call per sample.
        predictions = np.asarray(self.model.predict(training_data[:n_eval])).reshape(-1)
        targets = np.asarray(labels[:n_eval], dtype=predictions.dtype).reshape(-1)

        # A prediction of exactly 0 gives an infinite/undefined relative
        # error; such samples are dropped instead of poisoning the averages.
        with np.errstate(divide='ignore', invalid='ignore'):
            relative = (targets - predictions) / predictions
        relative = relative[np.isfinite(relative)]

        def _average(values):
            # (1, 1) arrays broadcast against the (n, 1) output of predict().
            mean = values.mean() if values.size else 0.0
            return np.array([[mean]], dtype=predictions.dtype)

        lower_offset = _average(relative[relative < 0])  # label below prediction
        upper_offset = _average(relative[relative > 0])  # label above prediction
        self.bias = (lower_offset, upper_offset)

    def pred(self, data):
        """Return the raw model output for ``data``."""
        return self.model.predict(data)

    def make_prediction(self, data):
        """Return ``(lower, point, upper)`` predictions for ``data``.

        ``lower`` and ``upper`` are the point prediction shifted by the
        relative offsets stored in ``self.bias``.

        Raises:
            AttributeError: if ``examine_bias`` has not been run yet.
        """
        if getattr(self, "bias", None) is None:
            raise AttributeError("bias is not set; call examine_bias() before make_prediction()")
        p = self.pred(data)
        lower_offset, upper_offset = self.bias
        return (p + p * lower_offset, p, p + p * upper_offset)
    

## methods for data engineering
def calcMACD(data, fast=12, slow=26, signal=9):
    """Return the MACD histogram of the ``'Close'`` column of ``data``.

    The MACD line is the difference between the ``fast``- and ``slow``-span
    exponential moving averages of the close; the histogram is the MACD line
    minus its own ``signal``-span exponential moving average. This is the key
    statistical indicator of the strategy.

    Args:
        data: mapping/DataFrame with a ``'Close'`` pandas Series.
        fast: span of the short EMA (default 12).
        slow: span of the long EMA (default 26).
        signal: span of the signal-line EMA (default 9).

    Returns:
        pandas Series indexed like ``data['Close']``; leading entries are NaN
        until each EMA has seen ``min_periods`` observations.
    """
    close = data['Close']
    fast_ema = close.ewm(span=fast, adjust=False, min_periods=fast).mean()
    slow_ema = close.ewm(span=slow, adjust=False, min_periods=slow).mean()
    macd_line = fast_ema - slow_ema
    signal_line = macd_line.ewm(span=signal, adjust=False, min_periods=signal).mean()
    return macd_line - signal_line

def ma(data, span):
    """Simple moving average over consecutive windows of ``span`` values.

    Element ``k`` of the result is the mean of ``data[k:k + span]``, i.e. the
    ``span`` observations immediately preceding position ``k + span``. The
    result has ``len(data) - span`` entries (the window ending on the very
    last observation is not produced), and is empty when ``data`` has no more
    than ``span`` values.

    Args:
        data: 1-D sequence of numbers (list, NumPy array or pandas Series).
        span: window length, a positive integer.

    Returns:
        1-D ``np.ndarray`` of floats.

    Raises:
        ValueError: if ``span`` is smaller than 1.
    """
    if span < 1:
        raise ValueError("span must be a positive integer")

    # Work on a plain array so slicing is always positional, whatever index
    # a pandas Series carries.
    values = np.asarray(data, dtype=float)
    n_windows = max(len(values) - span, 0)
    return np.array([values[start:start + span].sum() / span
                     for start in range(n_windows)])

def createdataset(secu):
    """Assemble the indicator table that training windows are cut from.

    Args:
        secu: price DataFrame exposing ``Open``, ``Close``, ``High`` and
            ``Low`` columns.

    Returns:
        DataFrame with columns ``open``, ``close``, ``high``, ``low``,
        ``macdhist``, ``ma20`` and ``ma50``, starting at row 100 of ``secu``.
    """
    indicators = pd.DataFrame([])

    # Price columns: everything from row 100 onwards.
    price_columns = (
        ('open', secu.Open),
        ('close', secu.Close),
        ('high', secu.High),
        ('low', secu.Low),
    )
    for column_name, series in price_columns:
        indicators[column_name] = series[100:]

    # calcMACD returns a Series, so this assignment is matched to the price
    # rows by index label.
    macd_hist = calcMACD(secu)
    indicators['macdhist'] = macd_hist[74:]

    # ma() returns plain arrays of len(secu) - span entries; the slices trim
    # both to len(secu) - 100 entries so they fit the price rows one-to-one.
    ma20 = ma(secu.Close, 20)
    ma50 = ma(secu.Close, 50)
    indicators['ma20'] = ma20[80:]
    indicators['ma50'] = ma50[50:]

    return indicators

def get_trainingdata(indicators, window=5):
    """Cut the indicator table into sliding windows and next-close labels.

    Sample ``i`` holds rows ``i .. i + window - 1`` of the seven indicator
    columns, laid out as ``(day, feature, 1)``; its label is the ``close`` of
    row ``i + window``. Rows are always addressed by position, so the result
    does not depend on the kind of index ``indicators`` carries.

    Args:
        indicators: DataFrame with columns ``open``, ``close``, ``high``,
            ``low``, ``macdhist``, ``ma20`` and ``ma50``.
        window: number of consecutive rows per sample (default 5).

    Returns:
        ``(training, labels)`` where ``training`` has shape
        ``(n, window, 7, 1)`` and ``labels`` has shape ``(n,)``, with
        ``n = max(len(indicators) - window, 0)``.
    """
    feature_columns = ['open', 'close', 'high', 'low', 'macdhist', 'ma20', 'ma50']

    # (rows, features) matrix: slicing rows keeps each day's features
    # together, so no reshape can mix values of different features.
    features = indicators[feature_columns].to_numpy(dtype=float)
    closes = indicators['close'].to_numpy(dtype=float)

    n_samples = max(len(indicators) - window, 0)
    if n_samples == 0:
        return np.empty((0, window, len(feature_columns), 1)), np.empty((0,))

    training = np.stack([features[start:start + window]
                         for start in range(n_samples)])[..., np.newaxis]
    labels = closes[window:window + n_samples]
    return training, labels



In [85]:
# NOTE: `secu` is not assigned by any earlier cell; it must already hold a
# price DataFrame (the download loop further down assigns it) before this
# cell can run on a fresh kernel.
raw = secu
indicators = createdataset(raw)
# The window builder defined above is called get_trainingdata.
training_data, labels = get_trainingdata(indicators)

  y = indicators.close[i+5]


In [86]:
# Layer stack handed to predictor(); the input is declared as [5, 7, 1] and
# the last layer emits a single value.
# NOTE(review): sigmoid outputs are positive, so the LeakyReLU layers that
# follow the sigmoid-activated convolutions pass their input through
# unchanged - confirm whether that is intended.
lyrs = [
    # --- first convolutional stage ---
    layers.Conv1D(
        100,
        kernel_size=2,
        strides=2,
        padding='same',
        input_shape=[5, 7, 1],
        activation='sigmoid',
    ),
    layers.MaxPooling2D((1, 2)),
    layers.LeakyReLU(),
    layers.BatchNormalization(),
    layers.Dropout(0.15),

    # --- second convolutional stage ---
    layers.Conv1D(
        50,
        kernel_size=3,
        strides=2,
        padding='same',
        activation='sigmoid',
    ),
    layers.LeakyReLU(),
    layers.Dropout(0.15),

    # --- dense regression head ---
    layers.Flatten(),
    layers.Dense(50, activation='sigmoid'),
    layers.Dense(10, activation='relu'),
    layers.Dense(1, activation='relu'),
]

In [87]:
# Build and compile the model from the layer stack; the default optimizer and
# loss of predictor are used.
estimator = predictor(lyrs)

In [88]:
# Ticker symbols downloaded and trained on, in this order, by the loop below.
f = ['aapl', 'nke', 'nflx','goog','sbux','intc','hmy','jnj']


In [None]:
# Train the one shared estimator on every ticker in turn, 180 epochs each.
# `secu`, `tr` and `ls` keep the values of the last ticker once the loop ends
# and are read by the cells that follow.
for each in f:
    secu = yf.download(each, period='10y', interval='1d')  # daily bars over a 10-year period
    ins = createdataset(secu)  # indicator table for this ticker
    tr, ls = get_trainingdata(ins)  # sliding windows and next-close labels
    estimator.train(tr, ls, epochs = 180)

In [None]:
# Say we are going to use this estimator for the last dataset it was trained on:
# measure its bias on that dataset (`secu`, `tr`, `ls` come from the final loop iteration).
estimator.examine_bias(secu, tr, ls)

In [92]:
# First bias component: the average of the negative relative errors, used as
# the lower offset by make_prediction.
estimator.bias[0]

array([[-0.01110723]], dtype=float32)

In [98]:
# (lower, point, upper) prediction for the last window in `tr`.
# Note that this window was part of the training data, so this is not an
# out-of-sample check.
estimator.make_prediction(tr[-1:])



(array([[161.26332]], dtype=float32),
 array([[163.07463]], dtype=float32),
 array([[164.97105]], dtype=float32))

In [99]:
# Actual label (next close) of that last window, for comparison with the
# prediction above.
ls[-1]

163.99000549316406

In [None]:
#Pipeline for creating a predictor object:
#  1. obj = predictor(lyrs)
#  2. train it on a variety of securities
#  3. obj.examine_bias(...) on the security the estimator is intended for
#  4. the object is ready for making predictions