## Plutus
A simple classifier which aims to identify stocks currently on a "bullish" trend.

An integral part of the eventual application. 

In [1]:
# the Greek god of wealth (son of Iasion and Demeter)

# dependencies

import os
import math
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import requests

# for loading data
from scipy.io import loadmat
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import accuracy_score
from random import randint
from sklearn.linear_model import SGDClassifier

import alpaca_trade_api as tradeapi

# api keys
# SECURITY: credentials must never live in source control. The key pairs that
# used to be hard-coded here should be treated as leaked and rotated in the
# Alpaca dashboard. They are now read from environment variables; a missing
# variable raises KeyError immediately instead of failing later on a request.
api = tradeapi.REST(
        os.environ['APCA_API_KEY_ID'],
        os.environ['APCA_API_SECRET_KEY'],
        'https://api.alpaca.markets',
        api_version = 'v2')

# paper api (separate key pair, so it gets its own environment variables)
paper_api = tradeapi.REST(
        os.environ['APCA_PAPER_API_KEY_ID'],
        os.environ['APCA_PAPER_API_SECRET_KEY'],
        'https://paper-api.alpaca.markets',
        api_version = 'v2')

# run on the first GPU when one is available, otherwise on the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
print('device: ', device)

# nasdaq tickers
# can add from other exchanges
nasdaq = pd.read_csv('nasdaq.csv', sep=',', low_memory=False)
# the 'Symbol' column holds the ticker symbols used throughout the notebook
official_ticks = np.array(nasdaq['Symbol'])
print(official_ticks)

device:  cpu
['A' 'AA' 'AAC' ... 'ZYME' 'ZYNE' 'ZYXI']


In [2]:
# Fetch the live account once; `account` is read later by buy() when sizing a purchase.
account = api.get_account()

## Purchase classifier
The aim of this section is to identify stocks which are viable for purchase, meaning that they are currently on a bullish trend.

In [7]:

# getData
# (String, int) -> [float] or None
# The purpose of getData is to build the predictors to train our
# classifier. It will return the feature list for the given ticker.
def getData(ticker, days=5):
    """Fetch the last `days` daily bars for `ticker` and flatten them into features.

    Parameters:
        ticker: symbol to look up.
        days: number of daily bars to request. Defaults to 5, the window the
            purchase classifier is built on, so the one-argument calls made
            elsewhere in this notebook keep working.

    Returns:
        A list of length 2 * days: the `days` closing prices (oldest bar first,
        in the order returned by the API) followed by the `days` volumes.
        None when the request fails or fewer/more than `days` bars come back.
    """
    try:
        barset = api.get_barset(ticker, 'day', limit=days)
    except (requests.HTTPError, ValueError):
        print("Invalid entry")
        return None
    bars = barset[ticker]
    # a short history would give a feature vector of the wrong length
    if len(bars) != days:
        return None
    # closing prices first, then volumes, so index i and index days + i
    # describe the same trading day
    closes = [bar.c for bar in bars]
    volumes = [bar.v for bar in bars]
    return closes + volumes

# call to get the volume data

# classifyData
# ([float], int) -> bool
# classifyData is passed a set of features (a predictor)
# and classifies the data point as profitable (purchase)
# or not profitable. In essence, we want to confirm that
# drawbacks in price (if a closing price is less than the previous day)
# are increasingly high (so, the stock is still on a positive trajectory) <-- is the sample size too small?
# and that the volume trend confirms the price trend (volume is generally increasing?)

# the thing is, we don't want the neural network to learn
# our basic hand-written function;
# we want it to identify patterns of its own

# what patterns indicate a good purchase point?
# these patterns indicate a positive momentum trend
# so we would want to train the classifier on the 5 day data from before this trend beginning?

# however, we want to make sure that we are not purchasing at the turn of a stock, at a reversal
def _has_higher_lows(series):
    """Return True when no pullback in `series` falls below the previous pullback."""
    low = None
    for previous, current in zip(series, series[1:]):
        if current < previous:
            # a pullback: it must not undercut the last recorded pullback
            if low is not None and current < low:
                return False
            low = current
    return True


def classifyData(predictor, days=None):
    """Label a predictor as profitable (True) or not (False).

    A predictor is `days` closing prices followed by `days` volumes. It is
    considered profitable when both halves show "higher lows": every value
    that drops below its predecessor is at least as high as the previous such
    drop. A series without any drop passes trivially.

    Parameters:
        predictor: sequence of at least 2 * days numbers (prices, then volumes).
        days: length of each half. When omitted it is inferred as
            len(predictor) // 2, so classifyData(predictor) works for the
            lists produced by getData.

    Returns:
        bool: True when both the price half and the volume half pass.

    Raises:
        ValueError: if the predictor holds fewer than 2 * days values.
    """
    if days is None:
        days = len(predictor) // 2
    if len(predictor) < 2 * days:
        raise ValueError(
            "predictor must hold at least 2 * days values "
            "(got %d values for days=%d)" % (len(predictor), days))

    # Split explicitly so that volumes are only ever compared with volumes.
    # (Scanning from index `days` would compare the first volume with the
    # last closing price.)
    prices = predictor[:days]
    volumes = predictor[days:2 * days]

    # the price must be on a bullish trend AND the volume must confirm it
    return _has_higher_lows(prices) and _has_higher_lows(volumes)

# just to test the viability of the classifier (returns true if the price has increased)
def simpClassifyData(predictor, days=10):
    """Return True when the last closing price is above the first one.

    Parameters:
        predictor: sequence whose first `days` entries are closing prices.
        days: number of closing prices at the front of `predictor`. Defaults
            to 10, which keeps the original comparison of index 0 with index 9.
            Pass days=5 for the 5-day predictors built by getData, where
            index 9 is a volume rather than a price.

    Returns:
        bool: predictor[0] < predictor[days - 1].
    """
    return bool(predictor[0] < predictor[days - 1])

# build the training data for the classifier
def buildData(tickers, days=5):
    """Build features and labels for the purchase classifier.

    Parameters:
        tickers: iterable of ticker symbols.
        days: window length passed to getData and classifyData. Defaults to 5,
            so the one-argument calls made elsewhere in this notebook work.

    Returns:
        (x_data, y_data): x_data is a list of predictors (one per ticker that
        returned data); y_data holds the matching labels, 1 when classifyData
        marks the predictor as profitable and 0 otherwise. Tickers for which
        getData returns None are skipped.
    """
    x_data = []
    y_data = []
    for ticker in tickers:
        # fetch once per ticker: every call costs a rate-limited API request
        features = getData(ticker, days)
        if features is None:
            continue
        # perhaps we append the 5-day data from before
        # the ten day period that we are measuring
        x_data.append(features)
        y_data.append(1 if classifyData(features, days) else 0)
    return x_data, y_data

In [8]:
# Tests

# smoke test on one well-known ticker, using the 5-day window
ay = getData('AMZN', 5)
if ay is None:
    print("No data returned for AMZN")
else:
    # index 4 is the most recent closing price in a 5-day predictor
    print(ay[4])
    print(ay)

    otay = classifyData(ay, 5)
    print(otay)

3372.9
[3419.77, 3393.71, 3413.22, 3383.01, 3372.9, 1623081, 2585900, 2476523, 1582459, 1671897]
False


In [9]:
# Draw 100 distinct tickers at random.
# The count is capped so there is a maximum number of api calls.
names = []
while len(names) < 100:
    value = randint(0, len(official_ticks) - 1)
    candidate = official_ticks[value]
    # skip symbols that were already drawn
    if candidate in names:
        continue
    names.append(candidate)

# now, for test data, random batch of 75 names

In [10]:
# Build the training features and labels from the sampled tickers (5-day window).
x_train, y_train = buildData(names, 5)
#x_test, y_test = builData(tickers[201:300])

In [11]:
# Draw 75 distinct tickers for the test set, none of which
# appear in the training sample `names`.
test_names = []
while len(test_names) < 75:
    value = randint(0, len(official_ticks) - 1)
    candidate = official_ticks[value]
    if candidate in names or candidate in test_names:
        continue
    test_names.append(candidate)

In [12]:
# Build the test features and labels with the same 5-day window as the training set.
x_test, y_test = buildData(test_names, 5)

sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...


## Further pre-processing
To center the predictor variables (training data, test data), one could divide all of the values by the average feature values (how to get the average feature?) and/or divide each feature value by its standard deviation.


Dimensionality reduction? (principal component analysis?)

In [13]:
# check data
# features and labels must have matching lengths within each split
print(len(x_train))
print(len(y_train))
print(len(x_test))
print(len(y_test))

print(y_train)
# NOTE: the classes are imbalanced (the 0 label dominates in the recorded output)
print(y_test)

93
93
73
73
[1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]


## Classifier
Here we start the classifier.


How to refine to best fit the data?

To understand:
    - How are the weights initialized? (pytorch uses Xavier initialization)

In [14]:
# Train the network
# (A simple feed foward network)

# refine

# REFINE THE MODEL
# add some "twists" based on your more "intimiate" understanding of what a neural network is, and
# how it can be applied to this problem

# network will use binary classification: "purchase" (1) or "do not purchase" (0)
class NN(nn.Module):
    """Simple feed-forward network built from Linear layers and activations.

    Parameters:
        n_layers: number of layer sizes to use; n_layers - 1 Linear layers
            are created.
        hidden_size: sequence of layer widths. hidden_size[i] is the input
            width and hidden_size[i + 1] the output width of Linear layer i.
        activations: sequence of activation names, one per Linear layer.
            Valid names: 'sigmoid', 'tanh', 'relu', 'identity'.

    Raises:
        ValueError: if hidden_size is empty or shorter than n_layers, if fewer
            than n_layers - 1 activations are given, or if an activation name
            is not one of the valid names.
    """

    # valid activation functions to choose from
    # (open question: can the activations affect the classification in an
    # imbalanced dataset?)
    _ACTIVATIONS = {
        'sigmoid': nn.Sigmoid,
        'tanh': nn.Tanh,
        'relu': nn.ReLU,
        'identity': nn.Identity,
    }

    def __init__(self, n_layers, hidden_size, activations):
        super().__init__()
        if len(hidden_size) == 0:
            raise ValueError("hidden_size must not be empty")
        if len(hidden_size) < n_layers:
            raise ValueError(
                "hidden_size needs at least n_layers entries "
                "(got %d for n_layers=%d)" % (len(hidden_size), n_layers))
        if len(activations) < n_layers - 1:
            raise ValueError(
                "activations needs one entry per Linear layer "
                "(got %d, need %d)" % (len(activations), n_layers - 1))

        layers = []
        for i in range(n_layers - 1):
            name = activations[i]
            # check the single name for this layer, not the whole list
            if name not in self._ACTIVATIONS:
                raise ValueError(
                    "unknown activation %r; expected one of %s"
                    % (name, sorted(self._ACTIVATIONS)))
            # hidden_size[i] is the number of input neurons,
            # hidden_size[i + 1] the number of neurons to send signals to
            layers.append(nn.Linear(hidden_size[i], hidden_size[i + 1]))
            layers.append(self._ACTIVATIONS[name]())

        # ModuleList registers the layers so their parameters are trainable
        self.layers = nn.ModuleList(layers)

    # one forward pass of the neural network
    def forward(self, x):
        """Apply every registered module except the last one to `x`.

        NOTE(review): the last module (the final activation) is skipped, as in
        the original implementation, so the network returns the output of the
        last Linear layer. That is the raw-score input nn.CrossEntropyLoss
        expects; confirm this is intended before changing it.
        """
        for layer in list(self.layers)[:-1]:
            x = layer(x)
        return x

In [15]:
def train(model, optimizer, criterion, n_epoch, data, label):
    """Train `model` on the full dataset for `n_epoch` epochs.

    Parameters:
        model: network to train; called as model(data).
        optimizer: optimizer already bound to the model's parameters.
        criterion: loss called as criterion(prediction, label).
        n_epoch: number of full-batch gradient steps.
        data: training features, converted to a float tensor.
        label: training labels, converted to a long tensor and squeezed.

    Returns:
        model.named_parameters, the bound method (not its result), exactly as
        before; call it to iterate over (name, parameter) pairs.

    Loss is printed roughly five times during training and once for the final
    epoch. Nothing is printed when n_epoch is 0.
    """
    # print about five progress lines; never 0, which would break the modulo
    print_iteration = max(1, n_epoch // 5)
    last_epoch = n_epoch - 1

    # data will be the x_training data as a tensor
    data = torch.tensor(data, dtype=torch.float).to(device)

    # label will be the y_training data as a tensor
    label = torch.tensor(label, dtype=torch.long).squeeze().to(device)

    for epoch in range(n_epoch):
        optimizer.zero_grad()
        predict = model(data)

        # the loss function should be tuned
        loss = criterion(predict, label)
        loss.backward()
        optimizer.step()

        # report inside the loop so progress is visible while training
        if epoch % print_iteration == 0 or epoch == last_epoch:
            print('epoch: ', epoch, '\tloss: ', loss.item())

    return model.named_parameters

In [16]:
def model_accuracy(data, label):
    """Score the module-level `model` on `data` against `label`.

    Parameters:
        data: features, converted to a float tensor on `device`.
        label: true class labels, one per row of `data`.

    Returns:
        (acc, predict): the accuracy from sklearn's accuracy_score and the
        predicted class indices as a numpy array.
    """
    data = torch.tensor(data, dtype=torch.float).to(device)

    # inference only: no autograd graph is needed
    with torch.no_grad():
        scores = model(data)

    # argmax over the last dimension picks the class with the highest score
    predict = torch.argmax(scores, dim=-1).cpu().numpy()
    # accuracy_score expects (y_true, y_pred)
    acc = accuracy_score(label, predict)
    return acc, predict

In [18]:
# put this in the main function
# do we want to train it from scratch at each subsequent call?
# how do we continually refine w/ new data? preserve the state of the model

n_epoch = 1000
# only relevant to the commented-out Adam optimizer below; Adadelta uses its defaults
learning_rate = 0.1

# goal: a network that is as accurate as possible

# want to train the network using different assortments of settings
# 'hs' lists the layer widths (10 input features -> 2 classes),
# 'act' names one activation per Linear layer
settings = {'n1': {'hs': [10, 10, 10, 10, 2], 'act': ['relu','relu','relu','sigmoid']}}


for setting, config in settings.items():
    print('---------------- Network -------------------')
    print(setting)
    hidden_size = config['hs']
    activations = config['act']
    n_layers = len(hidden_size)

    model = NN(n_layers, hidden_size, activations).to(device)

    # goal: make the model more accurate
    # what should we refine? (optimizer choice, handling of imbalanced data)

    #optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    optimizer = optim.Adadelta(model.parameters())

    # TODO: consider class weights, since the labels are imbalanced
    criterion = nn.CrossEntropyLoss()
    #criterion = nn.MSELoss()
    print('--------------- Training -------------------')
    param = train(model, optimizer, criterion, n_epoch, x_train, y_train)
    print('Train Accuracy: ', model_accuracy(x_train, y_train))
    print('--------------- Testing -------------------')
    # evaluate the test set once and reuse the result for the printout
    accuracy, predict = model_accuracy(x_test, y_test)
    print('Test Accuracy: ', (accuracy, predict))



---------------- Network -------------------
n1
--------------- Training -------------------
epoch:  999 	loss:  0.3395290970802307
Train Accuracy:  (0.946236559139785, array([1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0,
       1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0]))
--------------- Testing -------------------
Test Accuracy:  (0.7534246575342466, array([1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0,
       0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1,
       0, 0, 0, 1, 0, 1, 0]))


In [34]:
# findPurchase
# [String] -> String or None
# findPurchase takes in a list of tickers, and iterates through them
# until the model recommends one for purchase. It will return the name of the first valid ticker
# to be purchased.
def findPurchase(tickers):
    """Return the first ticker the module-level `model` classifies as a purchase.

    Parameters:
        tickers: iterable of ticker symbols, checked in order.

    Returns:
        The first ticker whose predicted class is 1, or None (after printing
        "No valid tickers") when no ticker qualifies.
    """
    # the purchase model is fed 5-day predictors: 5 closes + 5 volumes
    days = 5
    for ticker in tickers:
        features = getData(ticker, days)

        # no usable data for this ticker: it cannot be scored
        if features is None or len(features) != 2 * days:
            continue

        # wrap in a list so the model receives a batch of one sample
        data = torch.tensor([features], dtype=torch.float).to(device)
        with torch.no_grad():
            prediction = model(data)

        # argmax picks the class with the highest score; .item() also works on GPU
        predict = torch.argmax(prediction, dim=-1).item()

        # this returns the first ticker identified for purchase
        if predict == 1:
            return ticker
    print("No valid tickers")
    return None
    

In [36]:
# Test for findPurchase

# subscription to Alpaca premium: $9 a month
# perhaps worth it?
# a source of reliable data is needed

# perhaps we would be better off sampling tickers with a random number generator,
# because only a limited number of calls can be made to the api for stock information
findPurchase(official_ticks[:200])
#print(official_ticks[:50])

sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v1/bars/day 3 more time(s)...
sleep 3 seconds and retrying https://data.a

KeyboardInterrupt: 

## Purchase
Here, we make the actual acquisition of the stock. Based on the model that has already been trained, we simply make the call to findPurchase, which will select a stock based on our requirements, and buy it.

(In order to make the process fully automated, we put the whole file into google cloud scheduler, and then create a main function to run it?)

In [None]:
# buy
# void -> void
# The purpose of this function is to execute the purchase of a desired stock. In terms of the proportion of my
# portfolio that I am willing to invest, I shall begin with a base 5 percent for each purchase

# make this dynamic? (i.e, how to adjust the size of the purchase?)
# we need a more detailed classifier, with perhaps more classes
# a confidence scale, if you will, based on the possible strength in the trend
# The network could predict the price increase, for instance
# or just an extremely accurate price prediction (trend prediction is far more viable, and valuable)

def buy():
    """Buy the first ticker recommended by findPurchase with 5% of buying power.

    Submits a market buy order through the module-level live `api`. Nothing is
    submitted when no ticker is recommended, when no usable ask price is
    available, or when 5% of the buying power does not cover one share.

    Returns:
        None.
    """
    # find the ticker to buy
    to_buy = findPurchase(official_ticks)
    if to_buy is None:
        # findPurchase found nothing; do not send an order without a symbol
        return

    # read the buying power fresh; the API reports it as a string
    purchase_power = float(api.get_account().buying_power)

    # base position size: 5 percent of the available buying power
    amount = purchase_power * 0.05

    # the quote is an object; the ask price is the field relevant to a buy
    last_price = float(api.get_last_quote(to_buy).askprice)
    if last_price <= 0:
        return

    # order a whole number of shares
    qty = math.floor(amount / last_price)
    if qty < 1:
        return

    api.submit_order(
        symbol=to_buy,
        qty=qty,
        side='buy',
        type='market',
        time_in_force='gtc')

In [None]:
# running the automated trade

# in terms of training our classifier, the classifyData function
# will need to be tuned the most


# how many times will we run this? Purchase on multiple tickers?
# have to iron this out in the actual calls to the trader
buy()

## Sell
In this segment, the classifier will decide when to sell.
(Track current portfolio? Based on stocks being tracked)

In [58]:
# a second model
# an SVM?
# simple logistic regression?

# first, we would want to get our positions
my_pos = paper_api.list_positions()
# Tabulate the positions just fetched (the name `positions` was never defined).
# NOTE(review): this relies on each position entity exposing its raw response
# dict as `_raw` -- confirm against the installed alpaca_trade_api version.
pos_df = pd.DataFrame([position._raw for position in my_pos])


# getSymbols
# [position] -> [String]
def getSymbols(positions):
    """Collect the ticker symbol of every position, in order.

    Parameters:
        positions: sequence of position objects, each exposing a `symbol`
            attribute (all of the current assets associated with the account).

    Returns:
        A list with one symbol per position.
    """
    return [holding.symbol for holding in positions]


# ticker symbols of the positions currently held in the paper account
uh = getSymbols(my_pos)

print(uh)

# how to have a list of tickers?
# how to find a sell point

# classifier:
# we want to train a classifier to make the identification automatically
# 10 day data on reversals?
# do we want to slowly build this identification ourselves? Or use some sort of preprocessed database?

# reversal
# a lower low than the previous pullback?


# For the classifier, we first need to build the dataset
# so, we will try to identify if a reversal is about to take place
# for as many api calls as possible

# reversalSimple
# String -> Boolean (or None when no data is available)
def reversalSimple(ticker):
    """Heuristically flag a possible "head and shoulders" reversal for `ticker`.

    The textbook pattern this is modelled on:
      1.) prior uptrend
      2.) left shoulder on heavier volume
      3.) rally to new highs on lighter volume
      4.) decline that moves below previous peak, approaches previous reaction low
      5.) third rally on noticeably light volume that fails to reach the top of the head
      6.) a close below the neckline
      7.) return move to the neckline followed by lower lows
    (Open question: at that point, is it already too late? We have lost the peak.)

    Only a simplified subset is checked, on a 10-day predictor from getData
    (indices 0-9 are closing prices, 10-19 the matching volumes):
      * days 1-5 form an uptrend according to classifyData;
      * day 7 closes above day 5 on lighter volume than day 5;
      * day 8 closes below day 7, and day 9 closes above day 8 on lighter
        volume than day 7.
    The last day is not used.

    Parameters:
        ticker: the symbol to check.

    Returns:
        True when every check passes, False when any check fails, and None
        when no 10-day data could be fetched.
    """
    try:
        # build data over 10 day period, for reversal classification
        toTrack = getData(ticker, 10)
    except (requests.HTTPError, ValueError):
        print("Invalid entry")
        return None
    # getData signals a failed or short fetch with None
    if toTrack is None:
        return None

    # check uptrend in the first 5 days: 5 closes followed by their 5 volumes
    # (lists are joined with +; they have no concat method)
    toCheck = toTrack[:5] + toTrack[10:15]
    if not classifyData(toCheck, 5):
        return False

    # if there was a prior uptrend in the previous 5 days, on the 7th day there
    # should be a new high on light volume
    if toTrack[6] > toTrack[4] and toTrack[16] < toTrack[14]:
        print("possible reversal")
    else:
        return False

    # then, decline moves below previous peak, and rally with lighter volume
    # than the previous volume peak
    return (toTrack[7] < toTrack[6]
            and toTrack[8] > toTrack[7]
            and toTrack[18] < toTrack[16])


# build the training data for the sell classifier
def buildData_sell(tickers, days):
    """Build features and labels for the sell (reversal) classifier.

    Parameters:
        tickers: iterable of ticker symbols.
        days: window length passed to getData for the feature vector.

    Returns:
        (x_data, y_data): x_data is a list of predictors (one per ticker that
        returned data); y_data holds the matching labels, 1 when
        reversalSimple flags the ticker and 0 otherwise. Tickers for which
        getData returns None are skipped.
    """
    x_data = []
    y_data = []
    for ticker in tickers:
        # fetch once per ticker: every call costs a rate-limited API request
        features = getData(ticker, days)
        if features is None:
            continue
        x_data.append(features)
        # 1: possible reversal detected, 0: no reversal detected
        y_data.append(1 if reversalSimple(ticker) else 0)
    return x_data, y_data




# Planned: compute the current RSI together with the previous RSI values to
# solidify the trend, and detect when the ticker crosses a chosen RSI threshold.
# Open question: how to get the live RSI?
def calcRSI(ticker):
    """Placeholder for the RSI calculation; not implemented, so it returns None."""
    return None
    

['TSLA', 'PLTR', 'AMZN', 'AAPL']


In [None]:
# build the sell-classifier datasets from the sampled ticker names,
# using 10-day windows for both the training and the test split
x_train_sell, y_train_sell = buildData_sell(names, 10)
x_test_sell, y_test_sell = buildData_sell(test_names, 10)

In [None]:
# now, for the classifier itself
# a linear model trained by SGD with the perceptron loss
# (loss='hinge' would give a linear SVM instead)

# how to set the hyperparameters
# need an intimate understanding of these linear models
# The 'adaptive' learning-rate schedule starts from eta0 and requires it to be
# positive, so it is set explicitly. 0.01 is a starting point to tune.
sell_classifier = SGDClassifier(loss='perceptron', learning_rate='adaptive', eta0=0.01)
sell_classifier.fit(x_train_sell, y_train_sell)

# to see the effectiveness of the model
# we will predict on the test data


# then, we use the classifier to make predictions pertaining to
# 10-day information around our current positions, if any exist

# if there is the possibility of a head/shoulders reversal
# we sell, before it begins the true bearish trend

# findSale
# [String] -> [String]
def findSale(myPositions):
    """Return the held tickers that the sell classifier flags as reversing.

    Parameters:
        myPositions: list of the ticker symbols of all of the user's current,
            active positions.

    Returns:
        The subset of myPositions (order preserved) for which the module-level
        sell_classifier predicts class 1 on the ticker's 10-day predictor.
        Tickers without usable data are skipped.
    """
    toSell = []
    for position in myPositions:
        toCheck = getData(position, 10)
        # no data for this ticker: nothing to classify
        if toCheck is None:
            continue
        # predict expects a 2-D array: one row per sample
        if sell_classifier.predict([toCheck])[0] == 1:
            toSell.append(position)
    return toSell


# makeSale
# parameters
# api: paper, or actual?
def makeSale(api):
    """Sell every held position that findSale flags as reversing.

    Parameters:
        api: the Alpaca REST client to trade with (paper or live).

    Returns:
        None. One sell order for the full held quantity is submitted per
        flagged symbol.
    """
    # map each held symbol to the quantity held, straight from the account
    held = {position.symbol: position.qty for position in api.list_positions()}

    for symbol in findSale(list(held)):
        # NOTE(review): the original requested type='limit' but supplied no
        # limit price. A market order is used so the order is well-formed;
        # switch back to 'limit' with an explicit limit_price if price control
        # is wanted.
        api.submit_order(
            symbol=symbol,
            qty=held[symbol],
            side='sell',
            type='market',
            time_in_force='gtc')
    

30
