# Algorithm Trading Machine Learning End to End

The purpose is to build an end-to-end skeleton for algo-trade making use of machine learning models, with which we can plug in any premium chosen model and estimates a practical result. Our sample target is to trade for 15 days starting with 10K.

## Steps

### Define tickers

With our purpose of trading heeding technical clues, not investing, the ticker choices shall be focusing proper curves. But so far we just use 4 random ones AAPL, AMZN, MSFT and GOOG. It would be easy to search and find other better tickers to pass through our sketeton to have practical measures.

### Get data and analyze

We get market historical data from Alpaca, analyze the data hope to find some correlation between daily return, once lagged daily return, and twice lagged. So far
 

In [482]:
import numpy as np
import pandas as pd
from dotenv import load_dotenv
import os
import alpaca_trade_api as tradeapi
from trade import trade_action
from datetime import datetime, timedelta
from sklearn import svm

In [483]:
def market_data( start, end, tickers ):
    load_dotenv()
    alpaca_api_key = os.getenv('ALPACA_API_KEY')
    alpaca_secret_key = os.getenv('ALPACA_SECRET_KEY')

    # Create the Alpaca API object
    alpaca_api = tradeapi.REST(
       alpaca_api_key,
       alpaca_secret_key,
       api_version = 'v2'
    )
    
    # Set timeframe to "1Day" for Alpaca API
    timeframe = "1Day"
    
    start_date = pd.Timestamp(start,tz='America/New_York')
    end_date = pd.Timestamp(end,tz='America/New_York')
           
    # Get number_of_years' worth of historical data for tickers
    data_df = alpaca_api.get_bars(
        tickers,
        timeframe,
        start = start_date.isoformat(),
        end = end_date.isoformat()
    ).df
    if len(data_df) == 0:
        return []
    return data_df

#df_market = market_data( '2023-08-01', '2023-08-21', tickers=['TSLA'] )

## Plan
* Get all ticker data for a year
* Get one ticker, get signal column
* Slice backwards, 15 for pred, 30 for train
* Scale train
* Rolling train, 30 at a time, predict one day at a time, till the end
* Store the prediction in my instr_df

In [484]:
def derive_instr( instrs ):
    buffer = [0]*4
    instr_df = pd.DataFrame()
    for index, instr in instrs.iterrows():
        #print(index, buffer, instr)
        one_row = pd.DataFrame()
        for idx in range(4):
            if buffer[idx]==1 and instr[idx]==-1:
                #print('In sell:', index, idx, -1)
                one_row = pd.DataFrame([[index, instr.index[idx], -1]])
                buffer[idx]=-1
                break
            elif buffer[idx]!=1 and instr[idx]==1:
                #print('In buy:',index, idx, 1)
                one_row = pd.DataFrame([[index, instr.index[idx], 1]])
                buffer[idx]=1
                break

        if len(one_row) == 0:
            one_row = pd.DataFrame([[index, instr.index[0], 0]])
        instr_df = pd.concat([instr_df, one_row])
    
    return instr_df

# my main

## 1. get market data

In [542]:
today = datetime.today().date()
tickers = ['AAPL', 'AMZN', 'MSFT', 'GOOG']
date_from = today - timedelta(days=365)
data_df = market_data(date_from, today, tickers)

dfs = {}
for ticker in tickers:
    df = data_df[data_df['symbol']==ticker]
    dfs[ticker] = df
                

## 2. prep data for algo

In [543]:
# signals is a df globally
# construct singals data:

signals_dfs = {}

for key in dfs:
    signals = dfs[key].loc[:,['vwap']]
    signals['daily_return'] = signals.pct_change()
    signals['lagged_daily_return'] = signals['daily_return'].shift(1)
    signals['twice_lagged'] = signals['daily_return'].shift(2)
    signals.dropna(inplace=True)
    signals_dfs[key]=signals
        
# visualize data prep for anz later
def visualize( ticker ):
    return signals_dfs[ticker][['daily_return','lagged_daily_return','twice_lagged']].hvplot(
                      title=ticker,
                      frame_width=700)
    
visualize('GOOG')

## 3. derive signal based on the observation of data

In [544]:
# derive signal - the singals df is global
for key in signals_dfs:
    # signals_dfs[key]['signal'] = np.where(
    #     (signals_dfs[key]['lagged_daily_return']>0) &
    #     (signals_dfs[key]['twice_lagged']>0), 
    #         -1, 
    #         np.where(
    #             (signals['lagged_daily_return']<0) &
    #             (signals['twice_lagged']<0), 1, 0 
    #                 )
    # )
    signals_dfs[key]['signal'] = np.where(
        signals_dfs[key]['lagged_daily_return']<0, 1, -1)


## 4. SVR on pct_change of vwap

### 4.1 the SVR ML

In [545]:
def one_cycle( cycle ):
    """ one cycle of 45 (30+15) days, using SVR directly on vwap pct_change """
   
    # use lagged returns to train SVM
    data = cycle.loc[:, ['lagged_daily_return', 'twice_lagged']]
    label = cycle['signal']
    instrs = pd.DataFrame()
    model = svm.SVR()
    
    # rolling train for 30 days then predict 1 day, 15 cycles for 15 instructions
    for instri in range(0,15):
        iloc_to = instri + 90
        X = data.iloc[:iloc_to,:]
        y = label.iloc[:iloc_to]
        model.fit(X, y)
        signal = model.predict(data.iloc[iloc_to:iloc_to+1,:])
        
        if signal > 0:
            instr = 1
        else:
            instr = -1
        #one_row = pd.DataFrame([[signals.index[iloc_to].date(), cycle['symbol'][0], instr]])
        one_row = pd.DataFrame([[cycle.index[iloc_to].date(), instr]])
        instrs = pd.concat([instrs,one_row], join='outer')
        
    return instrs

### 4.2 One cycle of 105 days (90+15)

In [546]:
instrs = pd.DataFrame()
for key in signals_dfs:
    cycle = signals_dfs[key].iloc[-105:,:]
    cycle_signals = one_cycle(cycle).set_index(0)
    cycle_signals.columns = [key]
    #instrs = pd.concat([instrs, one_cycle(cycle)])
    instrs = pd.concat([instrs, cycle_signals],axis=1)

instrs

Unnamed: 0_level_0,AAPL,AMZN,MSFT,GOOG
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-08-11,-1,-1,-1,-1
2023-08-14,1,1,1,1
2023-08-15,-1,-1,-1,-1
2023-08-16,1,1,-1,1
2023-08-17,1,1,1,1
2023-08-18,1,1,1,-1
2023-08-21,1,1,1,1
2023-08-22,-1,-1,-1,-1
2023-08-23,-1,-1,-1,-1
2023-08-24,-1,-1,-1,-1


#### 4.2.1 15-day trade actions

In [547]:
instr_df = derive_instr(instrs)
instr_df

Unnamed: 0,0,1,2
0,2023-08-11,AAPL,0
0,2023-08-14,AAPL,1
0,2023-08-15,AAPL,-1
0,2023-08-16,AAPL,1
0,2023-08-17,AMZN,1
0,2023-08-18,MSFT,1
0,2023-08-21,GOOG,1
0,2023-08-22,AAPL,-1
0,2023-08-23,AMZN,-1
0,2023-08-24,MSFT,-1


#### 4.2.2 show result

In [548]:
trade_action(instr_df)

On 2023-08-11 trade AAPL
{'action': 'none', 'price': 177.57, 'bal': 10000.0, 'share': 0, 'status': -1, 'msg': 'invalid action (1/-1 only)'}
On 2023-08-14 trade AAPL
{'action': 'buy', 'price': 178.61, 'bal': 7678.07, 'share': 13, 'status': 0, 'msg': 'success'}
On 2023-08-15 trade AAPL
{'action': 'sell', 'price': 178.21, 'bal': 9994.8, 'share': 13, 'status': 0, 'msg': 'success'}
On 2023-08-16 trade AAPL
{'action': 'buy', 'price': 177.18, 'bal': 7514.28, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-17 trade AMZN
{'action': 'buy', 'price': 134.76, 'bal': 5088.6, 'share': 18, 'status': 0, 'msg': 'success'}
On 2023-08-18 trade MSFT
{'action': 'buy', 'price': 315.23, 'bal': 2881.99, 'share': 7, 'status': 0, 'msg': 'success'}
On 2023-08-21 trade GOOG
{'action': 'buy', 'price': 128.3, 'bal': 444.29, 'share': 19, 'status': 0, 'msg': 'success'}
On 2023-08-22 trade AAPL
{'action': 'sell', 'price': 177.06, 'bal': 2923.13, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-23 trade AMZ

### 4.3 rolling run 500 times

In [554]:
def one_run(str_loc):
    instrs = pd.DataFrame()
    instrs = pd.DataFrame()
    for key in signals_dfs:
        cycle = signals_dfs[key].iloc[str_loc:,:]
        cycle_signals = one_cycle(cycle).set_index(0)
        cycle_signals.columns = [key]
        instrs = pd.concat([instrs, cycle_signals],axis=1)
        
    instr_df = derive_instr(instrs)
    return trade_action(instr_df, verbose=0)

final_worths = []
for n in range(500):
    final_worths.append( one_run(-105-n) )

pd.DataFrame(final_worths).hvplot(kind='hist')

## 5. SVC on scaled all data

In [523]:
# shifted is a df globally. shift(3) is required to match signal (daily return, lagged, twice_lagged)
# shifted data is for 
shifted_dfs = {}
for key in ticker_data:
    shifted_dfs[key] = dfs[key].drop('symbol',axis=1).shift(3).dropna()

### 5.1 the SVC ML

In [524]:
from sklearn.preprocessing import StandardScaler

def svc_ML( data, label ):
    """ one cycle of 45 (30+15) days, using SVR on all cols """
   
    # scale for X
    scaler = StandardScaler()
    scaler.fit(data)
    all_data = pd.DataFrame(scaler.transform(data))
    all_label = label
    
    instrs = pd.DataFrame()
    model = svm.SVC(kernel='rbf')
    
    # rolling train for 30 days then predict 1 day, 15 cycles for 15 instructions
    for instri in range(0,15):
        iloc_to = instri + 90
        X = all_data.iloc[:iloc_to,:]
        y = all_label.iloc[:iloc_to]
        model.fit(X, y)
        signal = model.predict(all_data.iloc[iloc_to:iloc_to+1,:])
        instr = signal[0]
        one_row = pd.DataFrame([[data.index[iloc_to].date(), instr]])
        instrs = pd.concat([instrs,one_row], join='outer')
        
    return instrs

### 5.2 one cycle of 105 days

In [525]:
instrs = pd.DataFrame()
for key in shifted_dfs:
    cycle = shifted_dfs[key].iloc[-105:,:]
    data = cycle
    label = signals_dfs[key]['signal']
    cycle_signals = svr_ML(data, label).set_index(0)
    cycle_signals.columns = [key]
    instrs = pd.concat([instrs, cycle_signals],axis=1)

instrs

Unnamed: 0_level_0,AAPL,AMZN,MSFT,GOOG
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-08-11,1,1,-1,1
2023-08-14,-1,1,-1,1
2023-08-15,-1,1,-1,-1
2023-08-16,-1,1,-1,1
2023-08-17,-1,1,-1,-1
2023-08-18,-1,1,-1,-1
2023-08-21,-1,1,-1,-1
2023-08-22,-1,1,-1,1
2023-08-23,-1,1,-1,1
2023-08-24,-1,1,-1,1


#### 5.2.1 15-day trade actions

In [526]:
instr_df = derive_instr(instrs)
instr_df

Unnamed: 0,0,1,2
0,2023-08-11,AAPL,1
0,2023-08-14,AAPL,-1
0,2023-08-15,AMZN,1
0,2023-08-16,GOOG,1
0,2023-08-17,GOOG,-1
0,2023-08-18,AAPL,0
0,2023-08-21,AAPL,0
0,2023-08-22,GOOG,1
0,2023-08-23,AAPL,0
0,2023-08-24,AAPL,0


#### 5.2.2 show result

In [527]:
trade_action(instr_df)

On 2023-08-11 trade AAPL
{'action': 'buy', 'price': 177.57, 'bal': 6803.74, 'share': 18, 'status': 0, 'msg': 'success'}
On 2023-08-14 trade AAPL
{'action': 'sell', 'price': 178.61, 'bal': 10018.72, 'share': 18, 'status': 0, 'msg': 'success'}
On 2023-08-15 trade AMZN
{'action': 'buy', 'price': 139.06, 'bal': 6820.34, 'share': 23, 'status': 0, 'msg': 'success'}
On 2023-08-16 trade GOOG
{'action': 'buy', 'price': 129.44, 'bal': 3584.34, 'share': 25, 'status': 0, 'msg': 'success'}
On 2023-08-17 trade GOOG
{'action': 'sell', 'price': 130.81, 'bal': 6854.59, 'share': 25, 'status': 0, 'msg': 'success'}
On 2023-08-18 trade AAPL
{'action': 'none', 'price': 173.46, 'bal': 6854.59, 'share': 0, 'status': -1, 'msg': 'invalid action (1/-1 only)'}
On 2023-08-21 trade AAPL
{'action': 'none', 'price': 175.19, 'bal': 6854.59, 'share': 0, 'status': -1, 'msg': 'invalid action (1/-1 only)'}
On 2023-08-22 trade GOOG
{'action': 'buy', 'price': 129.67, 'bal': 3612.84, 'share': 25, 'status': 0, 'msg': 'success

### 5.3 rolling run 500 times

In [528]:
def one_run(str_loc):
    instrs = pd.DataFrame()
    for key in shifted_dfs:
        cycle = shifted_dfs[key].iloc[str_loc:,:]
        cycle_signals = svc_ML(cycle, signals_dfs[key]['signal']).set_index(0)
        cycle_signals.columns = [key]
        instrs = pd.concat([instrs, cycle_signals],axis=1)
        
    instr_df = derive_instr(instrs)
    return trade_action(instr_df, verbose=0)

final_worths = []
for n in range(500):
    final_worths.append( one_run(-105-n) )

pd.DataFrame(final_worths).hvplot(kind='hist')

sleep 3 seconds and retrying https://data.alpaca.markets/v2/stocks/bars 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v2/stocks/bars 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v2/stocks/bars 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v2/stocks/bars 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v2/stocks/bars 3 more time(s)...
sleep 3 seconds and retrying https://data.alpaca.markets/v2/stocks/bars 3 more time(s)...


## 6. svr on pct_change of all columns

In [None]:
# shifted is a df globally. first we cal pct_change, then shift(2) is required to match signal (daily return, lagged, twice_lagged)
shifted_dfs = {}
for key in ticker_data:
    shifted_dfs[key] = dfs[key].drop('symbol',axis=1).pct_change().shift(2)
    shifted_dfs[key].dropna(inplace=True)

shifted_dfs

### 6.1 train and predict

In [513]:
def one_cycle( data, label ):
    """ one cycle of 45 (30+15) days, using SVR directly on vwap pct_change """
   
    # use lagged returns to train SVM
    instrs = pd.DataFrame()
    model = svm.SVR()
    
    # rolling train for 30 days then predict 1 day, 15 cycles for 15 instructions
    for instri in range(0,15):
        iloc_to = instri + 30
        X = data.iloc[:iloc_to,:]
        y = label.iloc[:iloc_to]
        model.fit(X, y)
        signal = model.predict(data.iloc[iloc_to:iloc_to+1,:])
        
        if signal > 0:
            instr = 1
        else:
            instr = -1
        #one_row = pd.DataFrame([[signals.index[iloc_to].date(), cycle['symbol'][0], instr]])
        one_row = pd.DataFrame([[cycle.index[iloc_to].date(), instr]])
        instrs = pd.concat([instrs,one_row], join='outer')
        
    return instrs

### 6.2 one cycle of 45 days

In [514]:
instrs = pd.DataFrame()
for key in shifted_dfs:
    cycle = shifted_dfs[key].iloc[-45:,:]
    cycle_signals = one_cycle(cycle, signals_dfs[key]['signal']).set_index(0)
    cycle_signals.columns = [key]
    instrs = pd.concat([instrs, cycle_signals],axis=1)

instrs

Unnamed: 0_level_0,AAPL,AMZN,MSFT,GOOG
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-08-11,1,1,-1,1
2023-08-14,1,1,1,1
2023-08-15,1,1,-1,1
2023-08-16,-1,1,1,1
2023-08-17,1,1,-1,1
2023-08-18,1,1,-1,1
2023-08-21,1,1,-1,1
2023-08-22,-1,-1,-1,1
2023-08-23,-1,1,-1,1
2023-08-24,-1,1,1,1


#### 6.2.1 15 day trade action

In [515]:
instr_df = derive_instr(instrs)
instr_df

Unnamed: 0,0,1,2
0,2023-08-11,AAPL,1
0,2023-08-14,AMZN,1
0,2023-08-15,GOOG,1
0,2023-08-16,AAPL,-1
0,2023-08-17,AAPL,1
0,2023-08-18,AAPL,0
0,2023-08-21,AAPL,0
0,2023-08-22,AAPL,-1
0,2023-08-23,AAPL,0
0,2023-08-24,MSFT,1


#### 6.2.2 show result

In [516]:
trade_action(instr_df)

On 2023-08-11 trade AAPL
{'action': 'buy', 'price': 177.57, 'bal': 7514.02, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-14 trade AMZN
{'action': 'buy', 'price': 139.3, 'bal': 5145.92, 'share': 17, 'status': 0, 'msg': 'success'}
On 2023-08-15 trade GOOG
{'action': 'buy', 'price': 130.92, 'bal': 2658.44, 'share': 19, 'status': 0, 'msg': 'success'}
On 2023-08-16 trade AAPL
{'action': 'sell', 'price': 177.18, 'bal': 5138.96, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-17 trade AAPL
{'action': 'buy', 'price': 175.53, 'bal': 2681.54, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-18 trade AAPL
{'action': 'none', 'price': 173.46, 'bal': 2681.54, 'share': 14, 'status': -1, 'msg': 'invalid action (1/-1 only)'}
On 2023-08-21 trade AAPL
{'action': 'none', 'price': 175.19, 'bal': 2681.54, 'share': 14, 'status': -1, 'msg': 'invalid action (1/-1 only)'}
On 2023-08-22 trade AAPL
{'action': 'sell', 'price': 177.06, 'bal': 5160.38, 'share': 14, 'status': 0, 'msg': 'success

### 6.3 rolling run 500 times

In [522]:
def one_run(str_loc):
    instrs = pd.DataFrame()
    for key in shifted_dfs:
        cycle = shifted_dfs[key].iloc[str_loc:,:]
        cycle_signals = one_cycle(cycle, signals_dfs[key]['signal']).set_index(0)
        cycle_signals.columns = [key]
        instrs = pd.concat([instrs, cycle_signals],axis=1)
        
    instr_df = derive_instr(instrs)
    return trade_action(instr_df, verbose=0)

final_worths = []
for n in range(500):
    final_worths.append( one_run(-45-n) )

pd.DataFrame(final_worths).hvplot(kind='hist')

## 7. random trade action

### 7.1 randomly generate trade actions

In [420]:
instrs = np.random.randn(50, 1)

def sample_df():
    instri = 0

    # using alpaca data for the dates to avoid non-trading days
    df_market = dfs[tickers[0]].iloc[-15:,:]
    
    df = pd.DataFrame()
    for index, data in df_market.iterrows():
        if instrs[instri] > 0:
            instr = 1
        else:
            instr = -1
        instri += 1
        one_row = pd.DataFrame([[index.date(), tickers[np.random.randint(4)], instr]])
        df = pd.concat([df,one_row], join='outer')
    return df

random_actions = sample_df()
random_actions

Unnamed: 0,0,1,2
0,2023-08-10,AAPL,-1
0,2023-08-11,AAPL,1
0,2023-08-14,GOOG,-1
0,2023-08-15,AMZN,-1
0,2023-08-16,AMZN,-1
0,2023-08-17,AAPL,1
0,2023-08-18,GOOG,1
0,2023-08-21,AMZN,-1
0,2023-08-22,MSFT,-1
0,2023-08-23,MSFT,-1


### 7.2 show result

In [421]:
trade_action(random_actions)

On 2023-08-10 trade AAPL
{'action': 'none', 'price': 178.95, 'bal': 10000.0, 'share': 0, 'status': -1, 'msg': 'no share to sell'}
On 2023-08-11 trade AAPL
{'action': 'buy', 'price': 177.57, 'bal': 7514.02, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-14 trade GOOG
{'action': 'none', 'price': 130.8, 'bal': 7514.02, 'share': 0, 'status': -1, 'msg': 'no share to sell'}
On 2023-08-15 trade AMZN
{'action': 'none', 'price': 139.06, 'bal': 7514.02, 'share': 0, 'status': -1, 'msg': 'no share to sell'}
On 2023-08-16 trade AMZN
{'action': 'none', 'price': 136.14, 'bal': 7514.02, 'share': 0, 'status': -1, 'msg': 'no share to sell'}
On 2023-08-17 trade AAPL
{'action': 'buy', 'price': 175.53, 'bal': 5056.6, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-18 trade GOOG
{'action': 'buy', 'price': 128.5, 'bal': 2615.1, 'share': 19, 'status': 0, 'msg': 'success'}
On 2023-08-21 trade AMZN
{'action': 'none', 'price': 134.08, 'bal': 2615.1, 'share': 0, 'status': -1, 'msg': 'no share to se

### 7.3 run 500 times random actions see distribution

In [422]:
final_worths = []
for n in range(500):
    final_worths.append(trade_action( sample_df(), verbose=0 ))
pd.DataFrame(final_worths).hvplot(kind='hist')