In [None]:
import numpy as np
import pandas as pd
from dotenv import load_dotenv
import os
import alpaca_trade_api as tradeapi
from trade import trade_action
from datetime import datetime, timedelta
from sklearn import svm

In [None]:
def market_data( start, end, tickers ):
    load_dotenv()
    alpaca_api_key = os.getenv('ALPACA_API_KEY')
    alpaca_secret_key = os.getenv('ALPACA_SECRET_KEY')

    # Create the Alpaca API object
    alpaca_api = tradeapi.REST(
       alpaca_api_key,
       alpaca_secret_key,
       api_version = 'v2'
    )
    
    # Set timeframe to "1Day" for Alpaca API
    timeframe = "1Day"
    
    start_date = pd.Timestamp(start,tz='America/New_York')
    end_date = pd.Timestamp(end,tz='America/New_York')
           
    # Get number_of_years' worth of historical data for tickers
    data_df = alpaca_api.get_bars(
        tickers,
        timeframe,
        start = start_date.isoformat(),
        end = end_date.isoformat()
    ).df
    if len(data_df) == 0:
        return []
    return data_df

#df_market = market_data( '2023-08-01', '2023-08-21', tickers=['TSLA'] )

## Plan
* Get all ticker data for a year
* Get one ticker, get signal column
* Slice backwards, 15 for pred, 30 for train
* Scale train
* Rolling train, 30 at a time, predict one day at a time, till the end
* Store the prediction in my instr_df

In [432]:
def derive_instr( instrs ):
    buffer = [0]*4
    instr_df = pd.DataFrame()
    for index, instr in instrs.iterrows():
        #print(index, buffer, instr)
        one_row = pd.DataFrame()
        for idx in range(4):
            if buffer[idx]==1 and instr[idx]==-1:
                #print('In sell:', index, idx, -1)
                one_row = pd.DataFrame([[index, instr.index[idx], -1]])
                buffer[idx]=-1
                break
            elif buffer[idx]!=1 and instr[idx]==1:
                #print('In buy:',index, idx, 1)
                one_row = pd.DataFrame([[index, instr.index[idx], 1]])
                buffer[idx]=1
                break

        if len(one_row) == 0:
            one_row = pd.DataFrame([[index, instr.index[0], 0]])
        instr_df = pd.concat([instr_df, one_row])
    
    return instr_df

# my main

## 1. get market data

In [445]:
today = datetime.today().date()
tickers = ['AAPL', 'AMZN', 'MSFT', 'GOOG']
date_from = today - timedelta(days=365)
data_df = market_data(date_from, today, tickers)

dfs = {}
for ticker in tickers:
    df = data_df[data_df['symbol']==ticker]
    dfs[ticker] = df
                  

## 2. prep data for algo

In [446]:
# signals is a df globally
# construct singals data:
signals_dfs = {}
first = True
for key in dfs:
    signals = dfs[key].loc[:,['vwap']]
    signals['daily_return'] = signals.pct_change()
    signals['lagged_daily_return'] = signals['daily_return'].shift(1)
    signals['twice_lagged'] = signals['daily_return'].shift(2)
    signals.dropna(inplace=True)
    signals_dfs[key]=signals
    if first:
        visual = signals[['daily_return','lagged_daily_return','twice_lagged']].hvplot(
            title=key,
            frame_width=700)
        first = False
    else:
        visual = ( visual +
                  signals[['daily_return','lagged_daily_return','twice_lagged']].hvplot(
                      title=key,
                      frame_width=700)
                 )
        
# visualize data prep for anz later
visual

## 3. derive signal based on the observation of data

In [453]:
# derive signal - the singals df is global
for key in signals_dfs:
    # signals_dfs[key]['signal'] = np.where(
    #     (signals_dfs[key]['lagged_daily_return']>0) &
    #     (signals_dfs[key]['twice_lagged']>0), 
    #         -1, 
    #         np.where(
    #             (signals['lagged_daily_return']<0) &
    #             (signals['twice_lagged']<0), 1, 0 
    #                 )
    # )
    signals_dfs[key]['signal'] = np.where(
        signals_dfs[key]['lagged_daily_return']<0, 1, -1)


## 4. SVR on pct_change of vwap

### 4.1 the SVR ML

In [457]:
def one_cycle( cycle ):
    """ one cycle of 45 (30+15) days, using SVR directly on vwap pct_change """
   
    # use lagged returns to train SVM
    data = cycle.drop(['vwap','daily_return','signal'],axis=1)
    label = cycle['signal']
    instrs = pd.DataFrame()
    model = svm.SVR()
    
    # rolling train for 30 days then predict 1 day, 15 cycles for 15 instructions
    for instri in range(0,15):
        iloc_to = instri + 90
        X = data.iloc[:iloc_to,:]
        y = label.iloc[:iloc_to]
        model.fit(X, y)
        signal = model.predict(data.iloc[iloc_to:iloc_to+1,:])
        
        if signal > 0:
            instr = 1
        else:
            instr = -1
        #one_row = pd.DataFrame([[signals.index[iloc_to].date(), cycle['symbol'][0], instr]])
        one_row = pd.DataFrame([[cycle.index[iloc_to].date(), instr]])
        instrs = pd.concat([instrs,one_row], join='outer')
        
    return instrs

### 4.2 One cycle of 45 days (30+15)

In [458]:
instrs = pd.DataFrame()
for key in signals_dfs:
    cycle = signals_dfs[key].iloc[-105:,:]
    cycle_signals = one_cycle(cycle).set_index(0)
    cycle_signals.columns = [key]
    #instrs = pd.concat([instrs, one_cycle(cycle)])
    instrs = pd.concat([instrs, cycle_signals],axis=1)

instrs

Unnamed: 0_level_0,AAPL,AMZN,MSFT,GOOG
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-08-10,1,1,1,1
2023-08-11,-1,-1,-1,-1
2023-08-14,1,1,1,1
2023-08-15,-1,-1,-1,-1
2023-08-16,1,1,-1,1
2023-08-17,1,1,1,1
2023-08-18,1,1,1,-1
2023-08-21,1,1,1,1
2023-08-22,-1,-1,-1,-1
2023-08-23,-1,-1,-1,-1


#### 4.2.1 15-day trade actions

In [459]:
instr_df = derive_instr(instrs)
instr_df

Unnamed: 0,0,1,2
0,2023-08-10,AAPL,1
0,2023-08-11,AAPL,-1
0,2023-08-14,AAPL,1
0,2023-08-15,AAPL,-1
0,2023-08-16,AAPL,1
0,2023-08-17,AMZN,1
0,2023-08-18,MSFT,1
0,2023-08-21,GOOG,1
0,2023-08-22,AAPL,-1
0,2023-08-23,AMZN,-1


#### 4.2.2 show result

In [460]:
trade_action(instr_df)

On 2023-08-10 trade AAPL
{'action': 'buy', 'price': 178.95, 'bal': 7673.65, 'share': 13, 'status': 0, 'msg': 'success'}
On 2023-08-11 trade AAPL
{'action': 'sell', 'price': 177.57, 'bal': 9982.06, 'share': 13, 'status': 0, 'msg': 'success'}
On 2023-08-14 trade AAPL
{'action': 'buy', 'price': 178.61, 'bal': 7660.13, 'share': 13, 'status': 0, 'msg': 'success'}
On 2023-08-15 trade AAPL
{'action': 'sell', 'price': 178.21, 'bal': 9976.86, 'share': 13, 'status': 0, 'msg': 'success'}
On 2023-08-16 trade AAPL
{'action': 'buy', 'price': 177.18, 'bal': 7496.34, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-17 trade AMZN
{'action': 'buy', 'price': 134.76, 'bal': 5070.66, 'share': 18, 'status': 0, 'msg': 'success'}
On 2023-08-18 trade MSFT
{'action': 'buy', 'price': 315.23, 'bal': 2864.05, 'share': 7, 'status': 0, 'msg': 'success'}
On 2023-08-21 trade GOOG
{'action': 'buy', 'price': 128.3, 'bal': 426.35, 'share': 19, 'status': 0, 'msg': 'success'}
On 2023-08-22 trade AAPL
{'action': 'sell

## 5. SVC on scaled all data

In [None]:
# shifted is a df globally. shift(3) is required to match signal (daily return, lagged, twice_lagged)
# shifted data is for 
shifted_dfs = {}
for key in ticker_data:
    shifted_dfs[key] = dfs[key].drop('symbol',axis=1).shift(3).dropna()

### 5.1 the SVC ML

In [478]:
from sklearn.preprocessing import StandardScaler

def svc_ML( data, label ):
    """ one cycle of 45 (30+15) days, using SVR on all cols """
   
    # scale for X
    scaler = StandardScaler()
    scaler.fit(data)
    all_data = pd.DataFrame(scaler.transform(data))
    all_label = label
    
    instrs = pd.DataFrame()
    model = svm.SVC(kernel='rbf')
    
    # rolling train for 30 days then predict 1 day, 15 cycles for 15 instructions
    for instri in range(0,15):
        iloc_to = instri + 90
        X = all_data.iloc[:iloc_to,:]
        y = all_label.iloc[:iloc_to]
        model.fit(X, y)
        signal = model.predict(all_data.iloc[iloc_to:iloc_to+1,:])
        instr = signal[0]
        one_row = pd.DataFrame([[data.index[iloc_to].date(), instr]])
        instrs = pd.concat([instrs,one_row], join='outer')
        
    return instrs

### 5.2 one cycle of 45 days

In [479]:
instrs = pd.DataFrame()
for key in shifted_dfs:
    cycle = shifted_dfs[key].iloc[-105:,:]
    data = cycle
    label = signals_dfs[key]['signal']
    cycle_signals = svr_ML(data, label).set_index(0)
    cycle_signals.columns = [key]
    #instrs = pd.concat([instrs, one_cycle(cycle)])
    instrs = pd.concat([instrs, cycle_signals],axis=1)

instrs

Unnamed: 0_level_0,AAPL,AMZN,MSFT,GOOG
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-08-10,1,-1,1,1
2023-08-11,1,1,-1,1
2023-08-14,1,1,-1,1
2023-08-15,1,1,1,1
2023-08-16,1,1,-1,1
2023-08-17,1,-1,-1,1
2023-08-18,1,1,1,1
2023-08-21,1,1,-1,1
2023-08-22,1,1,1,1
2023-08-23,-1,1,-1,-1


#### 5.2.1 15-day trade actions

In [480]:
instr_df = derive_instr(instrs)
instr_df

Unnamed: 0,0,1,2
0,2023-08-10,AAPL,1
0,2023-08-11,AMZN,1
0,2023-08-14,GOOG,1
0,2023-08-15,MSFT,1
0,2023-08-16,MSFT,-1
0,2023-08-17,AMZN,-1
0,2023-08-18,AMZN,1
0,2023-08-21,AAPL,0
0,2023-08-22,MSFT,1
0,2023-08-23,AAPL,-1


#### 5.2.2 show result

In [481]:
trade_action(instr_df)

On 2023-08-10 trade AAPL
{'action': 'buy', 'price': 178.95, 'bal': 7673.65, 'share': 13, 'status': 0, 'msg': 'success'}
On 2023-08-11 trade AMZN
{'action': 'buy', 'price': 138.03, 'bal': 5189.11, 'share': 18, 'status': 0, 'msg': 'success'}
On 2023-08-14 trade GOOG
{'action': 'buy', 'price': 130.8, 'bal': 2703.91, 'share': 19, 'status': 0, 'msg': 'success'}
On 2023-08-15 trade MSFT
{'action': 'buy', 'price': 322.71, 'bal': 444.94, 'share': 7, 'status': 0, 'msg': 'success'}
On 2023-08-16 trade MSFT
{'action': 'sell', 'price': 321.36, 'bal': 2694.46, 'share': 7, 'status': 0, 'msg': 'success'}
On 2023-08-17 trade AMZN
{'action': 'sell', 'price': 134.76, 'bal': 5120.14, 'share': 18, 'status': 0, 'msg': 'success'}
On 2023-08-18 trade AMZN
{'action': 'buy', 'price': 132.52, 'bal': 2734.78, 'share': 18, 'status': 0, 'msg': 'success'}
On 2023-08-21 trade AAPL
{'action': 'none', 'price': 175.19, 'bal': 2734.78, 'share': 13, 'status': -1, 'msg': 'invalid action (1/-1 only)'}
On 2023-08-22 trade M

## 6. svr on pct_change of all columns

In [None]:
# shifted is a df globally. first we cal pct_change, then shift(2) is required to match signal (daily return, lagged, twice_lagged)
shifted_dfs = {}
for key in ticker_data:
    shifted_dfs[key] = dfs[key].drop('symbol',axis=1).pct_change().shift(2)
    shifted_dfs[key].dropna(inplace=True)

shifted_dfs

### 6.1 train and predict

In [None]:
def one_cycle( data, label ):
    """ one cycle of 45 (30+15) days, using SVR directly on vwap pct_change """
   
    # use lagged returns to train SVM
    instrs = pd.DataFrame()
    model = svm.SVR()
    
    # rolling train for 30 days then predict 1 day, 15 cycles for 15 instructions
    for instri in range(0,15):
        iloc_to = instri + 30
        X = data.iloc[:iloc_to,:]
        y = label.iloc[:iloc_to]
        model.fit(X, y)
        signal = model.predict(data.iloc[iloc_to:iloc_to+1,:])
        
        if signal > 0:
            instr = 1
        else:
            instr = -1
        #one_row = pd.DataFrame([[signals.index[iloc_to].date(), cycle['symbol'][0], instr]])
        one_row = pd.DataFrame([[cycle.index[iloc_to].date(), instr]])
        instrs = pd.concat([instrs,one_row], join='outer')
        
    return instrs

### 6.2 one cycle of 45 days

In [433]:
instrs = pd.DataFrame()
for key in shifted_dfs:
    cycle = shifted_dfs[key].iloc[-45:,:]
    cycle_signals = one_cycle(cycle, signals_dfs[key]['signal']).set_index(0)
    cycle_signals.columns = [key]
    instrs = pd.concat([instrs, cycle_signals],axis=1)

instrs

Unnamed: 0_level_0,AAPL,AMZN,MSFT,GOOG
0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2023-08-10,-1,1,1,1
2023-08-11,1,1,1,1
2023-08-14,-1,1,1,1
2023-08-15,1,1,-1,-1
2023-08-16,-1,1,1,1
2023-08-17,1,1,-1,-1
2023-08-18,1,1,-1,1
2023-08-21,1,1,-1,-1
2023-08-22,-1,1,-1,1
2023-08-23,-1,1,-1,-1


#### 6.2.1 15 day trade action

In [434]:
instr_df = derive_instr(instrs)
instr_df

Unnamed: 0,0,1,2
0,2023-08-10,AMZN,1
0,2023-08-11,AAPL,1
0,2023-08-14,AAPL,-1
0,2023-08-15,AAPL,1
0,2023-08-16,AAPL,-1
0,2023-08-17,AAPL,1
0,2023-08-18,GOOG,1
0,2023-08-21,GOOG,-1
0,2023-08-22,AAPL,-1
0,2023-08-23,AAPL,0


#### 6.2.2 show result

In [435]:
trade_action(instr_df)

On 2023-08-10 trade AMZN
{'action': 'buy', 'price': 138.88, 'bal': 7500.16, 'share': 18, 'status': 0, 'msg': 'success'}
On 2023-08-11 trade AAPL
{'action': 'buy', 'price': 177.57, 'bal': 5014.18, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-14 trade AAPL
{'action': 'sell', 'price': 178.61, 'bal': 7514.72, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-15 trade AAPL
{'action': 'buy', 'price': 178.21, 'bal': 5019.78, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-16 trade AAPL
{'action': 'sell', 'price': 177.18, 'bal': 7500.3, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-17 trade AAPL
{'action': 'buy', 'price': 175.53, 'bal': 5042.88, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-18 trade GOOG
{'action': 'buy', 'price': 128.5, 'bal': 2601.38, 'share': 19, 'status': 0, 'msg': 'success'}
On 2023-08-21 trade GOOG
{'action': 'sell', 'price': 128.3, 'bal': 5039.08, 'share': 19, 'status': 0, 'msg': 'success'}
On 2023-08-22 trade AAPL
{'action': 'sel

## 7. random trade action

### 7.1 randomly generate trade actions

In [420]:
instrs = np.random.randn(50, 1)

def sample_df():
    instri = 0

    # using alpaca data for the dates to avoid non-trading days
    df_market = dfs[tickers[0]].iloc[-15:,:]
    
    df = pd.DataFrame()
    for index, data in df_market.iterrows():
        if instrs[instri] > 0:
            instr = 1
        else:
            instr = -1
        instri += 1
        one_row = pd.DataFrame([[index.date(), tickers[np.random.randint(4)], instr]])
        df = pd.concat([df,one_row], join='outer')
    return df

random_actions = sample_df()
random_actions

Unnamed: 0,0,1,2
0,2023-08-10,AAPL,-1
0,2023-08-11,AAPL,1
0,2023-08-14,GOOG,-1
0,2023-08-15,AMZN,-1
0,2023-08-16,AMZN,-1
0,2023-08-17,AAPL,1
0,2023-08-18,GOOG,1
0,2023-08-21,AMZN,-1
0,2023-08-22,MSFT,-1
0,2023-08-23,MSFT,-1


### 7.2 show result

In [421]:
trade_action(random_actions)

On 2023-08-10 trade AAPL
{'action': 'none', 'price': 178.95, 'bal': 10000.0, 'share': 0, 'status': -1, 'msg': 'no share to sell'}
On 2023-08-11 trade AAPL
{'action': 'buy', 'price': 177.57, 'bal': 7514.02, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-14 trade GOOG
{'action': 'none', 'price': 130.8, 'bal': 7514.02, 'share': 0, 'status': -1, 'msg': 'no share to sell'}
On 2023-08-15 trade AMZN
{'action': 'none', 'price': 139.06, 'bal': 7514.02, 'share': 0, 'status': -1, 'msg': 'no share to sell'}
On 2023-08-16 trade AMZN
{'action': 'none', 'price': 136.14, 'bal': 7514.02, 'share': 0, 'status': -1, 'msg': 'no share to sell'}
On 2023-08-17 trade AAPL
{'action': 'buy', 'price': 175.53, 'bal': 5056.6, 'share': 14, 'status': 0, 'msg': 'success'}
On 2023-08-18 trade GOOG
{'action': 'buy', 'price': 128.5, 'bal': 2615.1, 'share': 19, 'status': 0, 'msg': 'success'}
On 2023-08-21 trade AMZN
{'action': 'none', 'price': 134.08, 'bal': 2615.1, 'share': 0, 'status': -1, 'msg': 'no share to se

### 7.3 run 500 times random actions see distribution

In [422]:
final_worths = []
for n in range(500):
    final_worths.append(trade_action( sample_df(), verbose=0 ))
pd.DataFrame(final_worths).hvplot(kind='hist')