In [2443]:
import datetime as dt

In [2444]:
import numpy as np
import pandas as pd
pd.set_option('display.max_rows', None)

In [2445]:
from alpaca.data.requests import NewsRequest
from alpaca.data import StockHistoricalDataClient, TimeFrame 
from alpaca.data.requests import StockQuotesRequest, StockBarsRequest

from alpaca.trading.client import TradingClient

KEY = "PKQ9XJDJLTX686HE3ZL9"
SECRET = "4ES985YvYlOWz0eAtzdhcETJ1asEPBSw3gq9ZXs7"
ENDP = "https://paper-api.alpaca.markets"

In [2446]:
import tensorflow as tf
import keras

from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, GlobalAveragePooling1D, Conv2D, ConvLSTM2D, ConvLSTM1D, Input, Flatten, Reshape, TextVectorization, concatenate


In [2447]:
def build_model(time_steps, items):
    input_volatility = Input(shape=(time_steps, items), name="input_vol")
    input_prem = Input(shape=(1,), name="input_prem")
    input_dte = Input(shape=(1,), name="input_dte")

    lstm1 = LSTM(units=64, return_sequences=False,input_shape=(time_steps, items))(input_volatility)
    concatted = concatenate([lstm1, input_prem, input_dte])
    dense = Dense(units=1, activation="sigmoid")(concatted)
    model = keras.Model(inputs = [input_volatility, input_prem, input_dte], outputs=dense)

    model.compile(optimizer="adam", loss="binary_crossentropy")

    return model

    

In [2448]:
def get_data(stocks, start_date, end_date):
    data_client = StockHistoricalDataClient(KEY, SECRET)

    request_params = StockBarsRequest(
        symbol_or_symbols=stocks,
        timeframe=TimeFrame.Day,
        adjustment="split",
        start=start_date,
        end=end_date
        )



    bars_df = data_client.get_stock_bars(request_params).df.tz_convert('America/New_York', level=1)
    stock_data = pd.DataFrame(bars_df)
    # print(stock_data)
    stock_data = stock_data.groupby(['symbol', 'timestamp']).mean().unstack(level=0)
    open_prices = stock_data["open"]
    log_returns = np.log((open_prices.pct_change()+1).dropna())
    return log_returns, open_prices

In [2449]:
def compute_vol(raw_data, interval):
   
    data = raw_data
    while(len(data)%interval != 0):
        data.drop(data.tail(1).index,inplace=True)
   
    var_data = np.array(data[len(data)%interval:]).reshape(len(data)//interval, interval,-1).var(axis=1)
    # print(var_data)
    return np.sqrt(var_data)

Plan: 

For every option:
    - Get the vol of past 5 months before option was released
    - Get price now, get price at exp. Get profit (if profit positive, "long" else "short")

In [2450]:
def build_train_data():
    options_data = pd.read_csv('./aapl/aapl_eod_202301.txt', sep=", ")
    options_data = options_data[(options_data["[STRIKE_DISTANCE]"] < 1) & (options_data["[DTE]"] > 20) & (options_data["[DTE]"] < 60)]


    today = dt.datetime.now()
    returns, prices = get_data(["AAPL", "NVDA"], today - dt.timedelta(7000), today)
    vol_inputs = []
    prem_inputs = []
    dte_inputs = []
    labels = []
    for index, row in options_data.iterrows():
        # try:
        quote_date = row["[QUOTE_DATE]"]
        exp_date   = row["[EXPIRE_DATE]"]

        strike = row["[STRIKE]"]

        call_last = row["[C_LAST]"]
        put_last = row["[P_LAST]"]

        dte = row["[DTE]"]

        quote_price = prices.loc[quote_date, "AAPL"]
        exp_price = prices.loc[exp_date, "AAPL"]
        # print(type(strike))
       
        if (exp_price-strike-call_last-put_last).item() > 0:
            labels.append(1)
        elif (strike-exp_price-call_last-put_last).item() > 0:
            labels.append(1)
        else:
            labels.append(0)
        
        now = dt.datetime.strptime(quote_date, '%Y-%m-%d').date()
        start = now -dt.timedelta(200)

        # print(prices.loc[start:now], 30)

        temp = compute_vol(prices.loc[start:now], 30)
        # print(temp)
        vol_inputs.append(temp)
        prem_inputs.append(((call_last+put_last)/strike))
        dte_inputs.append(dte)
        # break
    # print(inputs)
    return vol_inputs, prem_inputs, dte_inputs, labels




    

In [2451]:
def train_model(model):
    inputs, prem_inputs, dte_inputs, labels = build_train_data()
    # labels = np.array(labels)
    print(np.array(inputs).shape, np.array(labels).shape)
    print(np.array(prem_inputs).shape, np.array(dte_inputs).shape)
    # print(inputs)
    model.fit(
        [np.array(inputs),
        np.array(prem_inputs),
        np.array(dte_inputs)],
        np.array(labels),
        epochs=10,
        batch_size=32,
    )

In [2452]:
def make_prediction(model, vol, prem, dte):
    return model.predict([vol, prem, dte])

In [2453]:
def backtest(model):
    options_data = pd.read_csv('./aapl/aapl_eod_202302.txt', sep=", ")
    options_data = options_data[(options_data["[STRIKE_DISTANCE]"] < 1) & (options_data["[DTE]"] > 20) & (options_data["[DTE]"] < 60)]

    today = dt.datetime.now()
    returns, prices = get_data(["AAPL", "NVDA"], today - dt.timedelta(7000), today)
    score = 0
    count = 0
    profits = []
    for index, row in options_data.iterrows():
        quote_date = row["[QUOTE_DATE]"]
        exp_date   = row["[EXPIRE_DATE]"]

        strike = row["[STRIKE]"]

        call_last = row["[C_LAST]"]
        put_last = row["[P_LAST]"]

        dte = row["[DTE]"]

        quote_price = prices.loc[quote_date, "AAPL"]
        exp_price = prices.loc[exp_date, "AAPL"]
        # print(type(strike))
       
        
        now = dt.datetime.strptime(quote_date, '%Y-%m-%d').date()
        start = now -dt.timedelta(200)

        # print(prices.loc[start:now], 30)

        temp = compute_vol(prices.loc[start:now], 30)
        prem_input = (call_last+put_last)/strike

        # print("hi", np.array(temp))
        # print("other", np.array(prem_input), np.array(dte))
        prediction = model.predict([np.array([temp]), np.array([prem_input]), np.array([dte])])
        
        prediction = int(prediction.flatten()[0])
        
        if prediction == 1:
            profit = max((exp_price-strike-call_last-put_last).item(), (strike-exp_price-call_last-put_last).item())
        else:
            profit = max((strike+put_last+call_last-exp_price).item(), (exp_price-strike+put_last+call_last).item())
        profits.append(profit)
        
        if (exp_price-strike-call_last-put_last).item() > 0:
            actual = 1
        elif (strike-exp_price-call_last-put_last).item() > 0:
            actual = 1
        else:
            actual = 0
        
        count += 1
        if actual == prediction:
            score +=1

        # break
    
    print("score:", score/count)
    print("count", count)
    print("profit", np.array(profits).mean())

In [2454]:
model = build_model(4,2)
train_model(model)
backtest(model)  

# print(input)



  options_data = pd.read_csv('./aapl/aapl_eod_202301.txt', sep=", ")
  temp = compute_vol(prices.loc[start:now], 30)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.drop(data.tail(1).index,inplace=True)


(136, 4, 2) (136,)
(136,) (136,)
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


  options_data = pd.read_csv('./aapl/aapl_eod_202302.txt', sep=", ")
  temp = compute_vol(prices.loc[start:now], 30)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.drop(data.tail(1).index,inplace=True)


score: 0.6144578313253012
count 83
profit 18.706506024096388
