In [339]:
import datetime as dt
import os

In [340]:
import numpy as np
import pandas as pd
# pd.set_option('display.max_rows', None)

In [341]:
from alpaca.data.requests import NewsRequest
from alpaca.data import StockHistoricalDataClient, TimeFrame 
from alpaca.data.requests import StockQuotesRequest, StockBarsRequest

from alpaca.trading.client import TradingClient

KEY = "PKQ9XJDJLTX686HE3ZL9"
SECRET = "4ES985YvYlOWz0eAtzdhcETJ1asEPBSw3gq9ZXs7"
ENDP = "https://paper-api.alpaca.markets"

In [342]:
import tensorflow as tf
import keras

from tensorflow.keras import Sequential
from tensorflow.keras.layers import LSTM, Dense, Embedding, GlobalAveragePooling1D, Conv2D, ConvLSTM2D, ConvLSTM1D, Input, Flatten, Reshape, TextVectorization, concatenate


In [343]:
def build_model(time_steps, items):
    input_volatility = Input(shape=(time_steps, items), name="input_vol")
    input_prem = Input(shape=(1,), name="input_prem")
    input_dte = Input(shape=(1,), name="input_dte")

    lstm1 = LSTM(units=16, return_sequences=True,input_shape=(time_steps, items))(input_volatility)
    lstm2 = LSTM(units=8, return_sequences=False)(lstm1)
    concatted = concatenate([lstm2, input_prem, input_dte])
    dense = Dense(units=1, activation="sigmoid")(concatted)
    model = keras.Model(inputs = [input_volatility, input_prem, input_dte], outputs=dense)

    model.compile(optimizer="adam", loss="binary_crossentropy")

    return model

    

In [344]:
def collect_dfs(dir_path):
    directory_path = dir_path
    dfs = []
    for filename in os.listdir(directory_path):
        if filename.endswith('.txt'):
            print(filename)
            file_path = os.path.join(directory_path, filename)

            # Read the text file into a DataFrame (adjust read_csv parameters based on your file format)
            df = pd.read_csv(file_path, sep=", ")
            df = df[(df["[STRIKE_DISTANCE]"] < 1) & (df["[DTE]"] > 25) & (df["[DTE]"] < 35)]
            # Append the DataFrame to the list
            dfs.append(df)
    full_df = pd.concat(dfs, ignore_index=True)
    print(full_df.shape)
    return full_df

In [345]:
def get_data(stocks, start_date, end_date):
    data_client = StockHistoricalDataClient(KEY, SECRET)

    request_params = StockBarsRequest(
        symbol_or_symbols=stocks,
        timeframe=TimeFrame.Day,
        adjustment="split",
        start=start_date,
        end=end_date
        )



    bars_df = data_client.get_stock_bars(request_params).df.tz_convert('America/New_York', level=1)
    stock_data = pd.DataFrame(bars_df)
    # print(stock_data)
    stock_data = stock_data.groupby(['symbol', 'timestamp']).mean().unstack(level=0)
    open_prices = stock_data["open"]
    log_returns = np.log((open_prices.pct_change()+1).dropna())
    return log_returns, open_prices

In [346]:
def compute_vol(raw_data, interval):
   
    data = raw_data
    while(len(data)%interval != 0):
        data.drop(data.tail(1).index,inplace=True)
   
    var_data = np.array(data[len(data)%interval:]).reshape(len(data)//interval, interval,-1).var(axis=1)
    # print(var_data)
    return np.sqrt(var_data)

Plan: 

For every option:
    - Get the vol of past 5 months before option was released
    - Get price now, get price at exp. Get profit (if profit positive, "long" else "short")

In [347]:
def build_train_data():
    options_data = collect_dfs("./aapl")
    # quit()


    today = dt.datetime.now()
    returns, prices = get_data(["AAPL", "MSFT", "GOOG", "NVDA"], today - dt.timedelta(7000), today)
    vol_inputs = []
    prem_inputs = []
    dte_inputs = []
    labels = []
    for index, row in options_data.iterrows():
        try:
            quote_date = row["[QUOTE_DATE]"]
            exp_date   = row["[EXPIRE_DATE]"]

            strike = row["[STRIKE]"]

            call_last = row["[C_LAST]"]
            put_last = row["[P_LAST]"]

            dte = row["[DTE]"]

            quote_price = prices.loc[quote_date, "AAPL"]
            exp_price = prices.loc[exp_date, "AAPL"]
            # print(type(strike))
        
            if (exp_price-strike-call_last-put_last).item() > 0:
                labels.append(1)
            elif (strike-exp_price-call_last-put_last).item() > 0:
                labels.append(1)
            else:
                labels.append(0)
            
            now = dt.datetime.strptime(quote_date, '%Y-%m-%d').date()
            start = now -dt.timedelta(3000)

            # print(prices.loc[start:now], 30)

            temp = compute_vol(returns.loc[start:now], 30)
            temp = temp[-20:]
            # print(temp)
            vol_inputs.append(temp)
            prem_inputs.append(((call_last+put_last)/strike))
            dte_inputs.append(dte)
        except:
            continue
        # break
    # print(inputs)
    return vol_inputs, prem_inputs, dte_inputs, labels




    

In [348]:
def train_model(model):
    inputs, prem_inputs, dte_inputs, labels = build_train_data()
    # labels = np.array(labels)
    # print(np.array(inputs).shape, np.array(labels).shape)
    # print(np.array(prem_inputs).shape, np.array(dte_inputs).shape)
    # print(inputs)
    model.fit(
        [np.array(inputs),
        np.array(prem_inputs),
        np.array(dte_inputs)],
        np.array(labels),
        epochs=20,
        batch_size=16,
    )

In [349]:
def make_prediction(model, vol, prem, dte):
    return model.predict([vol, prem, dte])

In [350]:
def backtest(model):
   
    options_data = collect_dfs("./test")

    today = dt.datetime.now()
    returns, prices = get_data(["AAPL", "MSFT", "GOOG", "NVDA"], today - dt.timedelta(7000), today)
    score = 0
    count = 0
    profits = []
    predicts = []
    for index, row in options_data.iterrows():
        try:
            quote_date = row["[QUOTE_DATE]"]
            exp_date   = row["[EXPIRE_DATE]"]

            strike = row["[STRIKE]"]

            call_last = row["[C_LAST]"]
            put_last = row["[P_LAST]"]

            dte = row["[DTE]"]

            quote_price = prices.loc[quote_date, "AAPL"]
            exp_price = prices.loc[exp_date, "AAPL"]
            # print(type(strike))
        
            
            now = dt.datetime.strptime(quote_date, '%Y-%m-%d').date()
            start = now -dt.timedelta(3000)

            # print(prices.loc[start:now], 30)
            temp = compute_vol(returns.loc[start:now], 30)
            temp = temp[-20:]
            prem_input = (call_last+put_last)/strike

            # print("hi", np.array(temp))
            # print("other", np.array(prem_input), np.array(dte))
            prediction = model.predict([np.array([temp]), np.array([prem_input]), np.array([dte])])
            
            prediction = prediction.flatten()[0]
            print(prediction)
            if prediction > 0.5:
                prediction = 1
            else:
                prediction = 0
            
            
            if prediction == 1:
                profit = max((exp_price-strike-call_last-put_last).item(), (strike-exp_price-call_last-put_last).item())
            else:
                # profit = min((strike+put_last+call_last-exp_price).item(), (exp_price-strike+put_last+call_last).item())
                profit = max((exp_price-strike-call_last-put_last).item(), (strike-exp_price-call_last-put_last).item())*-1
            profits.append(profit)
            
            if (exp_price-strike-call_last-put_last).item() > 0:
                actual = 1
            elif (strike-exp_price-call_last-put_last).item() > 0:
                actual = 1
            else:
                actual = 0
            predicts.append((actual, prediction))
            count += 1
            if actual == prediction:
                score +=1
        except:
            continue    
        # break
    
    print("score:", score/count)
    print("count", count)
    print("profit", np.array(profits).mean())
    # print("cost", call_last+put_last)
    print(predicts)



In [351]:
model = build_model(20,4)
train_model(model)
backtest(model)  

# print(input)



aapl_eod_202207.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202301.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202209.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202212.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202204.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202303.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202210.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202202.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202201.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202208.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202302.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202205.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202211.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202203.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202206.txt


  df = pd.read_csv(file_path, sep=", ")


(302, 33)


  temp = compute_vol(returns.loc[start:now], 30)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.drop(data.tail(1).index,inplace=True)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
aapl_eod_202305.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202304.txt


  df = pd.read_csv(file_path, sep=", ")


aapl_eod_202306.txt


  df = pd.read_csv(file_path, sep=", ")


(28, 33)


  temp = compute_vol(returns.loc[start:now], 30)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data.drop(data.tail(1).index,inplace=True)


0.5617847
0.4786694
0.47559723
0.44749582
0.55982906
0.44827867
0.5344076
0.4778345
0.56252694
0.53480136
0.50655216
0.47790706
0.44955295
0.5616827
0.4493751
0.56171954
0.44909436
0.47617334
0.5603099
0.53171456
0.47545436
0.4470445
0.53163517
0.4750134
0.44703346
0.5598491
0.5033921
0.4754039
score: 0.6071428571428571
count 28
profit 0.1528571428571439
[(1, 1), (1, 0), (1, 0), (1, 0), (1, 1), (1, 0), (0, 1), (0, 0), (0, 1), (1, 1), (1, 1), (0, 0), (0, 0), (1, 1), (1, 0), (0, 1), (0, 0), (1, 0), (1, 1), (1, 1), (1, 0), (0, 0), (1, 1), (0, 0), (0, 0), (1, 1), (0, 1), (0, 0)]
