In [1]:
import os, sys

# Project root, written once and reused: it is appended to sys.path,
# presumably so the `Models.*` imports in a later cell resolve (confirm).
root = '/Users/gabrielsucich/Desktop/option_trading/'
sys.path.append(root)

In [2]:
from datetime import date
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [3]:
from Models.Stocks.StockHistory import *
from Models.MetricLoggers.InterdayMetrics import *
from Models.EventLoggers.InterdayChanges import *

In [4]:
def remove_nonconsecutive_dates(data, reference_symbol = "AAPL"):
    """Keep only the rows of `data` whose "Date" is directly followed by its successor.

    The list of dates is taken from the rows where "Symbol" equals
    `reference_symbol`, in the order they appear in `data`. A date is kept
    when `days_between(date, next_date) == 1` for the date that follows it
    in that list. The last date in the list has no successor and is
    therefore never kept.

    Parameters
    ----------
    data : DataFrame with at least the columns "Symbol" and "Date".
    reference_symbol : symbol whose rows define the date sequence
        (defaults to "AAPL", the value that used to be hard-coded).
        Assumes this symbol has a row for every date of interest, in
        chronological order -- TODO confirm against the data source.

    Returns
    -------
    DataFrame containing the rows of `data` (all symbols) whose "Date" is
    one of the kept dates. Empty when the reference symbol has fewer than
    two dates.
    """
    reference_dates = list(data.loc[data["Symbol"] == reference_symbol, "Date"])

    # Walk the dates pairwise (each date with the one after it); the names
    # deliberately avoid `date`, which would shadow datetime.date.
    dates_to_keep = [
        current
        for current, following in zip(reference_dates, reference_dates[1:])
        if days_between(current, following) == 1
    ]

    return data[data["Date"].isin(dates_to_keep)]

def get_stock_data_frame_filename(granularity, volumeRecordLength, pressureRecordLength, priceRecordLength, interdayRecordLength, loadDate = None):
    """Build the relative path of a stock data frame file.

    Parameters
    ----------
    granularity, volumeRecordLength, pressureRecordLength,
    priceRecordLength, interdayRecordLength : values embedded verbatim in
        the file name.
    loadDate : date stamp to embed in the name. When None (the default),
        today's date formatted as YYYYMMDD is used.

    Returns
    -------
    str of the form
    "StockDataFrames/<date>_g<granularity>_volume<..>_pressure<..>_price<..>_interday<..>".
    The path is relative, so it resolves against the current working directory.
    """
    # Identity test: `== None` defers to the argument's __eq__, which may be overridden.
    if loadDate is None:
        date_stamp = date.today().strftime("%Y%m%d")
    else:
        date_stamp = loadDate

    return "StockDataFrames/{0}_g{1}_volume{2}_pressure{3}_price{4}_interday{5}".format(
        date_stamp,
        granularity,
        volumeRecordLength,
        pressureRecordLength,
        priceRecordLength,
        interdayRecordLength,
    )

def import_stock_dataframe(granularity, volumeRecordLenth, pressureRecordLength, priceRecordLength, interdayRecordLength, date):
    """Load a saved stock data frame and filter it with remove_nonconsecutive_dates.

    The CSV path comes from get_stock_data_frame_filename, with `date`
    passed as its `loadDate`. The "Unnamed: 0" column (presumably the index
    written by a previous to_csv call -- confirm) is dropped before
    filtering; a file without that column raises KeyError.

    Returns the filtered DataFrame.
    """
    csv_path = get_stock_data_frame_filename(
        granularity,
        volumeRecordLenth,
        pressureRecordLength,
        priceRecordLength,
        interdayRecordLength,
        loadDate = date,
    )
    frame = pd.read_csv(csv_path).drop(columns = "Unnamed: 0")
    return remove_nonconsecutive_dates(frame)

In [5]:
# Load the stock data frame whose file name carries the date stamp 20201225
# (quartile granularity; record lengths 2/2/2 for volume/pressure/price, 3 for interday).
data = import_stock_dataframe(
    granularity = "quartile",
    volumeRecordLenth = 2,
    pressureRecordLength = 2,
    priceRecordLength = 2,
    interdayRecordLength = 3,
    date = "20201225",
)

In [6]:
def find_drops(data, drop_threshold):
    """Split `data` into feature columns and two boolean label columns.

    Labels, computed on a copy so the caller's frame is not modified:
      * "Is Drop"     -- "Next Day Change (%)" <= -drop_threshold
      * "Is Decrease" -- "Next Day Change (%)" < 0

    Returns
    -------
    (features, labels) where `features` is the label-based column slice
    from "Volume Gradient" through "Prev Day Change (%)" (both inclusive)
    and `labels` holds the columns ["Is Drop", "Is Decrease"].
    """
    next_day_change = data["Next Day Change (%)"]

    labelled = data.copy()
    labelled["Is Drop"] = next_day_change <= -drop_threshold
    labelled["Is Decrease"] = next_day_change < 0

    features = labelled.loc[:, "Volume Gradient": 'Prev Day Change (%)']
    labels = labelled[["Is Drop", "Is Decrease"]]
    return features, labels

def find_nonconsecutive_drops(data, drop_threshold):
    """Like a drop search, but a row only counts as a drop after a positive previous day.

    Labels, computed on a copy so the caller's frame is not modified:
      * "Is Drop"     -- "Next Day Change (%)" <= -drop_threshold
                         AND "Prev Day Change (%)" > 0
      * "Is Decrease" -- "Next Day Change (%)" < 0

    Returns
    -------
    (features, labels) where `features` is the label-based column slice
    from "Volume Gradient" through "Prev Day Change (%)" (both inclusive)
    and `labels` holds the columns ["Is Drop", "Is Decrease"].
    """
    labelled = data.copy()

    next_day_change = labelled["Next Day Change (%)"]
    falls_past_threshold = next_day_change <= -drop_threshold
    rose_previous_day = labelled["Prev Day Change (%)"] > 0

    labelled["Is Drop"] = falls_past_threshold & rose_previous_day
    labelled["Is Decrease"] = next_day_change < 0

    label_columns = ["Is Drop", "Is Decrease"]
    return labelled.loc[:, "Volume Gradient": 'Prev Day Change (%)'], labelled[label_columns]



In [7]:
# Only the label frame is needed here. Index the returned tuple instead of
# unpacking into `_`: a top-level assignment to `_` overrides IPython's
# last-output variable for the rest of the session.
drops = find_drops(data, .5)[1]

In [8]:
# Number of rows flagged "Is Drop" (the first column of `drops`), scaled by .07.
# NOTE(review): the meaning of the .07 factor is not stated anywhere in this notebook -- confirm.
int(drops["Is Drop"].sum()) * .07

544.25

In [9]:
def get_TwofoldModel_metrics_filename(symbol_set_name, granularity, volumeRecordLength, pressureRecordLength, priceRecordLength, interdayRecordLength, loadDate = None):
    """Build the relative path of a TwofoldModel metrics file.

    Parameters
    ----------
    symbol_set_name, granularity, volumeRecordLength, pressureRecordLength,
    priceRecordLength, interdayRecordLength : values embedded verbatim in
        the file name.
    loadDate : date stamp to embed in the name. When None (the default),
        today's date formatted as YYYYMMDD is used.

    Returns
    -------
    str of the form
    "TwofoldModelMetrics/<date>_s<set>_g<granularity>_volume<..>_pressure<..>_price<..>_interday<..>".
    The path is relative, so it resolves against the current working directory.
    """
    # Identity test: `== None` defers to the argument's __eq__, which may be overridden.
    if loadDate is None:
        date_stamp = date.today().strftime("%Y%m%d")
    else:
        date_stamp = loadDate

    return "TwofoldModelMetrics/{0}_s{1}_g{2}_volume{3}_pressure{4}_price{5}_interday{6}".format(
        date_stamp,
        symbol_set_name,
        granularity,
        volumeRecordLength,
        pressureRecordLength,
        priceRecordLength,
        interdayRecordLength,
    )

def export_TwofoldModel_metrics_to_csv(df, symbol_set_name, granularity, volumeRecordLength, pressureRecordLength, priceRecordLength, interdayRecordLength, loadDate = None):
    """Write `df` as CSV to the path given by get_TwofoldModel_metrics_filename.

    All arguments after `df` are forwarded unchanged to the filename builder
    (`loadDate` by keyword). `to_csv` is called with its defaults, so the
    frame's index is written as the first column. Returns None.
    """
    df.to_csv(
        get_TwofoldModel_metrics_filename(
            symbol_set_name,
            granularity,
            volumeRecordLength,
            pressureRecordLength,
            priceRecordLength,
            interdayRecordLength,
            loadDate = loadDate,
        )
    )
    
def import_TwofoldModel_metrics_dataframe(symbol_set_name, granularity, volumeRecordLenth, pressureRecordLength, priceRecordLength, interdayRecordLength, date):
    """Read one TwofoldModel metrics CSV and return it without its "Unnamed: 0" column.

    The path comes from get_TwofoldModel_metrics_filename, with `date`
    passed as its `loadDate`. A file that has no "Unnamed: 0" column
    raises KeyError from `drop`.
    """
    csv_path = get_TwofoldModel_metrics_filename(
        symbol_set_name,
        granularity,
        volumeRecordLenth,
        pressureRecordLength,
        priceRecordLength,
        interdayRecordLength,
        loadDate = date,
    )
    return pd.read_csv(csv_path).drop(columns = "Unnamed: 0")

In [10]:
def compile_metrics_data(symbol_set_names, date):
    """Load and stack the TwofoldModel metrics tables for every parameter combination.

    One file is read per combination of `symbol_set_names` and the
    module-level lists `granularityRange`, `volumeRLRange`,
    `pressureRLRange`, `priceRLRange` and `interdayRLRange`, which must be
    defined before this function is called. Each loaded table is tagged
    with the columns "granularity", "Volume RL", "Pressure RL", "Price RL",
    "Interday RL" and "Stock set" identifying the combination it came from.

    Parameters
    ----------
    symbol_set_names : iterable of symbol-set names.
    date : date stamp forwarded to import_TwofoldModel_metrics_dataframe.

    Returns
    -------
    DataFrame with all tables stacked in iteration order. Row labels of the
    individual tables are preserved, so the index may contain duplicates.

    Raises
    ------
    ValueError if there is no combination to load (an empty input list or
        range); this previously surfaced as an UnboundLocalError.
    """
    # Same iteration order as nested for-loops; only `symbol_set_names` is
    # evaluated up front, the range globals are looked up while iterating.
    combinations = (
        (name, granularity, volumeRL, pressureRL, priceRL, interdayRL)
        for name in symbol_set_names
        for granularity in granularityRange
        for volumeRL in volumeRLRange
        for pressureRL in pressureRLRange
        for priceRL in priceRLRange
        for interdayRL in interdayRLRange
    )

    frames = []
    for name, granularity, volumeRL, pressureRL, priceRL, interdayRL in combinations:
        frame = import_TwofoldModel_metrics_dataframe(name, granularity, volumeRL, pressureRL, priceRL, interdayRL, date)
        frame["granularity"] = granularity
        frame["Volume RL"] = volumeRL
        frame["Pressure RL"] = pressureRL
        frame["Price RL"] = priceRL
        frame["Interday RL"] = interdayRL
        frame["Stock set"] = name
        frames.append(frame)

    if not frames:
        raise ValueError("No metrics to compile: symbol_set_names or one of the parameter ranges is empty")

    # A single concat replaces repeated DataFrame.append, which was removed in
    # pandas 2.0 and re-copied the accumulated frame on every iteration.
    return pd.concat(frames)

In [36]:
# Parameter grid for the quartile-granularity runs. compile_metrics_data
# reads these module-level names and loads one file per combination.
granularityRange = ["quartile"]

# Record lengths (RL) to sweep for each signal.
volumeRLRange = [2, 3]
pressureRLRange = [2, 3]
priceRLRange = [2, 3]
interdayRLRange = [3, 4]

In [37]:
# Stack every metrics file of the "all" symbol set whose name carries the
# date stamp 20210105, over the grid held in the *Range variables.
full_data = compile_metrics_data(
    symbol_set_names = ["all"],
    date = "20210105",
)

In [38]:
# PR-Product = Recall * (Precision - sqrt(Precision Variance)):
# recall weighted by precision reduced by one standard deviation.
full_data["PR-Product"] = full_data["Recall"] * (
    full_data["Precision"] - full_data["Precision Variance"] ** .5
)




In [39]:
# Ten highest PR-Product rows among those with Precision above .8.
(
    full_data
    .loc[full_data["Precision"] > .8]
    .sort_values("PR-Product", ascending = False)
    .head(10)
)

Unnamed: 0,Baseline Threshold,Model Threshold,Drop Threshold,Precision,Precision Variance,Accuracy,Accuracy Variance,Recall,Recall Variance,granularity,Volume RL,Pressure RL,Price RL,Interday RL,Stock set,PR-Product
0,0.5,0.65,0.5,0.81237,0.003027,0.852004,8.5e-05,0.090885,0.000223,quartile,2,3,3,4,all,0.068832
4,0.6,0.65,0.5,0.804167,0.003638,0.849882,5.6e-05,0.076404,0.000143,quartile,3,2,3,4,all,0.056833
0,0.5,0.65,0.5,0.812743,0.008323,0.84512,3.6e-05,0.078342,0.0002,quartile,3,3,3,4,all,0.056525
8,0.7,0.65,0.5,0.826809,0.003775,0.850542,5.1e-05,0.071055,6.9e-05,quartile,2,3,3,4,all,0.054383
0,0.5,0.65,0.5,0.807304,0.004678,0.849646,7.3e-05,0.073243,0.000253,quartile,3,3,2,4,all,0.05412
4,0.6,0.65,0.5,0.800299,0.005508,0.850212,4.3e-05,0.072087,8e-05,quartile,2,3,2,4,all,0.052341
8,0.7,0.65,0.5,0.806544,0.005436,0.848845,4.8e-05,0.071011,0.000145,quartile,2,2,3,3,all,0.052037
8,0.7,0.65,0.5,0.824864,0.004647,0.849505,0.000114,0.067757,0.000193,quartile,3,2,2,3,all,0.051271
8,0.7,0.65,0.5,0.805064,0.006444,0.848751,5.4e-05,0.070442,0.000111,quartile,3,3,3,4,all,0.051056
0,0.5,0.65,0.5,0.802991,0.002119,0.848751,5.8e-05,0.066557,9.3e-05,quartile,2,2,2,4,all,0.050381


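The best parameter set for nonconsecutive drops at the quartile level (the top row of the table above, ranked by PR-Product) is:

Record lengths — volume: 2, pressure: 3, price: 3, interday: 4

Thresholds — baseline (bt): 0.5, model (mt): 0.65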

In [15]:
# Quartile-granularity parameter grid, set again before the next
# compile_metrics_data call (which reads these module-level names).
granularityRange = ["quartile"]

# Record lengths (RL) to sweep for each signal.
volumeRLRange = [2, 3]
pressureRLRange = [2, 3]
priceRLRange = [2, 3]
interdayRLRange = [3, 4]

In [16]:
# Stack every metrics file of the "all" symbol set whose name carries the
# date stamp 20201226, over the grid held in the *Range variables.
full_data = compile_metrics_data(
    symbol_set_names = ["all"],
    date = "20201226",
)

In [17]:
# PR-Product = Recall * (Precision - sqrt(Precision Variance)):
# recall weighted by precision reduced by one standard deviation.
full_data["PR-Product"] = full_data["Recall"] * (
    full_data["Precision"] - full_data["Precision Variance"] ** .5
)


In [18]:
# Ten highest PR-Product rows among those with Precision above .8.
(
    full_data
    .loc[full_data["Precision"] > .8]
    .sort_values("PR-Product", ascending = False)
    .head(10)
)

Unnamed: 0,Baseline Threshold,Model Threshold,Drop Threshold,Precision,Precision Variance,Accuracy,Accuracy Variance,Recall,Recall Variance,granularity,Volume RL,Pressure RL,Price RL,Interday RL,Stock set,PR-Product
4,0.6,0.65,0.5,0.840021,0.001714,0.765394,0.0001,0.118477,0.000114,quartile,3,3,3,4,all,0.094619
4,0.6,0.65,0.5,0.806461,0.003336,0.765064,6.9e-05,0.113933,0.000215,quartile,2,3,3,4,all,0.085302
0,0.5,0.65,0.5,0.815695,0.002229,0.768647,0.000139,0.10768,0.000195,quartile,3,3,3,4,all,0.082751
0,0.5,0.65,0.5,0.82075,0.001445,0.763979,0.000115,0.102632,0.000101,quartile,2,3,2,4,all,0.080333
8,0.7,0.65,0.5,0.812963,0.000901,0.764356,6.6e-05,0.099563,4.9e-05,quartile,2,3,3,4,all,0.077952
4,0.6,0.65,0.5,0.81504,0.001692,0.764121,8.2e-05,0.09996,5.8e-05,quartile,2,3,2,4,all,0.07736
0,0.5,0.65,0.5,0.813249,0.001529,0.762659,0.000122,0.099926,0.000123,quartile,3,3,2,4,all,0.077357
4,0.6,0.65,0.5,0.823357,0.001103,0.756294,9.1e-05,0.096309,0.000156,quartile,3,3,2,4,all,0.076098
8,0.7,0.65,0.5,0.821618,0.000852,0.767091,5.3e-05,0.094381,0.00012,quartile,3,3,2,4,all,0.074791
8,0.7,0.65,0.5,0.833772,0.002588,0.760396,6.4e-05,0.094218,5e-05,quartile,2,3,2,4,all,0.073763


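The best parameter set for consecutive drops at the quartile level (the top row of the table above, ranked by PR-Product) is:

Record lengths — volume: 3, pressure: 3, price: 3, interday: 4

Thresholds — baseline (bt): 0.6, model (mt): 0.65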

In [32]:
# Parameter grid for the "semi"-granularity runs. compile_metrics_data
# reads these module-level names and loads one file per combination.
granularityRange = ["semi"]

# Record lengths (RL) to sweep for each signal.
volumeRLRange = [3, 4]
pressureRLRange = [3, 4]
priceRLRange = [3, 4]
interdayRLRange = [3, 4]

In [33]:
# Stack every metrics file of the "all" symbol set whose name carries the
# date stamp 20210105, over the grid held in the *Range variables.
full_data = compile_metrics_data(
    symbol_set_names = ["all"],
    date = "20210105",
)

In [34]:
# PR-Product = Recall * (Precision - sqrt(Precision Variance)):
# recall weighted by precision reduced by one standard deviation.
full_data["PR-Product"] = full_data["Recall"] * (
    full_data["Precision"] - full_data["Precision Variance"] ** .5
)


In [35]:
# Ten highest PR-Product rows among those with Precision above .5.
# NOTE(review): the other three ranking cells in this notebook filter at
# Precision > .8; confirm that the looser .5 cut-off here is intentional.
(
    full_data
    .loc[full_data["Precision"] > .5]
    .sort_values("PR-Product", ascending = False)
    .head(10)
)

Unnamed: 0,Baseline Threshold,Model Threshold,Drop Threshold,Precision,Precision Variance,Accuracy,Accuracy Variance,Recall,Recall Variance,granularity,Volume RL,Pressure RL,Price RL,Interday RL,Stock set,PR-Product
0,0.5,0.65,0.5,0.809273,0.002039,0.855986,5.4e-05,0.085906,0.000162,semi,3,3,4,4,all,0.065642
4,0.6,0.65,0.5,0.838063,0.003836,0.855266,4.7e-05,0.084227,0.000216,semi,4,4,4,3,all,0.065371
0,0.5,0.65,0.5,0.797593,0.004916,0.859226,2.4e-05,0.085843,0.000207,semi,4,4,4,4,all,0.062449
8,0.7,0.65,0.5,0.804466,0.001962,0.857561,3.3e-05,0.080176,0.000105,semi,3,4,4,4,all,0.060948
0,0.5,0.65,0.5,0.809663,0.002175,0.858956,1.9e-05,0.079801,2.9e-05,semi,3,4,4,4,all,0.06089
4,0.6,0.65,0.5,0.811481,0.002611,0.857696,8.2e-05,0.076726,0.000122,semi,4,3,4,4,all,0.058341
4,0.6,0.65,0.5,0.812547,0.003677,0.853915,5.8e-05,0.077361,0.000149,semi,3,3,4,4,all,0.058169
4,0.6,0.65,0.5,0.789741,0.005933,0.859766,3.7e-05,0.081017,0.00053,semi,4,3,3,4,all,0.057742
4,0.6,0.65,0.5,0.79368,0.001484,0.855086,7.4e-05,0.076325,0.000201,semi,3,3,3,3,all,0.057638
0,0.5,0.65,0.5,0.800077,0.005064,0.857606,3e-05,0.078305,6.3e-05,semi,4,4,4,3,all,0.057077


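The best parameter set for nonconsecutive drops at the semi level (the top row of the table above, ranked by PR-Product) is:

Record lengths — volume: 3, pressure: 3, price: 4, interday: 4

Thresholds — baseline (bt): 0.5, model (mt): 0.65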

In [23]:
# Stack every metrics file of the "all" symbol set whose name carries the
# date stamp 20201226, over the grid held in the *Range variables.
full_data = compile_metrics_data(
    symbol_set_names = ["all"],
    date = "20201226",
)

In [24]:
# PR-Product = Recall * (Precision - sqrt(Precision Variance)):
# recall weighted by precision reduced by one standard deviation.
full_data["PR-Product"] = full_data["Recall"] * (
    full_data["Precision"] - full_data["Precision Variance"] ** .5
)


In [29]:
# Ten highest PR-Product rows among those with Precision above .8.
(
    full_data
    .loc[full_data["Precision"] > .8]
    .sort_values("PR-Product", ascending = False)
    .head(10)
)

Unnamed: 0,Baseline Threshold,Model Threshold,Drop Threshold,Precision,Precision Variance,Accuracy,Accuracy Variance,Recall,Recall Variance,granularity,Volume RL,Pressure RL,Price RL,Interday RL,Stock set,PR-Product
4,0.6,0.65,0.5,0.837597,0.000983,0.769487,7.9e-05,0.117502,0.000149,semi,4,4,4,4,all,0.094737
4,0.6,0.65,0.5,0.827727,0.00116,0.769037,6.8e-05,0.113511,2.9e-05,semi,4,3,4,4,all,0.090091
0,0.5,0.65,0.5,0.806804,0.001426,0.763951,8.8e-05,0.11147,0.000139,semi,4,3,4,4,all,0.085725
4,0.6,0.65,0.5,0.816611,0.001986,0.767957,2.5e-05,0.107269,5.1e-05,semi,4,3,3,4,all,0.082817
0,0.5,0.65,0.5,0.81566,0.00267,0.767327,2e-05,0.108183,8.7e-05,semi,3,4,4,4,all,0.082651
0,0.5,0.65,0.5,0.814288,0.003327,0.768992,1.8e-05,0.108749,0.000137,semi,3,3,3,4,all,0.08228
8,0.7,0.65,0.5,0.828489,0.000583,0.764896,7.2e-05,0.098739,0.000107,semi,3,3,4,4,all,0.079419
8,0.7,0.65,0.5,0.824396,0.000908,0.765527,7.5e-05,0.098911,7.7e-05,semi,4,4,4,4,all,0.078561
4,0.6,0.65,0.5,0.805202,0.001111,0.763051,0.000136,0.100139,0.000146,semi,3,4,3,4,all,0.077295
4,0.6,0.65,0.5,0.803307,0.00234,0.766247,6.4e-05,0.101778,0.000166,semi,4,4,3,4,all,0.076835
