In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
from itertools import product
import pandas as pd
import dataframe_image as dfi

import helper_functions as hf
import Models

# Load Data

In [3]:
# Build the modelling frame in one pass: load the data, add the engineered and
# fundamental feature columns, then attach binary labels for the horizons
# 1, 20 and 60 (presumably the source of the y_1 / y_20 / y_60 targets used
# later in this notebook -- confirm against helper_functions).
df = hf.create_binary_labels(
    hf.create_engineered_plus_fundamental_features(hf.load_data()),
    [1, 20, 60],
)

# Feature-set definitions. List order is kept exactly as written because it
# determines the column order handed to the model.
raw = ["ret", "volume"]

eng = [
    "mean_20",
    "mean_60",
    "vol_20",
    "vol_60",
    "mom_20",
    "mom_60",
    "ema_12",
    "ema_26",
    "ema_cross",
    "skew_20",
    "skew_60",
    "kurt_20",
    "kurt_60",
    "vol_z",
]

fund = [
    "eps", "profit_margin", "revenue_growth", "income_growth",
    "gross_margin", "operating_margin", "sga_ratio", "rd_ratio",
    "cost_ratio", "net_income_per_share", "tax_burden", "nonop_ratio",
    "abnormal_ratio", "revenue_per_share", "da_ratio",
    "interest_coverage", "interest_burden",
]

# Combined set: fundamental columns first, engineered columns after.
fund_eng = [*fund, *eng]

#rt = df.pivot(index='date', columns='ticker', values='ret')


  df['revenue_growth'] = df.groupby('ticker')['revenue'].pct_change()
  df["income_growth"] = df.groupby("ticker")["net_income"].pct_change()


# Modeling: Logistic Regression

## Run all the models with variations in Features and Horizons

In [4]:
# Lookup from a feature-set key to the list of column names it stands for.
features_map = dict(raw=raw, eng=eng, fund=fund, fund_eng=fund_eng)

In [5]:
# Experiment grid: every feature-set key is paired with every label column.
features_sets = ["raw", "eng", "fund", "fund_eng"]
targets = ["y_1", "y_20", "y_60"]

# For a quick smoke test, shrink the grid, e.g.
# features_sets = ["raw"] and targets = ["y_1"].

res = []  # one metrics record per (feature set, target) combination

for f, t in product(features_sets, targets):
    print(f, t, type(f), type(t))

    # Restrict the frame to the columns needed for this run, then split it.
    features = features_map[f]
    data = hf.prune(df, features, t)
    train, val, test = hf.time_split(data)

    print(f, t)
    val_auc, val_acc, test_auc, test_acc = Models.run_logistic_regression(
        train, val, test, features, t
    )

    # Key order matters: it becomes the column order of the results frame.
    res.append(dict(zip(
        ("Horizon", "Features", "Val AUC", "Val Accuracy", "Test AUC", "Test Accuracy"),
        (t, f, val_auc, val_acc, test_auc, test_acc),
    )))

res

raw y_1 <class 'str'> <class 'str'>
raw y_1




raw y_20 <class 'str'> <class 'str'>
raw y_20




raw y_60 <class 'str'> <class 'str'>
raw y_60




eng y_1 <class 'str'> <class 'str'>
eng y_1




eng y_20 <class 'str'> <class 'str'>
eng y_20




eng y_60 <class 'str'> <class 'str'>
eng y_60




fund y_1 <class 'str'> <class 'str'>
fund y_1




fund y_20 <class 'str'> <class 'str'>
fund y_20




fund y_60 <class 'str'> <class 'str'>
fund y_60




fund_eng y_1 <class 'str'> <class 'str'>
fund_eng y_1




fund_eng y_20 <class 'str'> <class 'str'>
fund_eng y_20




fund_eng y_60 <class 'str'> <class 'str'>
fund_eng y_60




[{'Horizon': 'y_1',
  'Features': 'raw',
  'Val AUC': 0.4923070078208573,
  'Val Accuracy': 0.5260039499670837,
  'Test AUC': 0.4985415036119433,
  'Test Accuracy': 0.5343629343629344},
 {'Horizon': 'y_20',
  'Features': 'raw',
  'Val AUC': 0.4979015938672902,
  'Val Accuracy': 0.4459685513549682,
  'Test AUC': 0.5279288362438861,
  'Test Accuracy': 0.4392240412648576},
 {'Horizon': 'y_60',
  'Features': 'raw',
  'Val AUC': 0.5024241822488832,
  'Val Accuracy': 0.6263115415657788,
  'Test AUC': 0.5045861626797885,
  'Test Accuracy': 0.6927179962894249},
 {'Horizon': 'y_1',
  'Features': 'eng',
  'Val AUC': 0.4993089328350766,
  'Val Accuracy': 0.4939467312348668,
  'Test AUC': 0.5067686224404312,
  'Test Accuracy': 0.5001159554730983},
 {'Horizon': 'y_20',
  'Features': 'eng',
  'Val AUC': 0.4715113895816627,
  'Val Accuracy': 0.5498475252169833,
  'Test AUC': 0.5230466047225638,
  'Test Accuracy': 0.6048130234752861},
 {'Horizon': 'y_60',
  'Features': 'eng',
  'Val AUC': 0.4404432627

In [6]:
# Collect the per-run records into a frame, then reshape validation accuracy
# into a grid with one row per feature set and one column per horizon,
# swapping the internal keys for display labels.
res_df = pd.DataFrame(res)
val_acc_df = res_df.pivot(
    index="Features", columns="Horizon", values="Val Accuracy"
).rename(
    columns={
        "y_1": "1-day horizon",
        "y_20": "20-day horizon",
        "y_60": "60-day horizon",
    },
    index={
        "raw": "Raw",
        "eng": "Engineered",
        "fund": "Fundamental",
        "fund_eng": "Fund + Eng",
    },
)

# Styled view: caption, four-decimal cells, and spacing/font CSS.
val_acc_st = (
    val_acc_df.style
    .set_caption("Table 1: Logistic - Summary Validation Accuracy")
    .format("{:.4f}")
    .set_table_styles([
        dict(selector="table", props="width:100%; border-collapse:separate; border-spacing:10px;"),
        dict(selector="th, td", props="padding:10px;"),
        dict(selector="th", props="font-size:12pt;"),
        dict(selector="td", props="font-size:11pt;"),
    ])
    .set_properties(**{"min-width": "120px"})
)

# Save the styled table as an image next to the notebook.
dfi.export(val_acc_st, "Logistic_table_validation_accuracy.png", table_conversion="matplotlib")

In [7]:
# Show the styled validation-accuracy table inline as this cell's output.
val_acc_st

Horizon,1-day horizon,20-day horizon,60-day horizon
Features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Engineered,0.4939,0.5498,0.5922
Fundamental,0.5178,0.5407,0.4648
Fund + Eng,0.5051,0.4391,0.4929
Raw,0.526,0.446,0.6263


In [8]:
# Rebuild the results frame and reshape validation AUC into a grid with one
# row per feature set and one column per horizon, using display labels.
res_df = pd.DataFrame(res)
val_auc_df = res_df.pivot(
    index="Features", columns="Horizon", values="Val AUC"
).rename(
    columns={
        "y_1": "1-day horizon",
        "y_20": "20-day horizon",
        "y_60": "60-day horizon",
    },
    index={
        "raw": "Raw",
        "eng": "Engineered",
        "fund": "Fundamental",
        "fund_eng": "Fund + Eng",
    },
)

# Styled view: caption, four-decimal cells, and spacing/font CSS.
val_auc_st = (
    val_auc_df.style
    .set_caption("Table 1: Logistic - Summary Validation AUC")
    .format("{:.4f}")
    .set_table_styles([
        dict(selector="table", props="width:100%; border-collapse:separate; border-spacing:10px;"),
        dict(selector="th, td", props="padding:10px;"),
        dict(selector="th", props="font-size:12pt;"),
        dict(selector="td", props="font-size:11pt;"),
    ])
    .set_properties(**{"min-width": "120px"})
)

# Save the styled table as an image next to the notebook.
dfi.export(val_auc_st, "Logistic_table_validation_AUC.png", table_conversion="matplotlib")

In [9]:
# Show the styled validation-AUC table inline as this cell's output.
val_auc_st

Horizon,1-day horizon,20-day horizon,60-day horizon
Features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Engineered,0.4993,0.4715,0.4404
Fundamental,0.5284,0.4863,0.4627
Fund + Eng,0.5087,0.4493,0.4723
Raw,0.4923,0.4979,0.5024


In [10]:
# Reshape test accuracy into a grid with one row per feature set and one
# column per horizon, using display labels. Relies on res_df from an earlier cell.
test_acc_df = res_df.pivot(
    index="Features", columns="Horizon", values="Test Accuracy"
).rename(
    columns={
        "y_1": "1-day horizon",
        "y_20": "20-day horizon",
        "y_60": "60-day horizon",
    },
    index={
        "raw": "Raw",
        "eng": "Engineered",
        "fund": "Fundamental",
        "fund_eng": "Fund + Eng",
    },
)

# Styled view: caption, four-decimal cells, and spacing/font CSS.
test_acc_st = (
    test_acc_df.style
    .set_caption("Table 2: Logistic - Summary Testing Accuracy")
    .format("{:.4f}")
    .set_table_styles([
        dict(selector="table", props="width:100%; border-collapse:separate; border-spacing:10px;"),
        dict(selector="th, td", props="padding:10px;"),
        dict(selector="th", props="font-size:12pt;"),
        dict(selector="td", props="font-size:11pt;"),
    ])
    .set_properties(**{"min-width": "120px"})
)

# Save the styled table as an image next to the notebook.
dfi.export(test_acc_st, "Logistic_table_test_accuracy.png", table_conversion="matplotlib")

In [11]:
# Show the styled test-accuracy table inline as this cell's output.
test_acc_st

Horizon,1-day horizon,20-day horizon,60-day horizon
Features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Engineered,0.5001,0.6048,0.6892
Fundamental,0.5424,0.5604,0.542
Fund + Eng,0.492,0.5676,0.5413
Raw,0.5344,0.4392,0.6927


In [12]:
# Reshape test AUC into a grid with one row per feature set and one column
# per horizon. Relies on res_df from an earlier cell.
test_auc_df = res_df.pivot(index = "Features", columns = "Horizon", values = "Test AUC")

# Replace the internal label-column keys with reader-facing horizon names.
test_auc_df = test_auc_df.rename(columns={
    "y_1": "1-day horizon",
    "y_20": "20-day horizon",
    "y_60": "60-day horizon"
})

# Replace the internal feature-set keys with reader-facing names.
test_auc_df = test_auc_df.rename(index={
    "raw": "Raw",
    "eng": "Engineered",
    "fund": "Fundamental",
    "fund_eng": "Fund + Eng"
})

test_auc_st = (
    test_auc_df.style
      # Caption names the metric actually shown (AUC); it previously said
      # "Accuracy", copied from the test-accuracy table.
      .set_caption("Table 2: Logistic - Summary Testing AUC")
      .format("{:.4f}")              # four decimal places in every cell
      .set_table_styles([
          {"selector": "table", "props": "width:100%; border-collapse:separate; border-spacing:10px;"},
          {"selector": "th, td", "props": "padding:10px;"},
          {"selector": "th", "props": "font-size:12pt;"},
          {"selector": "td", "props": "font-size:11pt;"},
      ])
      .set_properties(**{"min-width": "120px"})
)

# Save the styled table as an image next to the notebook.
dfi.export(test_auc_st, "Logistic_table_test_AUC.png", table_conversion="matplotlib")

In [13]:
# Show the styled test-AUC table inline as this cell's output.
test_auc_st

Horizon,1-day horizon,20-day horizon,60-day horizon
Features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Engineered,0.5068,0.523,0.5138
Fundamental,0.5254,0.572,0.6283
Fund + Eng,0.5231,0.6192,0.6371
Raw,0.4985,0.5279,0.5046
