In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
from itertools import product

import dataframe_image as dfi
import pandas as pd

import helper_functions as hf
import Models

# Load Data

In [3]:
# Build the modelling frame in one expression: load the data, add the
# engineered + fundamental features, then attach the binary labels.
# The [1, 20, 60] argument presumably sets the label horizons behind the
# y_1 / y_20 / y_60 targets used by the modelling loop -- TODO confirm in
# helper_functions.
df = hf.create_binary_labels(
    hf.create_engineered_plus_fundamental_features(hf.load_data()),
    [1, 20, 60],
)

# Feature groups, each a list of column names handed to the models.

# Unprocessed inputs.
raw = ["ret", "volume"]

# Engineered features. The _20 / _60 suffixes presumably denote look-back
# windows -- confirm against helper_functions.
eng = [
    "mean_20",
    "mean_60",
    "vol_20",
    "vol_60",
    "mom_20",
    "mom_60",
    "ema_12",
    "ema_26",
    "ema_cross",
    "skew_20",
    "skew_60",
    "kurt_20",
    "kurt_60",
    "vol_z",
]

# Fundamental features.
fund = [
    "eps", "profit_margin", "revenue_growth", "income_growth",
    "gross_margin", "operating_margin", "sga_ratio", "rd_ratio",
    "cost_ratio", "net_income_per_share", "tax_burden", "nonop_ratio",
    "abnormal_ratio", "revenue_per_share", "da_ratio",
    "interest_coverage", "interest_burden",
]

# Combined set: fundamental columns first, engineered columns after.
fund_eng = [*fund, *eng]

#rt = df.pivot(index='date', columns='ticker', values='ret')


  df['revenue_growth'] = df.groupby('ticker')['revenue'].pct_change()
  df["income_growth"] = df.groupby("ticker")["net_income"].pct_change()


# Modeling: Logistic Regression

## Run logistic regression for every combination of feature set and horizon

In [4]:
# Lookup from a feature-set label to its list of column names.
features_map = dict(raw=raw, eng=eng, fund=fund, fund_eng=fund_eng)

In [5]:
# Fit one logistic regression per (feature set, target) pair and collect the
# validation / test scores as one record per run in `res`.
# Narrow these two lists (e.g. to ['raw'] and ['y_1']) for a quick smoke run.
features_sets = ['raw', 'eng', 'fund', 'fund_eng']
targets = ['y_1', 'y_20', 'y_60']

# Fail fast on missing feature columns. Without this check the problem only
# surfaces when the affected feature set is reached -- after the earlier fits
# have already run -- and as a bare KeyError that names the column but not the
# feature set it belongs to.
# Assumes df is a pandas DataFrame (exposes `.columns`) -- confirm against
# hf.load_data.
missing = {}
for _name in features_sets:
    _absent = [col for col in features_map[_name] if col not in df.columns]
    if _absent:
        missing[_name] = _absent
if missing:
    raise KeyError(f"Feature columns missing from df: {missing}")

res = []

for f, t in product(features_sets, targets):
    print(f, t)  # single progress marker per run
    features = features_map[f]

    # Restrict the frame to what this run needs, then split it into
    # train / validation / test parts (details live in helper_functions).
    data = hf.prune(df, features, t)
    train, val, test = hf.time_split(data)

    val_acc, test_acc = Models.run_logistic_regression(train, val, test, features, t)

    res.append({
        "Horizon": t,
        "Features": f,
        "Val Accuracy": val_acc,
        "Test Accuracy": test_acc,
    })

res

raw y_1 <class 'str'> <class 'str'>
raw y_1




raw y_20 <class 'str'> <class 'str'>
raw y_20




raw y_60 <class 'str'> <class 'str'>
raw y_60




eng y_1 <class 'str'> <class 'str'>
eng y_1




eng y_20 <class 'str'> <class 'str'>
eng y_20




eng y_60 <class 'str'> <class 'str'>
eng y_60




fund y_1 <class 'str'> <class 'str'>
fund y_1




fund y_20 <class 'str'> <class 'str'>
fund y_20




fund y_60 <class 'str'> <class 'str'>
fund y_60




fund_eng y_1 <class 'str'> <class 'str'>


KeyError: ['eps_mom20']

In [None]:
# Gather the per-run records into a frame, then spread the validation scores
# into a grid with one row per feature set and one column per horizon.
res_df = pd.DataFrame(data=res)
val_df = pd.pivot(
    res_df,
    index="Features",
    columns="Horizon",
    values="Val Accuracy",
)
val_df

In [None]:
# Feature-set x horizon grid of the test-set scores.
test_df = pd.pivot(
    res_df,
    index="Features",
    columns="Horizon",
    values="Test Accuracy",
)
test_df

In [None]:
# Presentation pass for the validation grid: readable labels first, then a
# styled table object (`val_st`) for display and export.
horizon_labels = {
    "y_1": "1-day horizon",
    "y_20": "20-day horizon",
    "y_60": "60-day horizon",
}
feature_labels = {
    "raw": "Raw",
    "eng": "Engineered",
    "fund": "Fundamental",
    "fund_eng": "Fund + Eng",
}
val_df = val_df.rename(index=feature_labels, columns=horizon_labels)

# CSS rules handed to the Styler as-is.
table_css = [
    {"selector": "table", "props": "width:100%; border-collapse:separate; border-spacing:10px;"},
    {"selector": "th, td", "props": "padding:10px;"},
    {"selector": "th", "props": "font-size:12pt;"},
    {"selector": "td", "props": "font-size:11pt;"},
]

# NOTE(review): the caption says "AUC", but val_df is pivoted from the
# "Val Accuracy" column -- confirm which metric is actually reported.
val_st = val_df.style.set_caption("Table 1: Summary Validation AUC")
val_st = val_st.format("{:.4f}")  # four decimal places for every cell
val_st = val_st.set_table_styles(table_css)
val_st = val_st.set_properties(**{"min-width": "120px"})
val_st

In [None]:
# Presentation pass for the test grid: readable labels first, then a styled
# table object (`test_st`) for display and export.
horizon_labels = {
    "y_1": "1-day horizon",
    "y_20": "20-day horizon",
    "y_60": "60-day horizon",
}
feature_labels = {
    "raw": "Raw",
    "eng": "Engineered",
    "fund": "Fundamental",
    "fund_eng": "Fund + Eng",
}
test_df = test_df.rename(index=feature_labels, columns=horizon_labels)

# CSS rules handed to the Styler as-is.
table_css = [
    {"selector": "table", "props": "width:100%; border-collapse:separate; border-spacing:10px;"},
    {"selector": "th, td", "props": "padding:10px;"},
    {"selector": "th", "props": "font-size:12pt;"},
    {"selector": "td", "props": "font-size:11pt;"},
]

# NOTE(review): the caption says "AUC", but test_df is pivoted from the
# "Test Accuracy" column -- confirm which metric is actually reported.
test_st = test_df.style.set_caption("Table 2: Summary Testing AUC")
test_st = test_st.format("{:.4f}")  # four decimal places for every cell
test_st = test_st.set_table_styles(table_css)
test_st = test_st.set_properties(**{"min-width": "120px"})
test_st

In [18]:
# Write the two styled summary tables to PNG files (paths are relative, so
# they land in the current working directory). `dfi` is dataframe_image,
# imported in the notebook's import cell.
# table_conversion="matplotlib" is passed through to dataframe_image;
# presumably it selects a matplotlib-based renderer -- confirm in its docs.
dfi.export(test_st, "logistic_regression_table_test.png", table_conversion="matplotlib")
dfi.export(val_st, "logistic_regression_table_validation.png", table_conversion="matplotlib")