# MLP for all horizons

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from torch import nn

from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

import tensorflow as tf
from tensorflow.keras import layers, models

In [3]:
import helper_functions as hf
import MLP_Model as mlp
import dataframe_image as dfi
from itertools import product

In [4]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score
from itertools import product
from sklearn.metrics import classification_report

## Load Data

In [5]:
# Build the modeling frame in one pass: load -> add engineered/fundamental
# feature columns -> attach binary labels for the horizons [1, 20, 60]
# (the modeling loop further down refers to them as y_1, y_20, y_60).
df = hf.create_binary_labels(
    hf.create_engineered_plus_fundamental_features(hf.load_data()),
    [1, 20, 60],
)

# Candidate input column groups. List order is kept stable because it is the
# order in which the columns are handed to the model.
raw = ["ret", "volume"]

# NOTE(review): the _20/_60 (and _12/_26) suffixes presumably denote window
# lengths -- confirm against helper_functions.
eng = [
    "mean_20",
    "mean_60",
    "vol_20",
    "vol_60",
    "mom_20",
    "mom_60",
    "ema_12",
    "ema_26",
    "ema_cross",
    "skew_20",
    "skew_60",
    "kurt_20",
    "kurt_60",
    "vol_z",
]

fund = [
    "eps", "profit_margin",
    "revenue_growth", "income_growth",
    "gross_margin", "operating_margin",
    "sga_ratio", "rd_ratio", "cost_ratio",
    "net_income_per_share",
    "tax_burden", "nonop_ratio", "abnormal_ratio",
    "revenue_per_share",
    "da_ratio",
    "interest_coverage", "interest_burden",
]

# Combined set: fundamental columns first, engineered columns after.
fund_eng = [*fund, *eng]

#rt = df.pivot(index='date', columns='ticker', values='ret')


  df['revenue_growth'] = df.groupby('ticker')['revenue'].pct_change()
  df["income_growth"] = df.groupby("ticker")["net_income"].pct_change()


# Modeling: MLP Model

## Run all the models with variations in Features and Horizons

In [6]:
# Lookup from feature-set key to its column list; the keys double as the
# "Features" labels recorded in the results.
features_map = dict(raw=raw, eng=eng, fund=fund, fund_eng=fund_eng)

In [7]:
# Experiment grid: every feature set is paired with every label column.
features_sets = ['raw', 'eng', 'fund', 'fund_eng']
targets = ['y_1', 'y_20', 'y_60']

# NOTE(review): no random seed is set anywhere in this notebook. Confirm that
# mlp.run_optimize_eval_MLP seeds its own RNGs; otherwise a rerun will not
# reproduce the metrics recorded below.

# One dict per (feature set, target) run; turned into a DataFrame by the
# table cells that follow.
res = []

for feature_set, target in product(features_sets, targets):
    # Single progress marker so the training log can be attributed to a run.
    print(feature_set, target)

    features = features_map[feature_set]

    # NOTE(review): prune presumably restricts df to rows/columns usable for
    # this feature/target pair, and time_split presumably splits by date --
    # confirm both in helper_functions.
    data = hf.prune(df, features, target)
    train, val, test = hf.time_split(data)

    val_auc, val_acc, test_auc, test_acc = mlp.run_optimize_eval_MLP(
        train, val, test, features, target
    )

    res.append({
        "Horizon": target,
        "Features": feature_set,
        "Val AUC": val_auc,
        "Val Accuracy": val_acc,
        "Test AUC": test_auc,
        "Test Accuracy": test_acc
    })

# Bare last expression: show the collected metrics as the cell output.
res

raw y_1 <class 'str'> <class 'str'>
raw y_1
Epoch 1/25
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.5008 - auc: 0.5010 - loss: 0.6960 - val_accuracy: 0.5249 - val_auc: 0.4949 - val_loss: 0.6925
Epoch 2/25
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 604us/step - accuracy: 0.5182 - auc: 0.5160 - loss: 0.6922 - val_accuracy: 0.5247 - val_auc: 0.4978 - val_loss: 0.6922
Epoch 3/25
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 587us/step - accuracy: 0.5185 - auc: 0.5087 - loss: 0.6922 - val_accuracy: 0.5268 - val_auc: 0.4955 - val_loss: 0.6923
Epoch 4/25
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 581us/step - accuracy: 0.5167 - auc: 0.5141 - loss: 0.6921 - val_accuracy: 0.5296 - val_auc: 0.4992 - val_loss: 0.6917
Epoch 5/25
[1m166/166[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 581us/step - accuracy: 0.5200 - auc: 0.5120 - loss: 0.6920 - val_accuracy: 0.5267 - val_auc: 0.49

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - accuracy: 0.5985 - auc: 0.5322 - loss: 0.6745 - val_accuracy: 0.6263 - val_auc: 0.5141 - val_loss: 0.6641
Epoch 2/25
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 590us/step - accuracy: 0.5980 - auc: 0.5337 - loss: 0.6721 - val_accuracy: 0.6263 - val_auc: 0.5124 - val_loss: 0.6647
Epoch 3/25
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 575us/step - accuracy: 0.5962 - auc: 0.5353 - loss: 0.6719 - val_accuracy: 0.6263 - val_auc: 0.5147 - val_loss: 0.6624
Epoch 4/25
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 590us/step - accuracy: 0.5954 - auc: 0.5358 - loss: 0.6719 - val_accuracy: 0.6263 - val_auc: 0.5173 - val_loss: 0.6633
Epoch 5/25
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 897us/step - accuracy: 0.5972 - auc: 0.5401 - loss: 0.6716 - val_accuracy: 0.6263 - val_auc: 0.5254 - val_loss: 0.6622
Epoch 6/25
[1m159/159[0m [32m━

  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step - accuracy: 0.5101 - auc: 0.5073 - loss: 0.7095 - val_accuracy: 0.5197 - val_auc: 0.5039 - val_loss: 0.6951
Epoch 2/25
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.5118 - auc: 0.5099 - loss: 0.6939 - val_accuracy: 0.5165 - val_auc: 0.5010 - val_loss: 0.6939
Epoch 3/25
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5144 - auc: 0.5159 - loss: 0.6924 - val_accuracy: 0.5073 - val_auc: 0.4919 - val_loss: 0.6944
Epoch 4/25
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.5168 - auc: 0.5162 - loss: 0.6924 - val_accuracy: 0.5142 - val_auc: 0.5017 - val_loss: 0.6938
Epoch 5/25
[1m159/159[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.5147 - auc: 0.5175 - loss: 0.6922 - val_accuracy: 0.5140 - val_auc: 0.4929 - val_loss: 0.6938
Epoch 6/25
[1m159/159[0m [32m━━━━━━━━━

[{'Horizon': 'y_1',
  'Features': 'raw',
  'Val AUC': 0.5092998901249828,
  'Val Accuracy': 0.5317094579767391,
  'Test AUC': 0.5095900867812929,
  'Test Accuracy': 0.5350248207391064},
 {'Horizon': 'y_20',
  'Features': 'raw',
  'Val AUC': 0.5195040186736671,
  'Val Accuracy': 0.6039924166387867,
  'Test AUC': 0.5025463902381284,
  'Test Accuracy': 0.6097779771249159},
 {'Horizon': 'y_60',
  'Features': 'raw',
  'Val AUC': 0.531987413655761,
  'Val Accuracy': 0.6263115415657788,
  'Test AUC': 0.5015413331985774,
  'Test Accuracy': 0.6927179962894249},
 {'Horizon': 'y_1',
  'Features': 'eng',
  'Val AUC': 0.5193705731676294,
  'Val Accuracy': 0.5234636227372305,
  'Test AUC': 0.5055099031787674,
  'Test Accuracy': 0.5316558441558441},
 {'Horizon': 'y_20',
  'Features': 'eng',
  'Val AUC': 0.5091201542824343,
  'Val Accuracy': 0.5738916256157636,
  'Test AUC': 0.5116656124103907,
  'Test Accuracy': 0.5696590775038339},
 {'Horizon': 'y_60',
  'Features': 'eng',
  'Val AUC': 0.53238123789

In [8]:
# Validation accuracy as a feature-set (rows) x horizon (columns) table,
# styled and exported to PNG.
res_df = pd.DataFrame(res)

# Display names for the pivoted axes.
horizon_labels = {
    "y_1": "1-day horizon",
    "y_20": "20-day horizon",
    "y_60": "60-day horizon",
}
feature_labels = {
    "raw": "Raw",
    "eng": "Engineered",
    "fund": "Fundamental",
    "fund_eng": "Fund + Eng",
}

val_acc_df = (
    res_df
    .pivot(index="Features", columns="Horizon", values="Val Accuracy")
    .rename(columns=horizon_labels)
    .rename(index=feature_labels)
)

# CSS rules applied to the rendered table.
table_css = [
    {"selector": "table", "props": "width:100%; border-collapse:separate; border-spacing:10px;"},
    {"selector": "th, td", "props": "padding:10px;"},
    {"selector": "th", "props": "font-size:12pt;"},
    {"selector": "td", "props": "font-size:11pt;"},
]

val_acc_st = val_acc_df.style.set_caption("Table 1: MLP - Summary Validation Accuracy")
val_acc_st = val_acc_st.format("{:.4f}")  # four decimals for every cell
val_acc_st = val_acc_st.set_table_styles(table_css)
val_acc_st = val_acc_st.set_properties(**{"min-width": "120px"})

dfi.export(val_acc_st, "MLP_table_validation_accuracy.png", table_conversion="matplotlib")

In [9]:
# Bare last expression: render the styled validation-accuracy table inline.
val_acc_st

Horizon,1-day horizon,20-day horizon,60-day horizon
Features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Engineered,0.5235,0.5739,0.6149
Fundamental,0.5308,0.5516,0.5886
Fund + Eng,0.5209,0.5609,0.5393
Raw,0.5317,0.604,0.6263


In [10]:
# Validation AUC as a feature-set (rows) x horizon (columns) table,
# styled and exported to PNG.
res_df = pd.DataFrame(res)

# Display names for the pivoted axes.
horizon_labels = {
    "y_1": "1-day horizon",
    "y_20": "20-day horizon",
    "y_60": "60-day horizon",
}
feature_labels = {
    "raw": "Raw",
    "eng": "Engineered",
    "fund": "Fundamental",
    "fund_eng": "Fund + Eng",
}

val_auc_df = (
    res_df
    .pivot(index="Features", columns="Horizon", values="Val AUC")
    .rename(columns=horizon_labels)
    .rename(index=feature_labels)
)

# CSS rules applied to the rendered table.
table_css = [
    {"selector": "table", "props": "width:100%; border-collapse:separate; border-spacing:10px;"},
    {"selector": "th, td", "props": "padding:10px;"},
    {"selector": "th", "props": "font-size:12pt;"},
    {"selector": "td", "props": "font-size:11pt;"},
]

val_auc_st = val_auc_df.style.set_caption("Table 1: MLP - Summary Validation AUC")
val_auc_st = val_auc_st.format("{:.4f}")  # four decimals for every cell
val_auc_st = val_auc_st.set_table_styles(table_css)
val_auc_st = val_auc_st.set_properties(**{"min-width": "120px"})

dfi.export(val_auc_st, "MLP_table_validation_AUC.png", table_conversion="matplotlib")

In [11]:
# Bare last expression: render the styled validation-AUC table inline.
val_auc_st

Horizon,1-day horizon,20-day horizon,60-day horizon
Features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Engineered,0.5194,0.5091,0.5324
Fundamental,0.5283,0.5862,0.6774
Fund + Eng,0.5422,0.5903,0.6053
Raw,0.5093,0.5195,0.532


In [12]:
# Test accuracy as a feature-set (rows) x horizon (columns) table,
# styled and exported to PNG. Reads res_df built in an earlier cell.

# Display names for the pivoted axes.
horizon_labels = {
    "y_1": "1-day horizon",
    "y_20": "20-day horizon",
    "y_60": "60-day horizon",
}
feature_labels = {
    "raw": "Raw",
    "eng": "Engineered",
    "fund": "Fundamental",
    "fund_eng": "Fund + Eng",
}

test_acc_df = (
    res_df
    .pivot(index="Features", columns="Horizon", values="Test Accuracy")
    .rename(columns=horizon_labels)
    .rename(index=feature_labels)
)

# CSS rules applied to the rendered table.
table_css = [
    {"selector": "table", "props": "width:100%; border-collapse:separate; border-spacing:10px;"},
    {"selector": "th, td", "props": "padding:10px;"},
    {"selector": "th", "props": "font-size:12pt;"},
    {"selector": "td", "props": "font-size:11pt;"},
]

test_acc_st = test_acc_df.style.set_caption("Table 2: MLP -  Summary Testing Accuracy")
test_acc_st = test_acc_st.format("{:.4f}")  # four decimals for every cell
test_acc_st = test_acc_st.set_table_styles(table_css)
test_acc_st = test_acc_st.set_properties(**{"min-width": "120px"})

dfi.export(test_acc_st, "MLP_table_test_accuracy.png", table_conversion="matplotlib")

In [13]:
# Bare last expression: render the styled test-accuracy table inline.
test_acc_st

Horizon,1-day horizon,20-day horizon,60-day horizon
Features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Engineered,0.5317,0.5697,0.6781
Fundamental,0.5413,0.5659,0.5136
Fund + Eng,0.5011,0.5468,0.6419
Raw,0.535,0.6098,0.6927


In [14]:
# Test AUC as a feature-set (rows) x horizon (columns) table, styled and
# exported to PNG. Reads res_df built in an earlier cell.
test_auc_df = res_df.pivot(index = "Features", columns = "Horizon", values = "Test AUC")
test_auc_df = test_auc_df.rename(columns={
    "y_1": "1-day horizon",
    "y_20": "20-day horizon",
    "y_60": "60-day horizon"
})

test_auc_df = test_auc_df.rename(index={
    "raw": "Raw",
    "eng": "Engineered",
    "fund": "Fundamental",
    "fund_eng": "Fund + Eng"
})

test_auc_st = (
    test_auc_df.style
      # The caption previously said "Testing Accuracy" (copy-paste from the
      # accuracy table) although the values pivoted here are "Test AUC".
      .set_caption("Table 2: MLP - Summary Testing AUC")
      .format("{:.4f}")              # numeric formatting
      .set_table_styles([
          {"selector": "table", "props": "width:100%; border-collapse:separate; border-spacing:10px;"},
          {"selector": "th, td", "props": "padding:10px;"},
          {"selector": "th", "props": "font-size:12pt;"},
          {"selector": "td", "props": "font-size:11pt;"},
      ])
      .set_properties(**{"min-width": "120px"})
)
dfi.export(test_auc_st, "MLP_table_test_AUC.png", table_conversion="matplotlib")

In [15]:
# Bare last expression: render the styled test-AUC table inline.
test_auc_st

Horizon,1-day horizon,20-day horizon,60-day horizon
Features,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Engineered,0.5055,0.5117,0.511
Fundamental,0.5272,0.5996,0.7265
Fund + Eng,0.4975,0.5739,0.6828
Raw,0.5096,0.5025,0.5015
