# Random Forest Model

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
import helper_functions as hf
import Models as models
import MLP_Model as mlp
import Random_Forest_Model as rf

In [5]:
df = hf.load_data()
df = hf.create_raw_features(df)
df = hf.create_fundamental_features(df)
df = hf.create_engineered_features(df)
df = hf.create_binary_labels(df,[1,20,60])

raw = ['ret','volume']
eng = [
    'mean_20', 'mean_60',
    'vol_20', 'vol_60',
    'mom_20', 'mom_60',
    'ema_12', 'ema_26', 'ema_cross',
    'skew_20', 'skew_60',
    'kurt_20', 'kurt_60',
    'vol_z'
]
fund = [
    'eps',
    'profit_margin',
    'revenue_growth',
    'income_growth',
    'gross_margin',
    'operating_margin',
    'sga_ratio',
    'rd_ratio',
    'cost_ratio',
    'net_income_per_share',
    'tax_burden',
    'nonop_ratio',
    'abnormal_ratio',
    'revenue_per_share',
    'da_ratio',
    'interest_coverage',
    'interest_burden'
]
fund_eng = fund + eng

#rt = df.pivot(index='date', columns='ticker', values='ret')


  df['revenue_growth'] = df.groupby('ticker')['revenue'].pct_change()
  df["income_growth"] = df.groupby("ticker")["net_income"].pct_change()


# 1 Day Horizon

In [6]:
TARGET = "y_1"
target = 1

## Raw Features

In [7]:
FEATURES = raw

In [8]:
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')

In [9]:
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.4972810690259312
Test accuracy: 0.5014418811002662
              precision    recall  f1-score   support

           0       0.46      0.42      0.44      4198
           1       0.53      0.58      0.55      4818

    accuracy                           0.50      9016
   macro avg       0.50      0.50      0.49      9016
weighted avg       0.50      0.50      0.50      9016



## Fundamental Features

In [10]:
FEATURES = fund
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.5326737065018315
Test accuracy: 0.5108695652173914
              precision    recall  f1-score   support

           0       0.47      0.59      0.52       416
           1       0.57      0.44      0.50       504

    accuracy                           0.51       920
   macro avg       0.52      0.52      0.51       920
weighted avg       0.52      0.51      0.51       920



## Engineered Features

In [11]:
FEATURES = eng
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.5068948505989628
Test accuracy: 0.5056566104702751
              precision    recall  f1-score   support

           0       0.47      0.50      0.49      4198
           1       0.54      0.51      0.52      4818

    accuracy                           0.51      9016
   macro avg       0.51      0.51      0.50      9016
weighted avg       0.51      0.51      0.51      9016



## Fundamental & Engneered Features

In [12]:
FEATURES = eng + fund
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.4988600808913308
Test accuracy: 0.475
              precision    recall  f1-score   support

           0       0.44      0.62      0.52       416
           1       0.53      0.36      0.43       504

    accuracy                           0.47       920
   macro avg       0.49      0.49      0.47       920
weighted avg       0.49      0.47      0.47       920



# 20 Day Horizon

In [13]:
TARGET = "y_20"
target = 20

## Raw Features

In [14]:
FEATURES = raw
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.5098113748423346
Test accuracy: 0.46691403834260975
              precision    recall  f1-score   support

           0       0.41      0.75      0.53      3240
           1       0.62      0.28      0.39      4845

    accuracy                           0.47      8085
   macro avg       0.52      0.51      0.46      8085
weighted avg       0.54      0.47      0.44      8085



## Fundamental Features

In [15]:
FEATURES = fund
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.5779907091755894
Test accuracy: 0.6096969696969697
              precision    recall  f1-score   support

           0       0.58      0.33      0.42       353
           1       0.62      0.82      0.71       472

    accuracy                           0.61       825
   macro avg       0.60      0.57      0.56       825
weighted avg       0.60      0.61      0.58       825



## Engineered Features

In [16]:
FEATURES = eng
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.4952306055625629
Test accuracy: 0.5100803957946816
              precision    recall  f1-score   support

           0       0.40      0.43      0.41      3240
           1       0.60      0.56      0.58      4845

    accuracy                           0.51      8085
   macro avg       0.50      0.50      0.50      8085
weighted avg       0.52      0.51      0.51      8085



## Fundamental & Engineered Features

In [17]:
FEATURES = eng + fund
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.5519818024679503
Test accuracy: 0.5212121212121212
              precision    recall  f1-score   support

           0       0.46      0.64      0.53       353
           1       0.62      0.43      0.51       472

    accuracy                           0.52       825
   macro avg       0.54      0.54      0.52       825
weighted avg       0.55      0.52      0.52       825



# 60 Days Horizon

In [18]:
TARGET = "y_60"
target = 60

## Raw Features

In [19]:
FEATURES = raw
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.5128737554645844
Test accuracy: 0.5681632653061225
              precision    recall  f1-score   support

           0       0.29      0.37      0.33      1728
           1       0.72      0.65      0.68      4397

    accuracy                           0.57      6125
   macro avg       0.51      0.51      0.50      6125
weighted avg       0.60      0.57      0.58      6125



## Fundamental Features

In [20]:
FEATURES = fund
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.526414167307528
Test accuracy: 0.4704
              precision    recall  f1-score   support

           0       0.35      0.37      0.36       249
           1       0.56      0.54      0.55       376

    accuracy                           0.47       625
   macro avg       0.45      0.45      0.45       625
weighted avg       0.48      0.47      0.47       625



## Engineered Features

In [21]:
FEATURES = eng
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.4965184990397493
Test accuracy: 0.5831836734693877
              precision    recall  f1-score   support

           0       0.25      0.23      0.24      1728
           1       0.71      0.72      0.71      4397

    accuracy                           0.58      6125
   macro avg       0.48      0.48      0.48      6125
weighted avg       0.58      0.58      0.58      6125



## Fundamental & Engineered Features

In [22]:
FEATURES = eng + fund
pruned_df = hf.prune(df,FEATURES,TARGET)
train,val,test = hf.time_split(df,0.7,0.15,'date')
rf.run_optimize_eval_RF(train, val, test, FEATURES, TARGET)

Test AUC: 0.7742886439374519
Test accuracy: 0.6176
              precision    recall  f1-score   support

           0       0.51      0.74      0.61       249
           1       0.76      0.54      0.63       376

    accuracy                           0.62       625
   macro avg       0.64      0.64      0.62       625
weighted avg       0.66      0.62      0.62       625

