In [1]:
import pandas as pd
import numpy as np
from MLP.utils import datasets,plot_table,train_model
from sklearn import metrics
from imblearn.under_sampling import RandomUnderSampler
from tabulate import tabulate
import matplotlib.pyplot as plt
from fraudDetec.data_processing import DataProcessor
from fraudDetec.utils import evaluate,null_check,results
from fraudDetec.models import rus_boost, svm_model, xgb_model, logistic_regression_model,probit_regression_model,MLP,mlp_grid_search, random_forests
import json
from collections import defaultdict

In [2]:
# Read the merged Compustat/label table. Infinite values are first turned into
# NaN and then every missing value is zero-filled, all in one chained expression.
data = (
    pd.read_csv('./data/merged_compustat_and_labels.csv')
    .replace([np.inf, -np.inf], np.nan)
    .fillna(0)
)

# Feature-group definitions: a mapping of group name -> list of column names.
with open('MLP/features.json') as json_file:
    data_items = json.load(json_file)

# NOTE(review): the three year pairs and the trailing 5 are passed positionally;
# presumably train / validation / test periods and a window length -- confirm
# against DataProcessor's signature.
data_obj = DataProcessor(data, (1990, 2002), (2002, 2002), (2003, 2019), 5)

In [3]:
# Display label -> model callable handed to results() by the evaluation loops.
models = {
    "MLP": MLP,
    "RUS BOOST": rus_boost,
    "Logit": logistic_regression_model,
    "Probit": probit_regression_model,
    "Xg Boost": xgb_model,
}

---

-----

### Batch Processing

In [9]:
## TEST
# NOTE: disabled scratch cell -- every statement below is commented out, so
# running this cell does nothing. It sketches a manual resampling check:
# BorderlineSMOTE oversampling (sampling_strategy=0.1) followed by
# RandomUnderSampler, then printing the resulting train/test shapes.
# NOTE(review): the snippet reads `train_period` and `test_period`, which no
# earlier cell defines; they must exist before it is re-enabled.

# from imblearn.under_sampling import RandomUnderSampler
# from imblearn.over_sampling import BorderlineSMOTE

# train_data, validation_data, test_data = data_obj.split_data_periods(train_period, test_period)
# train_data, validation_data, test_data = null_check(data_items['features'], train_data, validation_data, test_data)

# X_train, y_train = train_data[data_items['features']], train_data['misstate']
# X_test, y_test = test_data[data_items['features']], test_data['misstate']

# print(X_train.shape,y_train.value_counts())

# X_train_resampled, y_train_resampled = BorderlineSMOTE(sampling_strategy=0.1, random_state=42).fit_resample(X_train, y_train)
# X_train_resampled, y_train_resampled = RandomUnderSampler(random_state=42).fit_resample(X_train_resampled, y_train_resampled)

# print("Train Shape: ",X_train_resampled.shape, y_train_resampled.shape)
# print("Test Shape: ",X_test.shape, y_test.shape)

(120819, 42) 0    119988
1       831
Name: misstate, dtype: int64
Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)


In [6]:
# Batch experiment split: one training span and a list of hold-out spans,
# each written as an inclusive-looking (start_year, end_year) pair.
train_period = (1990, 2002)

test_periods = [
    (2003, 2019),
]

In [6]:
train_period = (1990, 2002)

# Nested result store: res[test_period][feature_group][model_label] -> score
# returned by results().
res = defaultdict(lambda: defaultdict(dict))

for test_period in test_periods:
    for model, model_fn in models.items():
        for data_item, feature_names in data_items.items():
            # No hyper-parameter grid here; 'over_under' selects the sampling mode.
            auc = results(
                data_obj,
                train_period,
                test_period,
                feature_names,
                model_fn,
                param_grid=None,
                sample='over_under',
            )
            res[test_period][data_item][model] = auc

Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)
Train Shape:  (23996, 28) (23996,)
Test Shape:  (125314, 28) (125314,)
Train Shape:  (23996, 14) (23996,)
Test Shape:  (125314, 14) (125314,)
Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)
Train Shape:  (23996, 28) (23996,)
Test Shape:  (125314, 28) (125314,)
Train Shape:  (23996, 14) (23996,)
Test Shape:  (125314, 14) (125314,)
Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)
Train Shape:  (23996, 28) (23996,)
Test Shape:  (125314, 28) (125314,)
Train Shape:  (23996, 14) (23996,)
Test Shape:  (125314, 14) (125314,)
Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)
         Current function value: 0.537376
         Iterations: 35




Train Shape:  (23996, 28) (23996,)
Test Shape:  (125314, 28) (125314,)
         Current function value: 0.586711
         Iterations: 35




Train Shape:  (23996, 14) (23996,)
Test Shape:  (125314, 14) (125314,)
         Current function value: 0.580127
         Iterations: 35


  L = q*self.pdf(q*XB)/self.cdf(q*XB)
  return np.dot(-L*(L+XB)*X.T,X)


Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)
Test AUC Score: 0.7002878463521306
Train Shape:  (23996, 28) (23996,)
Test Shape:  (125314, 28) (125314,)
Test AUC Score: 0.7060311413405809
Train Shape:  (23996, 14) (23996,)
Test Shape:  (125314, 14) (125314,)
Test AUC Score: 0.6773307399036717


In [7]:
# Show the nested scores filled in by the batch evaluation loop in the cell above.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': {'MLP': 0.6208160057086918,
                           'RUS BOOST': 0.7039460381406621,
                           'Logit': 0.6256666720096601,
                           'Probit': 0.6394080200706581,
                           'Xg Boost': 0.7002878463521306},
                          'raw_financial_items_28': {'MLP': 0.611555959842061,
                           'RUS BOOST': 0.7017081179068051,
                           'Logit': 0.6277118140709513,
                           'Probit': 0.6220716833813312,
                           'Xg Boost': 0.7060311413405809},
                          'financial_ratios_14': {'MLP': 0.5273529652723234,
                           'RUS BOOST': 0.6558439881528025,
                           'Logit': 0.637033178208423,
                           'Probit': 0.6338894647448513,
                           'Xg Boost'

## Batch - MLP Tuning

In [7]:
# Result store for this sweep: res[test_period][feature_group] -> score
# returned by results() when run with mlp_grid_search.
res = defaultdict(lambda: defaultdict(dict))

for test_period in test_periods:
    for data_item, feature_names in data_items.items():
        n_features = len(feature_names)
        # Every candidate architecture starts with a layer as wide as the
        # feature group, followed by one of these layer-size tails.
        layer_tails = [
            (40,),
            (40, 50),
            (30, 40, 30),
            (40, 50, 60, 40),
            (30, 30, 30, 30),
        ]
        param_grid = {
            'activation': ['logistic', 'tanh', 'relu'],
            'hidden_layer_sizes': [(n_features, *tail) for tail in layer_tails],
            'learning_rate_init': [0.001, 0.01, 0.1],
        }
        auc = results(
            data_obj, train_period, test_period, feature_names,
            mlp_grid_search, param_grid, 'over_under',
        )
        res[test_period][data_item] = auc

Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)
Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (42, 40, 50, 60, 40), 'learning_rate_init': 0.001}
Best AUC Score: 0.940478565135647
Test AUC Score: 0.6501184289343281
Train Shape:  (23996, 28) (23996,)
Test Shape:  (125314, 28) (125314,)
Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (28, 30, 30, 30, 30), 'learning_rate_init': 0.001}
Best AUC Score: 0.9360213749507895
Test AUC Score: 0.6432269246501379
Train Shape:  (23996, 14) (23996,)
Test Shape:  (125314, 14) (125314,)
Best Hyperparameters: {'activation': 'tanh', 'hidden_layer_sizes': (14, 30, 40, 30), 'learning_rate_init': 0.001}
Best AUC Score: 0.9662352677837408
Test AUC Score: 0.5890477687362455


In [8]:
# Show the per-feature-group scores from the first MLP grid search above.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': 0.6501184289343281,
                          'raw_financial_items_28': 0.6432269246501379,
                          'financial_ratios_14': 0.5890477687362455})})

In [9]:
# Second MLP sweep with a smaller architecture grid.
# res[test_period][feature_group] -> score returned by results().
res = defaultdict(lambda: defaultdict(dict))

for test_period in test_periods:
    for data_item, feature_names in data_items.items():
        n_features = len(feature_names)
        # Leading layer width equals the feature-group size; the tails vary.
        layer_tails = [
            (20,),
            (40, 50, 10),
            (40, 30, 50, 20),
        ]
        param_grid = {
            'activation': ['logistic', 'tanh', 'relu'],
            'hidden_layer_sizes': [(n_features, *tail) for tail in layer_tails],
            'learning_rate_init': [0.001, 0.01, 0.09],
        }
        auc = results(
            data_obj, train_period, test_period, feature_names,
            mlp_grid_search, param_grid, 'over_under',
        )
        res[test_period][data_item] = auc

Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)
Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (42, 40, 30, 50, 20), 'learning_rate_init': 0.001}
Best AUC Score: 0.9354832251574731
Test AUC Score: 0.6525207204254923
Train Shape:  (23996, 28) (23996,)
Test Shape:  (125314, 28) (125314,)
Best Hyperparameters: {'activation': 'relu', 'hidden_layer_sizes': (28, 40, 30, 50, 20), 'learning_rate_init': 0.001}
Best AUC Score: 0.9175614069664443
Test AUC Score: 0.6402278059472267
Train Shape:  (23996, 14) (23996,)
Test Shape:  (125314, 14) (125314,)
Best Hyperparameters: {'activation': 'tanh', 'hidden_layer_sizes': (14, 40, 30, 50, 20), 'learning_rate_init': 0.001}
Best AUC Score: 0.9639525122302116
Test AUC Score: 0.5770155403933275


In [10]:
# Show the per-feature-group scores from the second MLP grid search above.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': 0.6525207204254923,
                          'raw_financial_items_28': 0.6402278059472267,
                          'financial_ratios_14': 0.5770155403933275})})

------------
_____________

## XGBoost Tuning

In [12]:
# XGBoost sweep: res[test_period][feature_group] -> score returned by results().
res = defaultdict(lambda: defaultdict(dict))
print("XgBoost Results:))")

for test_period in test_periods:
    for data_item, feature_names in data_items.items():
        # A fresh grid object is built on every pass, exactly as before.
        param_grid = dict(
            max_depth=[3, 4, 5],
            learning_rate=[0.001, 0.01, 0.09],
        )
        auc = results(
            data_obj, train_period, test_period, feature_names,
            xgb_model, param_grid, 'over_under',
        )
        res[test_period][data_item] = auc


XgBoost Results:))
Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)
Best Hyperparameters: {'learning_rate': 0.09, 'max_depth': 5}
Best AUC Score: 0.9912572152821199
Test AUC Score: 0.7083533027714106
Train Shape:  (23996, 28) (23996,)
Test Shape:  (125314, 28) (125314,)
Best Hyperparameters: {'learning_rate': 0.09, 'max_depth': 5}
Best AUC Score: 0.991427594541707
Test AUC Score: 0.7152315089385313
Train Shape:  (23996, 14) (23996,)
Test Shape:  (125314, 14) (125314,)
Best Hyperparameters: {'learning_rate': 0.09, 'max_depth': 5}
Best AUC Score: 0.9820219265839933
Test AUC Score: 0.6577597333015128


In [13]:
# Show the per-feature-group scores from the XGBoost grid search above.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': 0.7083533027714106,
                          'raw_financial_items_28': 0.7152315089385313,
                          'financial_ratios_14': 0.6577597333015128})})

-----------
----

In [None]:
# NOTE: disabled draft -- the whole cell is commented out and defines nothing.
# NOTE(review): if re-enabled, this `random_forests` would shadow the one
# imported from fraudDetec.models at the top of the notebook. The body also uses
# X_train / y_train / X_test / y_test, none of which are derived from its
# parameters (data_obj, train_period, test_period, item), so it would rely on
# globals that happen to exist in the kernel.
# from sklearn.ensemble import RandomForestClassifier
# from sklearn.model_selection import GridSearchCV
# def random_forests(data_obj,train_period,test_period,item):
#     # Define the Random Forest classifier
#     rf_model = RandomForestClassifier(random_state=42)

#     # Define the parameter grid to search
#     param_grid = {
#         'n_estimators': [100, 200, 300],  # Number of trees in the forest
#         'max_depth': [None, 10, 20, 30],  # Maximum depth of the tree
#         'min_samples_split': [2, 5, 10],  # Minimum number of samples required to split an internal node
#         'min_samples_leaf': [1, 2, 4]  # Minimum number of samples required to be at a leaf node
#     }

#     # Perform grid search using cross-validation
#     grid_search = GridSearchCV(estimator=rf_model, param_grid=param_grid, scoring='roc_auc', cv=5)
#     grid_search.fit(X_train, y_train)

#     # Get the best hyperparameters and the corresponding AUC score
#     best_params = grid_search.best_params_
#     best_auc = grid_search.best_score_
#     test_auc = grid_search.score(X_test, y_test)

#     # Print the best hyperparameters, the corresponding AUC score, and test AUC Score
#     print("Best Hyperparameters:", best_params)
#     print("Best AUC Score:", best_auc)
#     print("Test AUC Score:", test_auc)

#     # Return the test AUC score
#     return test_auc

In [None]:
# random_forests

## Random Forest Tuning

In [14]:
# Random Forest sweep: res[test_period][feature_group] -> score returned by results().
res = defaultdict(lambda: defaultdict(dict))
print("Random Forest Results:))")

for test_period in test_periods:
    for data_item, feature_names in data_items.items():
        # Rebuilt on every pass, as in the original cell.
        param_grid = dict(
            n_estimators=[100, 200, 300],      # number of trees in the forest
            max_depth=[None, 10, 20, 30],      # maximum depth of each tree
            min_samples_split=[2, 5, 10],      # min samples needed to split an internal node
            min_samples_leaf=[1, 2, 4],        # min samples required at a leaf node
        )
        auc = results(
            data_obj, train_period, test_period, feature_names,
            random_forests, param_grid, 'over_under',
        )
        res[test_period][data_item] = auc

Random Forest Results:))
Train Shape:  (23996, 42) (23996,)
Test Shape:  (125314, 42) (125314,)
Best Hyperparameters: {'max_depth': 30, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Best AUC Score: 0.993149231385473
Test AUC Score: 0.7136662790028074
Train Shape:  (23996, 28) (23996,)
Test Shape:  (125314, 28) (125314,)
Best Hyperparameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Best AUC Score: 0.9933354636118636
Test AUC Score: 0.7049460609968008
Train Shape:  (23996, 14) (23996,)
Test Shape:  (125314, 14) (125314,)
Best Hyperparameters: {'max_depth': None, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 300}
Best AUC Score: 0.9858569702870987
Test AUC Score: 0.6853893097980408


In [15]:
# Show the per-feature-group scores from the Random Forest grid search above.
res

defaultdict(<function __main__.<lambda>()>,
            {(2003,
              2019): defaultdict(dict,
                         {'features': 0.7136662790028074,
                          'raw_financial_items_28': 0.7049460609968008,
                          'financial_ratios_14': 0.6853893097980408})})

### Oversample-Batch

|                  | MLP      | RUS BOOST | Logit    | Probit   | Xg Boost | MLP (tuned, grid 1) | MLP (tuned, grid 2) | Xg Boost (tuned) | Random Forest (tuned) |
|------------------|----------|-----------|----------|----------|----------|----------|----------|----------|--------------|
| features         | 0.6208   | 0.7039    | 0.6257   | 0.6394   | 0.7003   | 0.6501   | 0.6525   | 0.7084   | 0.7137       |
| raw_financial_items_28 | 0.6116   | 0.7017    | 0.6277   | 0.6221   | 0.7060   | 0.6432   | 0.6402   | 0.7152   | 0.7049       |
| financial_ratios_14   | 0.5274   | 0.6558    | 0.6370   | 0.6339   | 0.6773   | 0.5890   | 0.5770   | 0.6578   | 0.6854       |


----

------

## Window Processing

In [3]:
# Ask the DataProcessor for its rolling windows: two parallel sequences of
# (start_year, end_year) periods, paired position-by-position below.
train_batches,test_batches = data_obj.create_batches()

In [4]:
# Preview the train/test window pairs; the final entry of each list is dropped.
for train_period, test_period in zip(train_batches[:-1], test_batches[:-1]):
    print(train_period, test_period)

(1990, 1994) (1995, 1999)
(1995, 1999) (2000, 2004)
(2000, 2004) (2005, 2009)
(2005, 2009) (2010, 2014)
(2010, 2014) (2015, 2019)


In [5]:
# Display label -> model callable for the rolling-window runs
# (the batch registry plus Random Forest).
models = {
    "MLP": MLP,
    "RUS BOOST": rus_boost,
    "Logit": logistic_regression_model,
    "Probit": probit_regression_model,
    "Xg Boost": xgb_model,
    "RF": random_forests,
}

In [6]:
# Rolling-window results:
# res["<train_period>-<test_period>"][feature_group][model_label] -> score
# returned by results().
res = defaultdict(lambda: defaultdict(dict))

for train_period, test_period in zip(train_batches[:-1], test_batches[:-1]):
    # Same text as str(train_period) + '-' + str(test_period), built once per window.
    window_key = f"{train_period}-{test_period}"
    for model, model_fn in models.items():
        for data_item, feature_names in data_items.items():
            auc = results(
                data_obj,
                train_period,
                test_period,
                feature_names,
                model_fn,
                sample='over_under',
            )
            res[window_key][data_item][model] = auc

Train Shape:  (8132, 42) (8132,)
Test Shape:  (51517, 42) (51517,)
Train Shape:  (8132, 28) (8132,)
Test Shape:  (51517, 28) (51517,)
Train Shape:  (8132, 14) (8132,)
Test Shape:  (51517, 14) (51517,)
Train Shape:  (8132, 42) (8132,)
Test Shape:  (51517, 42) (51517,)
Train Shape:  (8132, 28) (8132,)
Test Shape:  (51517, 28) (51517,)
Train Shape:  (8132, 14) (8132,)
Test Shape:  (51517, 14) (51517,)
Train Shape:  (8132, 42) (8132,)
Test Shape:  (51517, 42) (51517,)
Train Shape:  (8132, 28) (8132,)
Test Shape:  (51517, 28) (51517,)
Train Shape:  (8132, 14) (8132,)
Test Shape:  (51517, 14) (51517,)
Train Shape:  (8132, 42) (8132,)
Test Shape:  (51517, 42) (51517,)
Optimization terminated successfully.
         Current function value: 0.611003
         Iterations 12
Train Shape:  (8132, 28) (8132,)
Test Shape:  (51517, 28) (51517,)
Optimization terminated successfully.
         Current function value: 0.630004
         Iterations 11
Train Shape:  (8132, 14) (8132,)
Test Shape:  (51517, 14)



Train Shape:  (8132, 42) (8132,)
Test Shape:  (51517, 42) (51517,)
Test AUC Score: 0.7314284148399571
Train Shape:  (8132, 28) (8132,)
Test Shape:  (51517, 28) (51517,)
Test AUC Score: 0.7135930571488147
Train Shape:  (8132, 14) (8132,)
Test Shape:  (51517, 14) (51517,)
Test AUC Score: 0.6465615210118072
Train Shape:  (8132, 42) (8132,)
Test Shape:  (51517, 42) (51517,)
Random Forest Test AUC Score: 0.736251123680797
Train Shape:  (8132, 28) (8132,)
Test Shape:  (51517, 28) (51517,)
Random Forest Test AUC Score: 0.6945142643678378
Train Shape:  (8132, 14) (8132,)
Test Shape:  (51517, 14) (51517,)
Random Forest Test AUC Score: 0.6172431377514541
Train Shape:  (10240, 42) (10240,)
Test Shape:  (45675, 42) (45675,)
Train Shape:  (10240, 28) (10240,)
Test Shape:  (45675, 28) (45675,)
Train Shape:  (10240, 14) (10240,)
Test Shape:  (45675, 14) (45675,)
Train Shape:  (10240, 42) (10240,)
Test Shape:  (45675, 42) (45675,)
Train Shape:  (10240, 28) (10240,)
Test Shape:  (45675, 28) (45675,)
Tr

  L = q*self.pdf(q*XB)/self.cdf(q*XB)
  return np.dot(-L*(L+XB)*X.T,X)


Train Shape:  (10240, 28) (10240,)
Test Shape:  (45675, 28) (45675,)
Optimization terminated successfully.
         Current function value: nan
         Iterations 9


  L = q*self.pdf(q*XB)/self.cdf(q*XB)


Train Shape:  (10240, 14) (10240,)
Test Shape:  (45675, 14) (45675,)
Optimization terminated successfully.
         Current function value: 0.518702
         Iterations 16
Train Shape:  (10240, 42) (10240,)
Test Shape:  (45675, 42) (45675,)
Test AUC Score: 0.7681785633258986
Train Shape:  (10240, 28) (10240,)
Test Shape:  (45675, 28) (45675,)
Test AUC Score: 0.7612001471133426
Train Shape:  (10240, 14) (10240,)
Test Shape:  (45675, 14) (45675,)
Test AUC Score: 0.7447195053955136
Train Shape:  (10240, 42) (10240,)
Test Shape:  (45675, 42) (45675,)
Random Forest Test AUC Score: 0.7900588349378936
Train Shape:  (10240, 28) (10240,)
Test Shape:  (45675, 28) (45675,)
Random Forest Test AUC Score: 0.7754834912589059
Train Shape:  (10240, 14) (10240,)
Test Shape:  (45675, 14) (45675,)
Random Forest Test AUC Score: 0.7310400270308635
Train Shape:  (9032, 42) (9032,)
Test Shape:  (38240, 42) (38240,)
Train Shape:  (9032, 28) (9032,)
Test Shape:  (38240, 28) (38240,)
Train Shape:  (9032, 14) (90



Train Shape:  (9032, 14) (9032,)
Test Shape:  (38240, 14) (38240,)
Optimization terminated successfully.
         Current function value: 0.588084
         Iterations 13
Train Shape:  (9032, 42) (9032,)
Test Shape:  (38240, 42) (38240,)
Test AUC Score: 0.7046788734577126
Train Shape:  (9032, 28) (9032,)
Test Shape:  (38240, 28) (38240,)
Test AUC Score: 0.7042452674092351
Train Shape:  (9032, 14) (9032,)
Test Shape:  (38240, 14) (38240,)
Test AUC Score: 0.6588543812255119
Train Shape:  (9032, 42) (9032,)
Test Shape:  (38240, 42) (38240,)
Random Forest Test AUC Score: 0.7374325401456693
Train Shape:  (9032, 28) (9032,)
Test Shape:  (38240, 28) (38240,)
Random Forest Test AUC Score: 0.7270168026429269
Train Shape:  (9032, 14) (9032,)
Test Shape:  (38240, 14) (38240,)
Random Forest Test AUC Score: 0.6792918112871303
Train Shape:  (7608, 42) (7608,)
Test Shape:  (36109, 42) (36109,)
Train Shape:  (7608, 28) (7608,)
Test Shape:  (36109, 28) (36109,)
Train Shape:  (7608, 14) (7608,)
Test Shap



Train Shape:  (7608, 42) (7608,)
Test Shape:  (36109, 42) (36109,)
Test AUC Score: 0.7345239738151097
Train Shape:  (7608, 28) (7608,)
Test Shape:  (36109, 28) (36109,)
Test AUC Score: 0.731414268085115
Train Shape:  (7608, 14) (7608,)
Test Shape:  (36109, 14) (36109,)
Test AUC Score: 0.6632931519671669
Train Shape:  (7608, 42) (7608,)
Test Shape:  (36109, 42) (36109,)
Random Forest Test AUC Score: 0.7385179096089268
Train Shape:  (7608, 28) (7608,)
Test Shape:  (36109, 28) (36109,)
Random Forest Test AUC Score: 0.700557918969245
Train Shape:  (7608, 14) (7608,)
Test Shape:  (36109, 14) (36109,)
Random Forest Test AUC Score: 0.6686103327513135
Train Shape:  (7194, 42) (7194,)
Test Shape:  (33759, 42) (33759,)
Train Shape:  (7194, 28) (7194,)
Test Shape:  (33759, 28) (33759,)
Train Shape:  (7194, 14) (7194,)
Test Shape:  (33759, 14) (33759,)
Train Shape:  (7194, 42) (7194,)
Test Shape:  (33759, 42) (33759,)
Train Shape:  (7194, 28) (7194,)
Test Shape:  (33759, 28) (33759,)
Train Shape: 

In [7]:
# Show the rolling-window scores, keyed by "<train_period>-<test_period>".
res

defaultdict(<function __main__.<lambda>()>,
            {'(1990, 1994)-(1995, 1999)': defaultdict(dict,
                         {'features': {'MLP': 0.5973735004727807,
                           'RUS BOOST': 0.7409942693066788,
                           'Logit': 0.5876159824419167,
                           'Probit': 0.6351715081796403,
                           'Xg Boost': 0.7314284148399571,
                           'RF': 0.736251123680797},
                          'raw_financial_items_28': {'MLP': 0.5746119324663903,
                           'RUS BOOST': 0.7123182503400178,
                           'Logit': 0.5489464961002686,
                           'Probit': 0.5668716852597647,
                           'Xg Boost': 0.7135930571488147,
                           'RF': 0.6945142643678378},
                          'financial_ratios_14': {'MLP': 0.4983338971423387,
                           'RUS BOOST': 0.6374681636898182,
                           'Logit': 0.6918

### Window-Oversample

| Year Range                | MLP       | RUS BOOST | Logit     | Probit    | Xg Boost  | RF        |
|---------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
| (1990, 1994)-(1995, 1999) |           |           |           |           |           |           |
| features                  | 0.5974    | 0.7410    | 0.5876    | 0.6352    | 0.7314    | 0.7363    |
| raw_financial_items_28    | 0.5746    | 0.7123    | 0.5489    | 0.5669    | 0.7136    | 0.6945    |
| financial_ratios_14       | 0.4983    | 0.6375    | 0.6919    | 0.6915    | 0.6466    | 0.6172    |
|---------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
| (1995, 1999)-(2000, 2004) |           |           |           |           |           |           |
| features                  | 0.6211    | 0.7810    | 0.6430    | None      | 0.7682    | 0.7901    |
| raw_financial_items_28    | 0.6232    | 0.7785    | 0.5925    | None      | 0.7612    | 0.7755    |
| financial_ratios_14       | 0.5953    | 0.7287    | 0.7045    | 0.7060    | 0.7447    | 0.7310    |
|---------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
| (2000, 2004)-(2005, 2009) |           |           |           |           |           |           |
| features                  | 0.5487    | 0.7055    | 0.6158    | 0.6170    | 0.7047    | 0.7374    |
| raw_financial_items_28    | 0.5999    | 0.6816    | 0.6224    | 0.6047    | 0.7042    | 0.7270    |
| financial_ratios_14       | 0.5511    | 0.6589    | 0.6116    | 0.6094    | 0.6589    | 0.6793    |
|---------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
| (2005, 2009)-(2010, 2014) |           |           |           |           |           |           |
| features                  | 0.5401    | 0.7251    | 0.6066    | 0.6128    | 0.7345    | 0.7385    |
| raw_financial_items_28    | 0.5283    | 0.7401    | 0.5827    | 0.5709    | 0.7314    | 0.7006    |
| financial_ratios_14       | 0.5193    | 0.6285    | 0.6226    | 0.6468    | 0.6633    | 0.6686    |
|---------------------------|-----------|-----------|-----------|-----------|-----------|-----------|
| (2010, 2014)-(2015, 2019) |           |           |           |           |           |           |
| features                  | 0.5818    | 0.6777    | 0.4903    | 0.6331    | 0.7150    | 0.7122    |
| raw_financial_items_28    | 0.6354    | 0.6894    | 0.5458    | 0.5436    | 0.7123    | 0.6998    |
| financial_ratios_14       | 0.5323    | 0.5635    | 0.4689    | 0.6340    | 0.6731    | 0.6800    |
