# Load data + libs

In [1]:
# Import libs
from utils_dta_processing import *
from utils_ml_train import *
from utils_ml_train.hyperparams_ML import lrg, xgb_class, \
    r2, rmse, mape, \
    accuracy, roc_auc, precision, recall, \
    classification_report

# Load data
# Each CSV path below is read with pd.read_csv, in order, and bound positionally
# to the name at the same position on the left-hand side.
# NOTE(review): `pd` is not imported explicitly in this cell; presumably it is
# re-exported by one of the star imports above — confirm.
df_revenue, df_ebitda, df_roa, df_roe, df_roic, df_roce, df_value = map(
    pd.read_csv,
    (
        '../data_for_modelling/df_revenue.csv',
        '../data_for_modelling/df_ebitda.csv',
        '../data_for_modelling/df_roa.csv',
        '../data_for_modelling/df_roe.csv',
        '../data_for_modelling/df_roic.csv',
        '../data_for_modelling/df_roce.csv',
        '../data_for_modelling/df_value_ad.csv',
    ),
)

# Modelling

Data: split each dataset into a modelling input set and a test set

In [None]:
# Make input - test set
# Every dataset goes through input_test_split in the order listed; each result
# is unpacked into its (<name>_input, <name>_test) pair.
(
    (revenue_input, revenue_test),
    (roa_input, roa_test),
    (roe_input, roe_test),
    (roic_input, roic_test),
    (roce_input, roce_test),
    (value_input, value_test),
    (ebitda_input, ebitda_test),
) = [
    input_test_split(frame)
    for frame in (df_revenue, df_roa, df_roe, df_roic, df_roce, df_value, df_ebitda)
]

Model selection, training and evaluation (one cell per target)

In [None]:
# Revenue

# Hyper-param + model choice: display whatever optimal_param reports for this target.
# NOTE(review): reg=True presumably selects regression models inside InputData — confirm.
revenue_obj = InputData(revenue_input, 'company', 'year', 'revenue', reg=True)
display(revenue_obj.optimal_param(n_splits=3, test_size=2))

# Train / validation split, then fit the linear model on the training part only.
X_train, y_train, X_val, y_val = train_val_split(revenue_input, 'revenue')
model = lrg(fit_intercept=True)
model.fit(X_train, y_train)
y_pred = model.predict(X_val)

# The scores below are computed on the validation split (X_val / y_val), so the
# header says "VALIDATION" rather than "TRAIN".
# Metric helpers are called with the ground truth first: R2 and MAPE are not
# symmetric in their arguments, so the order changes the reported value.
# NOTE(review): assumes r2 / rmse / mape follow scikit-learn's
# (y_true, y_pred) signature — confirm in utils_ml_train.hyperparams_ML.
print('----------- VALIDATION RESULTS -----------')
print(f'R2: {r2(y_val, y_pred)}')
print(f'RMSE: {rmse(y_val, y_pred)}')
print(f'MAPE: {mape(y_val, y_pred)}')

# Test: score the same fitted model on the held-out test frame.
print('----------- TEST RESULTS -----------')
# Drop the identifier columns and the target so only feature columns remain.
X_test = revenue_test.drop(['company', 'revenue', 'year'], axis=1)
y_test = revenue_test['revenue']
y_pred_test = model.predict(X_test)
print(f'R2: {r2(y_test, y_pred_test)}')
print(f'RMSE: {rmse(y_test, y_pred_test)}')
print(f'MAPE: {mape(y_test, y_pred_test)}')


In [None]:
# Value_add

# Hyper-param + model choice: display whatever optimal_param reports for this target.
# NOTE(review): reg=False presumably selects classification models inside InputData — confirm.
value_obj = InputData(value_input, 'company', 'year', 'value_add', reg=False)
display(value_obj.optimal_param(n_splits=3, test_size=2))

# Train / validation split, then fit the classifier on the training part only.
X_train, y_train, X_val, y_val = train_val_split(value_input, 'value_add')
# NOTE(review): the hyper-parameters are hard-coded; presumably they were copied
# from the search output displayed above — confirm and record their origin.
model = xgb_class(
    n_estimators=200, learning_rate=0.02, max_depth=10,
    subsample=0.5, scale_pos_weight=0.223,
)
model.fit(X_train, y_train)
y_pred = model.predict(X_val)
# ROC AUC ranks samples by score, so it needs the positive-class probability
# rather than the thresholded labels returned by predict().
# NOTE(review): assumes xgb_class exposes scikit-learn's predict_proba and that
# the target is binary with the positive class in column 1 — confirm.
y_score = model.predict_proba(X_val)[:, 1]

# The scores below are computed on the validation split (X_val / y_val), so the
# header says "VALIDATION" rather than "TRAIN".
# Metric helpers are called with the ground truth first, matching the
# classification_report call; swapping the arguments would exchange the
# values reported as precision and recall.
# NOTE(review): assumes accuracy / precision / recall / roc_auc follow
# scikit-learn's (y_true, y_pred) signature — confirm in utils_ml_train.hyperparams_ML.
print('----------- VALIDATION RESULTS -----------')
print(f'Accuracy: {accuracy(y_val, y_pred)}')
print(f'Precision: {precision(y_val, y_pred)}')
print(f'Recall: {recall(y_val, y_pred)}')
print(f'ROC_AUC: {roc_auc(y_val, y_score)}')
# Validation report is printed here so it does not appear under the test header.
print(classification_report(y_val, y_pred, digits=3))

# Test: score the same fitted model on the held-out test frame.
print('----------- TEST RESULTS -----------')
# Drop the identifier columns and the target so only feature columns remain.
X_test = value_test.drop(['company', 'value_add', 'year'], axis=1)
y_test = value_test['value_add']
y_pred_test = model.predict(X_test)
y_score_test = model.predict_proba(X_test)[:, 1]
print(f'Accuracy: {accuracy(y_test, y_pred_test)}')
print(f'Precision: {precision(y_test, y_pred_test)}')
print(f'Recall: {recall(y_test, y_pred_test)}')
print(f'ROC_AUC: {roc_auc(y_test, y_score_test)}')
print(classification_report(y_test, y_pred_test, digits=3))

In [None]:
# Ebitda

# The hyper-parameter search for this target is disabled (commented out); the
# parameters passed to xgb_class below are hard-coded instead.
# NOTE(review): presumably disabled because the search is slow — confirm, and
# re-enable it to reproduce the parameter choice.
# # Hyper-param + model choice
# ebitda_obj = InputData(ebitda_input, 'company', 'year', 'ebitda', reg=False)
# display(ebitda_obj.optimal_param(n_splits=3, test_size=2))

# Train / validation split, then fit the classifier on the training part only.
X_train, y_train, X_val, y_val = train_val_split(ebitda_input, 'ebitda')
model = xgb_class(
    n_estimators=300, learning_rate=0.03, max_depth=15,
    subsample=0.7, scale_pos_weight=0.15134,
)
model.fit(X_train, y_train)
y_pred = model.predict(X_val)
# ROC AUC ranks samples by score, so it needs the positive-class probability
# rather than the thresholded labels returned by predict().
# NOTE(review): assumes xgb_class exposes scikit-learn's predict_proba and that
# the target is binary with the positive class in column 1 — confirm.
y_score = model.predict_proba(X_val)[:, 1]

# The scores below are computed on the validation split (X_val / y_val), so the
# header says "VALIDATION" rather than "TRAIN".
# Metric helpers are called with the ground truth first, matching the
# classification_report call; swapping the arguments would exchange the
# values reported as precision and recall.
# NOTE(review): assumes accuracy / precision / recall / roc_auc follow
# scikit-learn's (y_true, y_pred) signature — confirm in utils_ml_train.hyperparams_ML.
print('----------- VALIDATION RESULTS -----------')
print(f'Accuracy: {accuracy(y_val, y_pred)}')
print(f'Precision: {precision(y_val, y_pred)}')
print(f'Recall: {recall(y_val, y_pred)}')
print(f'ROC_AUC: {roc_auc(y_val, y_score)}')
# Validation report is printed here so it does not appear under the test header.
print(classification_report(y_val, y_pred, digits=3))

# Test: score the same fitted model on the held-out test frame.
print('----------- TEST RESULTS -----------')
# Drop the identifier columns and the target so only feature columns remain.
X_test = ebitda_test.drop(['company', 'ebitda', 'year'], axis=1)
y_test = ebitda_test['ebitda']
y_pred_test = model.predict(X_test)
y_score_test = model.predict_proba(X_test)[:, 1]
print(f'Accuracy: {accuracy(y_test, y_pred_test)}')
print(f'Precision: {precision(y_test, y_pred_test)}')
print(f'Recall: {recall(y_test, y_pred_test)}')
print(f'ROC_AUC: {roc_auc(y_test, y_score_test)}')
print(classification_report(y_test, y_pred_test, digits=3))