# Dependencies

In [30]:
# pandas
import pandas as pd

# sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.model_selection import cross_val_score, cross_validate

# tf keras
from keras.optimizers import Adam
from keras.models import Sequential
from keras.layers import Dense
from keras.losses import Huber

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Dataset

In [31]:
df = pd.read_csv('/content/drive/My Drive/Grupo Turing/value2.csv')

In [32]:
#df.head()
df.shape

(1330, 12)

In [None]:
df = df.dropna()
X = df.drop(['Unnamed: 0', 'Stock', 'HPR'], axis=1)
y = df['HPR']

In [None]:
df.isna().sum(axis=0)

Unnamed: 0         0
ROA                0
delta_ROA          0
CFO                0
Accrual            0
delta_leverage     0
delta_liquidity    0
issue_new          0
delta_margin       0
delta_turn_over    0
HPR                0
Stock              0
dtype: int64

In [None]:
X.columns

Index(['ROA', 'delta_ROA', 'CFO', 'Accrual', 'delta_leverage',
       'delta_liquidity', 'issue_new', 'delta_margin', 'delta_turn_over'],
      dtype='object')

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [None]:
X_train_norm = (X_train - X_train.mean(axis=0)) /  X_train.std(axis=0)
#X_test_norm = (X_test - X_train.mean(axis=0)) /  X_train.std(axis=0)

# Sklearn Model

In [None]:
model = ExtraTreesRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                    max_depth=None, max_features='auto', max_leaf_nodes=None,
                    max_samples=None, min_impurity_decrease=0.0,
                    min_impurity_split=None, min_samples_leaf=1,
                    min_samples_split=2, min_weight_fraction_leaf=0.0,
                    n_estimators=100, n_jobs=-1, oob_score=False,
                    random_state=2025, verbose=0, warm_start=False)
model.fit(X_train, y_train)

ExtraTreesRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                    max_depth=None, max_features='auto', max_leaf_nodes=None,
                    max_samples=None, min_impurity_decrease=0.0,
                    min_impurity_split=None, min_samples_leaf=1,
                    min_samples_split=2, min_weight_fraction_leaf=0.0,
                    n_estimators=100, n_jobs=-1, oob_score=False,
                    random_state=2025, verbose=0, warm_start=False)

In [None]:
y_hat = model.predict(X_test)

In [None]:
def evaluate(y_test, y_hat):
    #r2 = r2_score(y_test, y_hat)
    mae = mean_absolute_error(y_test, y_hat)
    mse = mean_squared_error(y_test, y_hat)
    #print('R2: ', r2)
    print('MAE: ', mae)
    print('MSE: ', mse)

In [None]:
evaluate(y_test, y_hat)

MAE:  0.22009383192420184
MSE:  0.09184166330717501


## Cross Validation

In [None]:
extra = ExtraTreesRegressor(bootstrap=False, ccp_alpha=0.0, criterion='mse',
                    max_depth=None, max_features='auto', max_leaf_nodes=None,
                    max_samples=None, min_impurity_decrease=0.0,
                    min_impurity_split=None, min_samples_leaf=1,
                    min_samples_split=2, min_weight_fraction_leaf=0.0,
                    n_estimators=100, n_jobs=-1, oob_score=False,
                    random_state=2025, verbose=0, warm_start=False)

In [None]:
results = cross_validate(extra, X, y, scoring=('r2', 'neg_mean_absolute_error', 'neg_mean_squared_error'), cv=4)

In [None]:
results

{'fit_time': array([1.56120062, 0.38427162, 0.37004495, 0.38627386]),
 'score_time': array([0.10578465, 0.10372519, 0.10390759, 0.10380101]),
 'test_neg_mean_absolute_error': array([-0.198416  , -0.21024467, -0.22975767, -0.21474333]),
 'test_neg_mean_squared_error': array([-0.06432838, -0.08195244, -0.09321239, -0.07894024]),
 'test_r2': array([-0.06734919,  0.02637383,  0.06716149,  0.03139932])}

# Neural Networks


In [None]:
model = Sequential()
model.add(Dense(12, activation='tanh', input_dim=X.shape[1], kernel_initializer="he_normal"))
model.add(Dense(4, activation='tanh', kernel_initializer="he_normal"))
model.add(Dense(1, activation='linear'))

In [None]:
opt = Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss=Huber(), metrics=['mse', 'mae'])

In [None]:
model.fit(X_train_norm, y_train, epochs=100)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<tensorflow.python.keras.callbacks.History at 0x7f12853feb38>

## Evaluation
### Train Set

In [None]:
y_hat_train = model.predict(X_train_norm)

In [None]:
evaluate(y_train, y_hat_train)

MAE:  0.19367780720212305
MSE:  0.06303385191190441


### Test Set

In [None]:
y_hat = model.predict(X_test)

In [None]:
evaluate(y_test, y_hat)

MAE:  0.22707991928403196
MSE:  0.09728254840674279


# Export Results

In [36]:
df_train = df[df.index.isin(X_train.index)]
df_train['HPR_predito'] = y_hat_train

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [38]:
df_test = df[df.index.isin(X_test.index)]
df_test['HPR_predito'] = y_hat

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [43]:
df_final = pd.concat([df_train, df_test])
df_final = df_final.rename(columns={'Unnamed: 0': 'data'})

In [44]:
df_final.to_csv('/content/drive/My Drive/Grupo Turing/value2_nn_predito.csv', index=False)