## Dados de treino e teste

In [1]:
# Load the historical returns workbook; the "data" column is used as the index.
import pandas as pd

retornos = pd.read_excel(
    "retorno_hist.xlsx",
    index_col="data",
)

In [2]:
# Train/test split.
# The trailing rows are held out for testing and everything before them is used
# for training. Features are every column except the last; the target is the
# last column.
# NOTE(review): assumes rows are in chronological order -- confirm.

# Size of the hold-out set (one row per day, per the original "last 30 days" note).
N_TESTE = 30

# Slicing with -N_TESTE only yields a non-empty training set when the frame
# has strictly more rows than the hold-out size.
assert len(retornos) > N_TESTE, "not enough rows to hold out N_TESTE for testing"

# Test: last N_TESTE rows.
X_teste = retornos.iloc[-N_TESTE:, :-1].to_numpy()
y_teste = retornos.iloc[-N_TESTE:, -1:].to_numpy().ravel()

# Train: full history minus the last N_TESTE rows.
X_treino = retornos.iloc[:-N_TESTE, :-1].to_numpy()
y_treino = retornos.iloc[:-N_TESTE, -1:].to_numpy().ravel()

# The two splits must partition the frame exactly, with aligned X/y lengths.
assert len(X_treino) + len(X_teste) == len(retornos)
assert len(X_teste) == len(y_teste) and len(X_treino) == len(y_treino)

print("Teste: ", X_teste.shape, y_teste.shape)
print("Treino: ", X_treino.shape, y_treino.shape)

Teste:  (30, 3) (30,)
Treino:  (3191, 3) (3191,)


In [3]:
# Feature scaling.
# The scaler is fitted on the training features only; the fitted mapping is
# then applied to both the training and the test features.
from sklearn.preprocessing import MinMaxScaler

minmax = MinMaxScaler()
minmax.fit(X_treino)

X_treino_nrm = minmax.transform(X_treino)
X_teste_nrm = minmax.transform(X_teste)

## Regressão logística

In [4]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score
from sklearn.metrics import precision_recall_curve, PrecisionRecallDisplay

# Base estimator: class-weighted logistic regression with a fixed seed.
log_reg = LogisticRegression(random_state=13, class_weight='balanced')

# Hyperparameter candidates (2 x 5 = 10 combinations).
hyperparam_grid = dict(
    fit_intercept=[True, False],
    C=[0.01, 0.1, 1, 10, 100],
)

# Exhaustive grid search (not a randomized search) with 5-fold
# cross-validation, ranking candidates by ROC AUC. refit=True retrains the
# best candidate on the whole training set once the search finishes.
# NOTE(review): an integer cv gives unshuffled K-fold splits; if the rows are
# time-ordered, a forward-chaining splitter may be more appropriate -- confirm.
busca = GridSearchCV(
    estimator=log_reg,
    param_grid=hyperparam_grid,
    scoring="roc_auc",
    cv=5,
    refit=True,
    n_jobs=-1,
    verbose=3,
)
busca.fit(X_treino_nrm, y_treino)

print(f"Best score: {busca.best_score_}")
print(f"Best param: {busca.best_params_}")

Fitting 5 folds for each of 10 candidates, totalling 50 fits
Best score: 0.5214971843362332
Best param: {'C': 0.01, 'fit_intercept': False}


In [5]:
# Best model from the search.
# The search was created with refit=True, so best_estimator_ has already been
# fitted on the full training set; the extra .fit() call that used to be here
# only repeated that work.
reg_best = busca.best_estimator_

# Predictions on the held-out test set.
pred = reg_best.predict(X_teste_nrm)

# Test-set results.
print('\n')
cm = confusion_matrix(y_teste, pred)
print("Confusion Matrix : \n", cm, '\n')

print('Accuracy = ', accuracy_score(y_teste, pred))
print('Precision = ', precision_score(y_teste, pred))
print('Recall = ', recall_score(y_teste, pred))



Confusion Matrix : 
 [[10  6]
 [ 9  5]] 

Accuracy =  0.5
Precision =  0.45454545454545453
Recall =  0.35714285714285715


## Floresta aleatória

In [6]:
from sklearn.ensemble import RandomForestClassifier

# Base estimator: class-weighted random forest, depth capped at 100, fixed seed.
flr_alt = RandomForestClassifier(random_state=13, max_depth=100, class_weight='balanced')

# Hyperparameter candidates (2 x 3 x 3 x 3 = 54 combinations).
hyperparam_grid = dict(
    criterion=['entropy', 'gini'],
    min_samples_leaf=[1, 10, 100],
    min_samples_split=[2, 10, 100],
    n_estimators=[10, 100, 1000],
)

# Exhaustive grid search (not a randomized search) with 5-fold
# cross-validation, ranking candidates by ROC AUC. refit=True retrains the
# best candidate on the whole training set once the search finishes.
# NOTE(review): an integer cv gives unshuffled K-fold splits; if the rows are
# time-ordered, a forward-chaining splitter may be more appropriate -- confirm.
busca = GridSearchCV(
    estimator=flr_alt,
    param_grid=hyperparam_grid,
    scoring="roc_auc",
    cv=5,
    refit=True,
    n_jobs=-1,
    verbose=3,
)
busca.fit(X_treino_nrm, y_treino)

print(f"Best score: {busca.best_score_}")
print(f"Best param: {busca.best_params_}")

Fitting 5 folds for each of 54 candidates, totalling 270 fits
Best score: 0.5240700468170975
Best param: {'criterion': 'entropy', 'min_samples_leaf': 100, 'min_samples_split': 2, 'n_estimators': 10}


In [7]:
# Best model from the search.
# The search was created with refit=True, so best_estimator_ has already been
# fitted on the full training set; the extra .fit() call that used to be here
# rebuilt the whole forest for no benefit.
flr_best = busca.best_estimator_

# Predictions on the held-out test set.
pred = flr_best.predict(X_teste_nrm)

# Test-set results.
print('\n')
cm = confusion_matrix(y_teste, pred)
print("Confusion Matrix : \n", cm, '\n')

print('Accuracy = ', accuracy_score(y_teste, pred))
print('Precision = ', precision_score(y_teste, pred))
print('Recall = ', recall_score(y_teste, pred))



Confusion Matrix : 
 [[13  3]
 [ 7  7]] 

Accuracy =  0.6666666666666666
Precision =  0.7
Recall =  0.5


## SVM 

In [8]:
from sklearn.svm import SVC

# Base estimator: class-weighted support vector classifier with a fixed seed.
svm = SVC(random_state=13, class_weight='balanced')

# Hyperparameter candidates (2 x 5 x 3 = 30 combinations).
hyperparam_grid = dict(
    kernel=['sigmoid', 'rbf'],
    C=[0.01, 0.1, 1, 10, 100],
    gamma=[0.1, 0.01, 0.001],
)

# Exhaustive grid search (not a randomized search) with 5-fold
# cross-validation, ranking candidates by ROC AUC. refit=True retrains the
# best candidate on the whole training set once the search finishes.
# NOTE(review): an integer cv gives unshuffled K-fold splits; if the rows are
# time-ordered, a forward-chaining splitter may be more appropriate -- confirm.
busca = GridSearchCV(
    estimator=svm,
    param_grid=hyperparam_grid,
    scoring="roc_auc",
    cv=5,
    refit=True,
    n_jobs=-1,
    verbose=3,
)
busca.fit(X_treino_nrm, y_treino)

print(f"Best score: {busca.best_score_}")
print(f"Best param: {busca.best_params_}")

Fitting 5 folds for each of 30 candidates, totalling 150 fits
Best score: 0.5216116461878298
Best param: {'C': 100, 'gamma': 0.001, 'kernel': 'rbf'}


In [9]:
# Best model from the search.
# The search was created with refit=True, so best_estimator_ has already been
# fitted on the full training set; the extra .fit() call that used to be here
# only repeated that work.
svm_best = busca.best_estimator_

# Predictions on the held-out test set.
pred = svm_best.predict(X_teste_nrm)

# Test-set results.
print('\n')
cm = confusion_matrix(y_teste, pred)
print("Confusion Matrix : \n", cm, '\n')

print('Accuracy = ', accuracy_score(y_teste, pred))
print('Precision = ', precision_score(y_teste, pred))
print('Recall = ', recall_score(y_teste, pred))



Confusion Matrix : 
 [[15  1]
 [10  4]] 

Accuracy =  0.6333333333333333
Precision =  0.8
Recall =  0.2857142857142857
