In [2]:
### Core Packages
import pandas as pd
import numpy as np

### Visualization Packages
import matplotlib.pyplot as plt
import seaborn as sns

### Machine Learning Packages
from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFECV
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.ensemble import StackingClassifier
from sklearn.ensemble import VotingClassifier

from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, roc_curve, auc, confusion_matrix

### Setting env
import os, sys

# The relative paths used below ('data/processed/...') and the `dags` package import
# expect the working directory to be the 'tcc-machine-learning' folder; when the
# kernel starts elsewhere, step up one level.
if not os.getcwd().endswith('tcc-machine-learning'):
    os.chdir(os.pardir)

# Working directory after the optional move.
folder = os.getcwd()

### Functions
from dags import config
from dags.utils import generate_label, save_image

### Others
import warnings

In [2]:
def Average(lst):
    """Return the arithmetic mean of the numbers in ``lst``.

    Args:
        lst: Any iterable of numbers (list, tuple, generator, pandas Series, ...).
            It is materialised once so one-shot iterators are supported too.

    Returns:
        The sum of the values divided by how many there are.

    Raises:
        ZeroDivisionError: If ``lst`` yields no values.
    """
    # Materialise first: generators have no len() and can only be consumed once.
    values = list(lst)
    return sum(values) / len(values)

In [3]:
# Single-ticker run of the pipeline: load processed data, label it, scale it,
# select features with RFECV and fit a stacking ensemble, then print hold-out accuracy.
ticker = 'petr4.sa'
df_ticker = pd.read_csv(f'data/processed/{ticker}_processed.csv', encoding='utf8', delimiter=',')
df_ticker['date'] = pd.to_datetime(df_ticker['date'])
df_ticker_target = df_ticker.loc[(df_ticker['date'] >= '2015-01-01')]

# `generate_label` is a project helper (dags.utils); presumably it adds the `target`
# column for a horizon of `days` -- confirm against its definition.
days = 60
df_train = generate_label(days, df_ticker_target)
df_train = df_train.set_index('date')

# Optional log transform of the features; with transfor == 'normal' both are identity.
transfor = 'normal'
dir_func = np.log if transfor == 'log' else lambda x:x
inf_func = np.exp if transfor == 'log' else lambda x:x

cols_to_transform = [col for col in df_train.columns if not 'target' in col]
df_train[cols_to_transform] = dir_func(df_train[cols_to_transform])

X = df_train.drop(columns = 'target')
y = df_train['target']

st_feat = MinMaxScaler()
X = X.sort_index(axis = 1)
# Capture the names AFTER sorting the columns: the RFECV support mask below is in
# the column order of the array it was fitted on, i.e. the sorted order.
feature_names = X.columns
# NOTE(review): the scaler is fitted on all rows before the split.
X = st_feat.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

### Feature Selection
# RFECV clones the estimator it is given, so `rf` does not need to be fitted beforehand.
rf = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)

rfe = RFECV(rf, cv=5, scoring="neg_mean_squared_error")
rfe.fit(X_train, y_train)
selected_features = list(np.array(feature_names)[rfe.get_support()])
df_selected = df_train[selected_features + ['target']]

# Base learners of the stacking ensemble.
classifiers = [
    ("KNC", KNeighborsClassifier(n_neighbors = 5, weights = "distance", p = 1)),
    ("SVC", SVC(kernel= "rbf", gamma = 3.5, C = 1000)),
    ("MLP", MLPClassifier(max_iter = 10000, activation = 'tanh', alpha = 0.0001, learning_rate = 'constant')),
    ("RF", RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)),
    ("ADA", AdaBoostClassifier(n_estimators = 1000, base_estimator = DecisionTreeClassifier(max_depth=9, min_samples_leaf = 2)))
]

# Rebuild the design matrix with the selected features only and rescale it.
X = df_selected.drop(columns = 'target')
y = df_selected['target']

st_feat = MinMaxScaler()
X = X.sort_index(axis = 1)
feature_names = X.columns
X = st_feat.fit_transform(X)

# A fresh random split: this hold-out set is not the one used during feature selection.
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

clf = StackingClassifier(estimators=classifiers, final_estimator = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4))

clf.fit(X_train, y_train)

print('Acurancia Stacking', accuracy_score(y_test, clf.predict(X_test)))

KeyboardInterrupt: 

In [4]:
# Repeated experiment: for every round (`rod`), ticker and label horizon (`days`),
# select features with RFECV, fit the stacking ensemble, save a confusion-matrix
# and a ROC image, and accumulate hold-out metrics into Excel files.
# `rod` is only used to tag the output file names.
for rod in [6, 7, 8]:

    final_results = pd.DataFrame()

    for ticker in ['vale3.sa', 'itub4.sa', 'bbdc4.sa', 'petr4.sa']:

        df_ticker = pd.read_csv(f'data/processed/{ticker}_processed.csv', encoding='utf8', delimiter=',')
        df_ticker['date'] = pd.to_datetime(df_ticker['date'])
        df_ticker_target = df_ticker.loc[(df_ticker['date'] >= '2015-01-01')]

        for days in [3, 7, 15, 30, 60]:

            # `generate_label` is a project helper; presumably it adds the `target`
            # column for a horizon of `days` -- confirm against its definition.
            df_train = generate_label(days, df_ticker_target)
            df_train = df_train.set_index('date')

            # Optional log transform; with transfor == 'normal' both are identity.
            transfor = 'normal'
            dir_func = np.log if transfor == 'log' else lambda x:x
            inf_func = np.exp if transfor == 'log' else lambda x:x

            cols_to_transform = [col for col in df_train.columns if not 'target' in col]
            df_train[cols_to_transform] = dir_func(df_train[cols_to_transform])

            X = df_train.drop(columns = 'target')
            y = df_train['target']

            st_feat = MinMaxScaler()
            X = X.sort_index(axis = 1)
            # Capture the names AFTER sorting the columns: the RFECV support mask is
            # in the column order of the fitted array, i.e. the sorted order.
            feature_names = X.columns
            X = st_feat.fit_transform(X)

            X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

            ### Feature Selection
            # RFECV clones the estimator it is given, so `rf` is passed unfitted.
            rf = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)

            rfe = RFECV(rf, cv=5, scoring="neg_mean_squared_error")
            rfe.fit(X_train, y_train)
            selected_features = list(np.array(feature_names)[rfe.get_support()])
            df_selected = df_train[selected_features + ['target']]

            # Base learners of the stacking ensemble.
            classifiers = [
                ("KNC", KNeighborsClassifier(n_neighbors = 5, weights = "distance", p = 1)),
                ("SVC", SVC(kernel= "rbf", gamma = 3.5, C = 1000)),
                ("MLP", MLPClassifier(max_iter = 10000, activation = 'tanh', alpha = 0.0001, learning_rate = 'constant')),
                ("RF", RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)),
                ("ADA", AdaBoostClassifier(n_estimators = 1000, base_estimator = DecisionTreeClassifier(max_depth=9, min_samples_leaf = 2)))
            ]

            # Rebuild the design matrix with the selected features only and rescale it.
            X = df_selected.drop(columns = 'target')
            y = df_selected['target']

            st_feat = MinMaxScaler()
            X = X.sort_index(axis = 1)
            feature_names = X.columns
            X = st_feat.fit_transform(X)

            X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

            clf = StackingClassifier(estimators=classifiers, final_estimator = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4))

            clf.fit(X_train, y_train)

            # Predict once and reuse: every metric and plot below reads the same
            # hold-out predictions, and predicting with the full stack is expensive.
            y_pred = clf.predict(X_test)

            # NOTE(review): the ROC is built from hard class predictions, not scores.
            fpr, tpr, thresholds = roc_curve(y_test, y_pred)

            confusion = sns.heatmap(confusion_matrix(y_test, y_pred), annot=True,  fmt='d', cmap='Reds')
            confusion.set_title("Matriz de Confusão", fontsize=18)
            confusion.set_ylabel("Classe Verdadeira")
            confusion.set_xlabel("Classe Prevista")
            save_image(img=confusion, name=f'confusion_{ticker}_{days}_{rod}')
            # Clear the current figure so the ROC curve is not drawn over the heatmap.
            plt.clf()

            roc = sns.lineplot(x = fpr, y = tpr)
            plt.plot([0,1], [0,1], 'k--')
            plt.axis([0, 1, 0, 1])
            plt.xlabel('Taxa de Falsos Positivos')
            plt.ylabel('Taxa de Verdadeiros Positivos')
            plt.title('Curva ROC', fontsize = 14)
            save_image(img=roc, name=f'roc_{ticker}_{days}_{rod}')
            plt.clf()

            # One-row frame: the feature list is wrapped in a list so it stays in one cell.
            dict_results = {'Ação': ticker,
                            'Dias': days,
                            'Acuracia': accuracy_score(y_test, y_pred),
                            'Precisão': precision_score(y_test, y_pred),
                            'Recall': recall_score(y_test, y_pred),
                            'F1': f1_score(y_test, y_pred),
                            'AUC': auc(fpr, tpr),
                            'Features': [selected_features]}

            results = pd.DataFrame(dict_results)

            final_results = pd.concat([final_results, results])

            # Written after every fit; the file holds all rows accumulated so far in this round.
            final_results.to_excel(f'results_{ticker}_{days}_{rod}.xlsx', index = False)

        final_results.to_excel(f'results_{ticker}_{rod}.xlsx', index = False)

    final_results.to_excel(f'results_{rod}.xlsx', index = False)

Image confusion_vale3.sa_3_3 saved.
Image roc_vale3.sa_3_3 saved.
Image confusion_vale3.sa_7_3 saved.
Image roc_vale3.sa_7_3 saved.
Image confusion_vale3.sa_15_3 saved.
Image roc_vale3.sa_15_3 saved.
Image confusion_vale3.sa_30_3 saved.
Image roc_vale3.sa_30_3 saved.
Image confusion_vale3.sa_60_3 saved.
Image roc_vale3.sa_60_3 saved.
Image confusion_itub4.sa_3_3 saved.
Image roc_itub4.sa_3_3 saved.
Image confusion_itub4.sa_7_3 saved.
Image roc_itub4.sa_7_3 saved.
Image confusion_itub4.sa_15_3 saved.
Image roc_itub4.sa_15_3 saved.
Image confusion_itub4.sa_30_3 saved.
Image roc_itub4.sa_30_3 saved.
Image confusion_itub4.sa_60_3 saved.
Image roc_itub4.sa_60_3 saved.
Image confusion_bbdc4.sa_3_3 saved.
Image roc_bbdc4.sa_3_3 saved.
Image confusion_bbdc4.sa_7_3 saved.
Image roc_bbdc4.sa_7_3 saved.
Image confusion_bbdc4.sa_15_3 saved.
Image roc_bbdc4.sa_15_3 saved.
Image confusion_bbdc4.sa_30_3 saved.
Image roc_bbdc4.sa_30_3 saved.
Image confusion_bbdc4.sa_60_3 saved.
Image roc_bbdc4.sa_60_3

<Figure size 432x288 with 0 Axes>

### Avaliando dados posteriores

In [22]:
# Train on data up to 2022-09-16 and keep 2022-09-17..2022-11-13 aside as a
# later-in-time validation set that the model never sees during fitting.
ticker = 'petr4.sa'
df_ticker = pd.read_csv(f'data/processed/{ticker}_processed.csv', encoding='utf8', delimiter=',')
df_ticker['date'] = pd.to_datetime(df_ticker['date'])
df_ticker_target = df_ticker.loc[(df_ticker['date'] >= '2015-01-01') & (df_ticker['date'] <= '2022-09-16')]
df_ticker_valid = df_ticker.loc[(df_ticker['date'] >= '2022-09-17') & (df_ticker['date'] <= '2022-11-13')]

# `generate_label` is a project helper; presumably it adds the `target` column for
# a horizon of `days` -- confirm against its definition.
days = 30
df_train = generate_label(days, df_ticker_target)
df_train = df_train.set_index('date')

df_valid = generate_label(days, df_ticker_valid)
df_valid = df_valid.set_index('date')

# Optional log transform; with transfor == 'normal' both are identity.
transfor = 'normal'
dir_func = np.log if transfor == 'log' else lambda x:x
inf_func = np.exp if transfor == 'log' else lambda x:x

cols_to_transform = [col for col in df_train.columns if not 'target' in col]
df_train[cols_to_transform] = dir_func(df_train[cols_to_transform])

cols_to_transform = [col for col in df_valid.columns if not 'target' in col]
df_valid[cols_to_transform] = dir_func(df_valid[cols_to_transform])

X = df_train.drop(columns = 'target')
y = df_train['target']

st_feat = MinMaxScaler()
X = X.sort_index(axis = 1)
# Capture the names AFTER sorting the columns: the RFECV support mask below is in
# the column order of the array it was fitted on, i.e. the sorted order.
feature_names = X.columns
X = st_feat.fit_transform(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

### Feature Selection
# RFECV clones the estimator it is given, so `rf` does not need to be fitted beforehand.
rf = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)

rfe = RFECV(rf, cv=5, scoring="neg_mean_squared_error")
rfe.fit(X_train, y_train)
selected_features = list(np.array(feature_names)[rfe.get_support()])
df_selected = df_train[selected_features + ['target']]

df_selected_valid = df_valid[selected_features + ['target']]

# Base learners of the stacking ensemble.
classifiers = [
    ("KNC", KNeighborsClassifier(n_neighbors = 5, weights = "distance", p = 1)),
    ("SVC", SVC(kernel= "rbf", gamma = 3.5, C = 1000)),
    ("MLP", MLPClassifier(max_iter = 10000, activation = 'tanh', alpha = 0.0001, learning_rate = 'constant')),
    ("RF", RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)),
    ("ADA", AdaBoostClassifier(n_estimators = 1000, base_estimator = DecisionTreeClassifier(max_depth=9, min_samples_leaf = 2)))
]

X = df_selected.drop(columns = 'target')
y = df_selected['target']

x_valid = df_selected_valid.drop(columns = 'target')
y_valid = df_selected_valid['target']

# The scaler is fitted on the training frame only and then applied to the
# validation frame; both are column-sorted so their columns line up.
st_feat = MinMaxScaler()
X = X.sort_index(axis = 1)
feature_names = X.columns
st_feat.fit(X)
X = st_feat.transform(X)

x_valid = x_valid.sort_index(axis = 1)
x_valid = st_feat.transform(x_valid)

X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

clf = StackingClassifier(estimators=classifiers, final_estimator = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4))

clf.fit(X_train, y_train)

In [None]:
# Evaluate the fitted stack on the later-in-time validation set: confusion matrix,
# ROC curve and a one-row metrics table.

# Predict once and reuse the result for every plot and metric below.
y_valid_pred = clf.predict(x_valid)

# NOTE(review): the ROC is built from hard class predictions, not scores.
fpr, tpr, thresholds = roc_curve(y_valid, y_valid_pred)

# Each plot gets its own figure; drawing both on the implicit current axes would
# overlay the ROC line on top of the confusion-matrix heatmap.
fig_confusion, ax_confusion = plt.subplots()
confusion = sns.heatmap(confusion_matrix(y_valid, y_valid_pred), annot=True,  fmt='d', cmap='Reds', ax=ax_confusion)
confusion.set_title("Matriz de Confusão", fontsize=18)
confusion.set_ylabel("Classe Verdadeira")
confusion.set_xlabel("Classe Prevista")

fig_roc, ax_roc = plt.subplots()
roc = sns.lineplot(x = fpr, y = tpr, ax=ax_roc)
ax_roc.plot([0,1], [0,1], 'k--')  # chance diagonal
ax_roc.axis([0, 1, 0, 1])
ax_roc.set_xlabel('Taxa de Falsos Positivos')
ax_roc.set_ylabel('Taxa de Verdadeiros Positivos')
ax_roc.set_title('Curva ROC', fontsize = 14)

# One-row frame: the feature list is wrapped in a list so it stays in one cell.
dict_results = {'Ação': ticker,
                'Dias': days,
                'Acuracia': accuracy_score(y_valid, y_valid_pred),
                'Precisão': precision_score(y_valid, y_valid_pred),
                'Recall': recall_score(y_valid, y_valid_pred),
                'F1': f1_score(y_valid, y_valid_pred),
                'AUC': auc(fpr, tpr),
                'Features': [selected_features]}

results = pd.DataFrame(dict_results)

# Show the metrics table as the cell output.
results

In [16]:
# Repeated experiment with a later-in-time validation window: for every round
# (`rod`), ticker and label horizon (`days`), train on data before 2022-08-01,
# report metrics on a random hold-out split and on 2022-08-01..2022-11-13, and
# write everything to Excel. `rod` is only used to tag the output names.
for rod in [1, 2, 3, 4, 5]:

    final_results = pd.DataFrame()
    final_results_valid = pd.DataFrame()

    for ticker in ['vale3.sa', 'itub4.sa', 'bbdc4.sa', 'petr4.sa']:

        df_ticker = pd.read_csv(f'data/processed/{ticker}_processed.csv', encoding='utf8', delimiter=',')
        df_ticker['date'] = pd.to_datetime(df_ticker['date'])
        df_ticker_target = df_ticker.loc[(df_ticker['date'] >= '2015-01-01') & (df_ticker['date'] < '2022-08-01')]
        df_ticker_valid = df_ticker.loc[(df_ticker['date'] >= '2022-08-01') & (df_ticker['date'] <= '2022-11-13')]

        for days in [3, 7, 15, 30, 60]:

            # `generate_label` is a project helper; presumably it adds the `target`
            # column for a horizon of `days` -- confirm against its definition.
            df_train = generate_label(days, df_ticker_target)
            df_train = df_train.set_index('date')

            df_valid = generate_label(days, df_ticker_valid)
            df_valid = df_valid.set_index('date')

            # Optional log transform; with transfor == 'normal' both are identity.
            transfor = 'normal'
            dir_func = np.log if transfor == 'log' else lambda x:x
            inf_func = np.exp if transfor == 'log' else lambda x:x

            cols_to_transform = [col for col in df_train.columns if not 'target' in col]
            df_train[cols_to_transform] = dir_func(df_train[cols_to_transform])

            cols_to_transform = [col for col in df_valid.columns if not 'target' in col]
            df_valid[cols_to_transform] = dir_func(df_valid[cols_to_transform])

            X = df_train.drop(columns = 'target')
            y = df_train['target']

            st_feat = MinMaxScaler()
            X = X.sort_index(axis = 1)
            # Capture the names AFTER sorting the columns: the RFECV support mask is
            # in the column order of the fitted array, i.e. the sorted order.
            feature_names = X.columns
            X = st_feat.fit_transform(X)

            X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

            ### Feature Selection
            # RFECV clones the estimator it is given, so `rf` is passed unfitted.
            rf = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)

            rfe = RFECV(rf, cv=5, scoring="neg_mean_squared_error")
            rfe.fit(X_train, y_train)
            selected_features = list(np.array(feature_names)[rfe.get_support()])
            df_selected = df_train[selected_features + ['target']]
            df_selected_valid = df_valid[selected_features + ['target']]

            # Base learners of the stacking ensemble.
            classifiers = [
                ("KNC", KNeighborsClassifier(n_neighbors = 5, weights = "distance", p = 1)),
                ("SVC", SVC(kernel= "rbf", gamma = 3.5, C = 1000)),
                ("MLP", MLPClassifier(max_iter = 10000, activation = 'tanh', alpha = 0.0001, learning_rate = 'constant')),
                ("RF", RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)),
                ("ADA", AdaBoostClassifier(n_estimators = 1000, base_estimator = DecisionTreeClassifier(max_depth=9, min_samples_leaf = 2)))
            ]

            X = df_selected.drop(columns = 'target')
            y = df_selected['target']

            x_valid = df_selected_valid.drop(columns = 'target')
            y_valid = df_selected_valid['target']

            # The scaler is fitted on the training frame only and then applied to the
            # validation frame; both are column-sorted so their columns line up.
            st_feat = MinMaxScaler()
            X = X.sort_index(axis = 1)
            feature_names = X.columns
            st_feat.fit(X)
            X = st_feat.transform(X)

            x_valid = x_valid.sort_index(axis = 1)
            x_valid = st_feat.transform(x_valid)

            X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

            clf = StackingClassifier(estimators=classifiers, final_estimator = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4))

            clf.fit(X_train, y_train)

            # Predict once per data set and reuse: every metric below reads the same
            # predictions, and predicting with the full stack is expensive.
            y_pred = clf.predict(X_test)
            y_valid_pred = clf.predict(x_valid)

            # --- Random hold-out split ---
            # NOTE(review): the ROC is built from hard class predictions, not scores.
            fpr, tpr, thresholds = roc_curve(y_test, y_pred)

            dict_results = {'Ação': ticker,
                            'Dias': days,
                            'Acuracia': accuracy_score(y_test, y_pred),
                            'Precisão': precision_score(y_test, y_pred),
                            'Recall': recall_score(y_test, y_pred),
                            'F1': f1_score(y_test, y_pred),
                            'AUC': auc(fpr, tpr),
                            'Features': [selected_features]}

            results = pd.DataFrame(dict_results)

            final_results = pd.concat([final_results, results])

            # --- Later-in-time validation window ---
            fpr_valid, tpr_valid, thresholds_valid = roc_curve(y_valid, y_valid_pred)

            dict_results_valid = {'Ação': ticker,
                                  'Dias': days,
                                  'Acuracia': accuracy_score(y_valid, y_valid_pred),
                                  'Precisão': precision_score(y_valid, y_valid_pred),
                                  'Recall': recall_score(y_valid, y_valid_pred),
                                  'F1': f1_score(y_valid, y_valid_pred),
                                  'AUC': auc(fpr_valid, tpr_valid),
                                  'Features': [selected_features]}

            results_valid = pd.DataFrame(dict_results_valid)

            final_results_valid = pd.concat([final_results_valid, results_valid])

            # True validation labels side by side with this round's predictions.
            labels = y_valid.to_frame()
            labels[f'predicted_{rod}'] = y_valid_pred

            # Written after every fit; the result files hold all rows accumulated so far in this round.
            final_results.to_excel(f'results_{ticker}_{days}_{rod}.xlsx', index = False)
            final_results_valid.to_excel(f'results_{ticker}_{days}_{rod}_valid.xlsx', index = False)

            labels.to_excel(f'labels_{ticker}_{days}_{rod}_valid.xlsx', index = False)

        final_results.to_excel(f'results_{ticker}_{rod}.xlsx', index = False)
        final_results_valid.to_excel(f'results_{ticker}_{rod}_valid.xlsx', index = False)

    final_results.to_excel(f'results_{rod}.xlsx', index = False)
    final_results_valid.to_excel(f'results_{rod}_valid.xlsx', index = False)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))


In [17]:
# Rounds 6-10 of the repeated experiment with a later-in-time validation window:
# for every round (`rod`), ticker and label horizon (`days`), train on data before
# 2022-08-01, report metrics on a random hold-out split and on
# 2022-08-01..2022-11-13, and write everything to Excel.
# `rod` is only used to tag the output names.
for rod in [6, 7, 8, 9, 10]:

    final_results = pd.DataFrame()
    final_results_valid = pd.DataFrame()

    for ticker in ['vale3.sa', 'itub4.sa', 'bbdc4.sa', 'petr4.sa']:

        df_ticker = pd.read_csv(f'data/processed/{ticker}_processed.csv', encoding='utf8', delimiter=',')
        df_ticker['date'] = pd.to_datetime(df_ticker['date'])
        df_ticker_target = df_ticker.loc[(df_ticker['date'] >= '2015-01-01') & (df_ticker['date'] < '2022-08-01')]
        df_ticker_valid = df_ticker.loc[(df_ticker['date'] >= '2022-08-01') & (df_ticker['date'] <= '2022-11-13')]

        for days in [3, 7, 15, 30, 60]:

            # `generate_label` is a project helper; presumably it adds the `target`
            # column for a horizon of `days` -- confirm against its definition.
            df_train = generate_label(days, df_ticker_target)
            df_train = df_train.set_index('date')

            df_valid = generate_label(days, df_ticker_valid)
            df_valid = df_valid.set_index('date')

            # Optional log transform; with transfor == 'normal' both are identity.
            transfor = 'normal'
            dir_func = np.log if transfor == 'log' else lambda x:x
            inf_func = np.exp if transfor == 'log' else lambda x:x

            cols_to_transform = [col for col in df_train.columns if not 'target' in col]
            df_train[cols_to_transform] = dir_func(df_train[cols_to_transform])

            cols_to_transform = [col for col in df_valid.columns if not 'target' in col]
            df_valid[cols_to_transform] = dir_func(df_valid[cols_to_transform])

            X = df_train.drop(columns = 'target')
            y = df_train['target']

            st_feat = MinMaxScaler()
            X = X.sort_index(axis = 1)
            # Capture the names AFTER sorting the columns: the RFECV support mask is
            # in the column order of the fitted array, i.e. the sorted order.
            feature_names = X.columns
            X = st_feat.fit_transform(X)

            X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

            ### Feature Selection
            # RFECV clones the estimator it is given, so `rf` is passed unfitted.
            rf = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)

            rfe = RFECV(rf, cv=5, scoring="neg_mean_squared_error")
            rfe.fit(X_train, y_train)
            selected_features = list(np.array(feature_names)[rfe.get_support()])
            df_selected = df_train[selected_features + ['target']]
            df_selected_valid = df_valid[selected_features + ['target']]

            # Base learners of the stacking ensemble.
            classifiers = [
                ("KNC", KNeighborsClassifier(n_neighbors = 5, weights = "distance", p = 1)),
                ("SVC", SVC(kernel= "rbf", gamma = 3.5, C = 1000)),
                ("MLP", MLPClassifier(max_iter = 10000, activation = 'tanh', alpha = 0.0001, learning_rate = 'constant')),
                ("RF", RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4)),
                ("ADA", AdaBoostClassifier(n_estimators = 1000, base_estimator = DecisionTreeClassifier(max_depth=9, min_samples_leaf = 2)))
            ]

            X = df_selected.drop(columns = 'target')
            y = df_selected['target']

            x_valid = df_selected_valid.drop(columns = 'target')
            y_valid = df_selected_valid['target']

            # The scaler is fitted on the training frame only and then applied to the
            # validation frame; both are column-sorted so their columns line up.
            st_feat = MinMaxScaler()
            X = X.sort_index(axis = 1)
            feature_names = X.columns
            st_feat.fit(X)
            X = st_feat.transform(X)

            x_valid = x_valid.sort_index(axis = 1)
            x_valid = st_feat.transform(x_valid)

            X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y)

            clf = StackingClassifier(estimators=classifiers, final_estimator = RandomForestClassifier(n_estimators=400, min_samples_leaf=2, oob_score=True, bootstrap=True, n_jobs=4))

            clf.fit(X_train, y_train)

            # Predict once per data set and reuse: every metric below reads the same
            # predictions, and predicting with the full stack is expensive.
            y_pred = clf.predict(X_test)
            y_valid_pred = clf.predict(x_valid)

            # --- Random hold-out split ---
            # NOTE(review): the ROC is built from hard class predictions, not scores.
            fpr, tpr, thresholds = roc_curve(y_test, y_pred)

            dict_results = {'Ação': ticker,
                            'Dias': days,
                            'Acuracia': accuracy_score(y_test, y_pred),
                            'Precisão': precision_score(y_test, y_pred),
                            'Recall': recall_score(y_test, y_pred),
                            'F1': f1_score(y_test, y_pred),
                            'AUC': auc(fpr, tpr),
                            'Features': [selected_features]}

            results = pd.DataFrame(dict_results)

            final_results = pd.concat([final_results, results])

            # --- Later-in-time validation window ---
            fpr_valid, tpr_valid, thresholds_valid = roc_curve(y_valid, y_valid_pred)

            dict_results_valid = {'Ação': ticker,
                                  'Dias': days,
                                  'Acuracia': accuracy_score(y_valid, y_valid_pred),
                                  'Precisão': precision_score(y_valid, y_valid_pred),
                                  'Recall': recall_score(y_valid, y_valid_pred),
                                  'F1': f1_score(y_valid, y_valid_pred),
                                  'AUC': auc(fpr_valid, tpr_valid),
                                  'Features': [selected_features]}

            results_valid = pd.DataFrame(dict_results_valid)

            final_results_valid = pd.concat([final_results_valid, results_valid])

            # True validation labels side by side with this round's predictions.
            labels = y_valid.to_frame()
            labels[f'predicted_{rod}'] = y_valid_pred

            # Written after every fit; the result files hold all rows accumulated so far in this round.
            final_results.to_excel(f'results_{ticker}_{days}_{rod}.xlsx', index = False)
            final_results_valid.to_excel(f'results_{ticker}_{days}_{rod}_valid.xlsx', index = False)

            labels.to_excel(f'labels_{ticker}_{days}_{rod}_valid.xlsx', index = False)

        final_results.to_excel(f'results_{ticker}_{rod}.xlsx', index = False)
        final_results_valid.to_excel(f'results_{ticker}_{rod}_valid.xlsx', index = False)

    final_results.to_excel(f'results_{rod}.xlsx', index = False)
    final_results_valid.to_excel(f'results_{rod}_valid.xlsx', index = False)

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, "true nor predicted", "F-score is", len(true_sum))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


: 