# **1. Importações**

## Importação dos Avaliadores

In [21]:
#%pip install autorank

In [22]:
from utils.FileManager import FileManager
import pandas as pd
import numpy as np

import warnings
warnings.filterwarnings("ignore")

from autorank import autorank, plot_stats, create_report


In [23]:
from avaliacao.AvaliadorDriftBase import AvaliadorBatch
from avaliacao.AvaliadorDriftBase import AvaliadorPassivo
from experimento.ExperimentoDrift import Experimento

## Frameworks

In [24]:
# importando o framework
from frame_oasis.OASIS import OASIS
from frame_winks.WinKS import WinKS

## Importação dos Detectores

In [25]:
# Importando detectores de drift
from detectores.ADWINDetector import ADWINDetector
from detectores.DDMDetector import DDMDetector
from detectores.EDDMDetector import EDDMDetector
from detectores.FHDDMDetector import FHDDMDetector
from detectores.HDDM_ADetector import HDDM_ADetector
from detectores.PageHinkleyDetector import PageHinkleyDetector
from detectores.HDDM_WDetector import HDDM_WDetector
from detectores.KSWINDetector import KSWINDetector

## Importação dos Regressores Off-line

In [26]:
# Importando modelos offline
from regressores.modelosOffline.LinearRegressionModelo import LinearRegressionModelo
from regressores.modelosOffline.KneighborsRegressorModelo import KneighborsRegressorModelo
from regressores.modelosOffline.LassoRegressionModelo import LassoRegressionModelo
from regressores.modelosOffline.RidgeRegressionModelo import RidgeRegressionModelo
from regressores.modelosOffline.SVRModelo import SVRModelo
from regressores.modelosOffline.RandomForestModelo import RandomForestModelo
from regressores.modelosOffline.MLPRegressorModelo import MLPRegressorModelo

## Importação dos Regressores On-line

In [27]:
# Importando modelos online
from regressores.modelosOnline.BayesianLinearRegressionModelo import BayesianLinearRegressionModelo
from regressores.modelosOnline.HoeffdingTreeRegressorModelo import HoeffdingTreeRegressorModelo
from regressores.modelosOnline.KNNRegressorOnlineModelo import KNNRegressorOnlineModelo
from regressores.modelosOnline.LinearRegressionOnlineModelo import LinearRegressionOnlineModelo
from regressores.modelosOnline.OSELM import OSELMModelo

# **2. Definindo os Dados e Parâmetros**

## Dados

In [28]:
# Lista de séries temporais
series = [
    "AAPL",
    "B3SA3.SA",
    "IXIC",
    "DJI",
    "GSPC",
    "BVSP",
    "USDBRLX"
]

## Parâmetros

In [29]:
# Parâmetros do experimento
tamanho_batch = 300
lags = 10
repeticoes = 10  # Número de execuções para cada modelo-série

# **3. Definindo os Modelos e Avaliadores**

"""{
        "nome": "LR + ADWIN",
        "avaliador": AvaliadorBatch(),
        "modelo": LinearRegressionModelo,
        "detector": ADWINDetector,
        "deterministico": True
    },
    
    {
        "nome": "LR + HDDM_A",
        "avaliador": AvaliadorBatch(),
        "modelo": LinearRegressionModelo,
        "detector": HDDM_ADetector,
        "deterministico": True
    },
    
    {
        "nome": "LinearRegressionOnlineModelo",
        "avaliador": AvaliadorPassivo(),
        "modelo": LinearRegressionOnlineModelo,
        "detector": None,  # Passivo, não usa detector
        "deterministico": True
    },
    
    {
        "nome": "HoeffdingTreeRegressorModelo",
        "avaliador": AvaliadorPassivo(),
        "modelo": HoeffdingTreeRegressorModelo,
        "detector": None,  # Passivo, não usa detector
        "deterministico": True
    },
    
    {
        "nome": "OSELMModelo",
        "avaliador": AvaliadorPassivo(),
        "modelo": OSELMModelo,
        "detector": None,  # Passivo, não usa detector
        "deterministico": False
    },"""

In [30]:
modelos = [
    
    {
        "nome": "WinKS",
        "avaliador": WinKS(LinearRegressionModelo, KSWINDetector, n_janelas=20, alpha=0.05),
        "modelo": LinearRegressionModelo,
        "detector": KSWINDetector,
        "deterministico": False
    },
    
    {
        "nome": "LR + KSWIN",
        "avaliador": AvaliadorBatch(),
        "modelo": LinearRegressionModelo,
        "detector": KSWINDetector,
        "deterministico": False
    },
    
]

# **4. Executando o Experimento**

In [31]:
# Criando e executando o experimento
experimento = Experimento(
    series=series,
    modelos=modelos,
    tamanho_batch=tamanho_batch,
    lags=lags,
    repeticoes=repeticoes
)

resultados = experimento.executar()

Executando WinKS na série: AAPL
477 - 15
1153 - 15
1552 - 15
2140 - 15
2584 - 15
3461 - 15
4248 - 30
4740 - 30
5696 - 15
6104 - 15
6577 - 45
7020 - 15
7418 - 15
7864 - 15
8390 - 15
9371 - 30
9875 - 30
10396 - 15
10973 - 15
476 - 15
956 - 15
1411 - 15
1813 - 15
2379 - 15
3055 - 15
3466 - 15
4081 - 15
4470 - 15
5031 - 30
5447 - 30
5868 - 15
6272 - 15
6677 - 15
7081 - 45
7577 - 15
8033 - 15
9127 - 15
9544 - 15
9946 - 15
10504 - 15
10972 - 15
442 - 15
956 - 15
1407 - 30
1808 - 30
2245 - 15
2654 - 15
3460 - 15
4077 - 15
4471 - 15
4860 - 15
5483 - 15
6103 - 15
6678 - 15
7095 - 60
7577 - 15
8034 - 15
8613 - 15
9104 - 15
9557 - 15
9942 - 15
10335 - 15
10774 - 30
484 - 15
954 - 30
1549 - 15
2035 - 15
2585 - 15
3462 - 15
4245 - 30
4658 - 30
5034 - 30
5480 - 30
5879 - 15
6340 - 45
6694 - 30
7085 - 30
7577 - 15
8034 - 15
8609 - 15
9103 - 15
9556 - 15
9943 - 15
10685 - 15
477 - 15
1159 - 45
1552 - 15
2436 - 15
2951 - 15
3337 - 15
3866 - 15
4296 - 15
4743 - 30
5692 - 15
6102 - 15
6678 - 15
7092 - 60

In [32]:
# salvando os resultados
arquivo_salvamento = "Experimento_gabriel.csv"
FileManager.salvar_resultados(resultados, arquivo_salvamento)


Resultados salvos em: Experimento_gabriel.csv


'Experimento_gabriel.csv'

# **5. Análise Estatística dos Resultados**

## Leitura dos Resultados

In [33]:
# 1. Ler o arquivo CSV
df = pd.read_csv(arquivo_salvamento)

## Tabelas de Resultados

In [34]:
# 3. Agrupar por modelo e calcular estatísticas
summary = df.groupby(['serie', 'modelo']).agg(
    mae_mean=('mae', 'mean'),
    mae_std=('mae', 'std'),
    qtd_deteccoes_mean=('qtd_deteccoes', 'mean'),
    qtd_deteccoes_std=('qtd_deteccoes', 'std')
).reset_index()

### Média do MAE

In [35]:
# Tabela para a média do MAE
mae_mean_table = summary.pivot(index='serie', columns='modelo', values='mae_mean')
mae_mean_table

modelo,LR + KSWIN,WinKS
serie,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPL,0.006916,0.006966
B3SA3.SA,0.034474,0.034239
BVSP,0.016444,0.016599
DJI,0.0135,0.013775
GSPC,0.005737,0.005678
IXIC,0.008222,0.008571
USDBRLX,0.021628,0.022514


### Desvio do MAE

In [36]:
# Tabela para o desvio padrão do MAE
mae_std_table = summary.pivot(index='serie', columns='modelo', values='mae_std')
mae_std_table

modelo,LR + KSWIN,WinKS
serie,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPL,0.000237,0.000144
B3SA3.SA,0.002403,0.00258
BVSP,0.000466,0.000528
DJI,0.000645,0.000498
GSPC,0.000277,0.000316
IXIC,0.000277,0.000611
USDBRLX,0.001226,0.001024


### Média Detecções

In [37]:
# Tabela para a média das detecções
deteccoes_mean_table = summary.pivot(index='serie', columns='modelo', values='qtd_deteccoes_mean')
deteccoes_mean_table

modelo,LR + KSWIN,WinKS
serie,Unnamed: 1_level_1,Unnamed: 2_level_1
AAPL,19.8,20.7
B3SA3.SA,5.9,6.0
BVSP,13.4,14.3
DJI,14.0,14.4
GSPC,37.6,39.7
IXIC,20.9,21.8
USDBRLX,9.2,8.7


## Teste Estatístico

In [38]:
# 4. Preparar dados para autorank (uma linha por repetição, colunas por modelo)
df_mae = df.pivot_table(index=["serie", "repeticao"], columns="modelo", values="mae")
# Remover qualquer valor NaN para análise (caso haja)
df_mae = df_mae.dropna()
# Tabela de desempenho
df_mae

Unnamed: 0_level_0,modelo,LR + KSWIN,WinKS
serie,repeticao,Unnamed: 2_level_1,Unnamed: 3_level_1
AAPL,1,0.006715,0.007228
AAPL,2,0.006851,0.006917
AAPL,3,0.007529,0.006867
AAPL,4,0.006843,0.006749
AAPL,5,0.006866,0.007083
...,...,...,...
USDBRLX,6,0.021433,0.023668
USDBRLX,7,0.020607,0.022828
USDBRLX,8,0.022858,0.022853
USDBRLX,9,0.022386,0.022417


In [39]:
# computing the most appropriate statistical test
results = autorank(-df_mae)
# printing the report
create_report(results)

The statistical analysis was conducted for 2 populations with 70 paired samples.
The family-wise significance level of the tests is alpha=0.050.
We rejected the null hypothesis that the population is normal for the populations WinKS (p=0.000) and LR + KSWIN (p=0.000). Therefore, we assume that not all populations are normal.
No check for homogeneity was required because we only have two populations.
Because we have only two populations and both of them are not normal, we use Wilcoxon's signed rank test to determine the differences in the central tendency and report the median (MD) and the median absolute deviation (MAD) for each population.
We failed to reject the null hypothesis (p=0.839) of Wilcoxon's signed rank test that population WinKS (MD=-0.014+-0.005, MAD=0.007) is not greater than population LR + KSWIN (MD=-0.014+-0.004, MAD=0.007) . Therefore, we assume that there is no statistically significant difference between the medians of the populations.


In [40]:
# plotting the results
plot_stats(results, allow_insignificant=True)