## Bibliotecas

In [None]:
%%time
%%capture
# %pip install -U aeon --quiet
# %pip install aeon[all_extras] --quiet
# %pip install --upgrade numba --quiet
# %pip install -U sktime --quiet
# %pip install -U matplotlib --quiet
# %pip install pycatch22 --quiet
# %pip install seaborn --quiet
import warnings
warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd
import os
import time
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import f_oneway
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import FunctionTransformer
from sktime.datatypes._panel._convert import from_nested_to_3d_numpy

from aeon.classification.interval_based import DrCIFClassifier
from aeon.transformations.collection.convolution_based import Rocket
# from aeon.transformations.collection.convolution_based import MiniRocket
# from aeon.transformations.collection.convolution_based import MultiRocket

from aeon.datasets import load_classification
from aeon.testing.data_generation import make_example_3d_numpy
from aeon.base._estimators.interval_based import BaseIntervalForest
from aeon.classification.base import BaseClassifier
from aeon.classification.sklearn._continuous_interval_tree import ContinuousIntervalTree
from aeon.transformations.collection import PeriodogramTransformer
from aeon.transformations.collection.feature_based import Catch22
from aeon.utils.numba.general import first_order_differences_3d

CPU times: total: 344 ms
Wall time: 2.24 s


In [2]:
# Show which aeon release this kernel is running.
import aeon
print(aeon.__version__)

1.2.0


## Datasets

In [None]:
# Names of the datasets evaluated in this notebook. The experiment loop passes
# each name to run_experiment, which loads it with aeon's load_classification.
CHOSEN_DATASETS = [
    "ArrowHead", "Wine", "FreezerSmallTrain",
    "OliveOil", "FordB", "Car",
    "TwoPatterns", "InsectWingbeatSound", "BeetleFly",
    "Yoga", "InlineSkate", "FaceAll",
    "EOGVerticalSignal", "Ham", "MoteStrain",
    "ProximalPhalanxTW", "WordSynonyms", "Lightning7",
    "GunPointOldVersusYoung", "MelbournePedestrian", "Earthquakes",
]

## DrROCKET

In [None]:
def first_order_differences_3d(X):
    """Return the differences between consecutive values along axis 2 of ``X``.

    ``X`` is expected to be a 3D array laid out as
    ``(n_instances, n_channels, n_timepoints)``; the result has the same first
    two dimensions and one fewer entry along the last one.

    NOTE: this definition rebinds the name ``first_order_differences_3d`` that
    the imports cell brings in from ``aeon.utils.numba.general``, so code that
    runs after this cell uses this NumPy version.
    """
    time_axis = 2
    return np.diff(X, n=1, axis=time_axis)

# --- Class definition ---
class IntervalROCKETClassifier(BaseIntervalForest, BaseClassifier):
    """Interval-based classifier using ROCKET features only.

    Builds a ``BaseIntervalForest`` in which a ``Rocket`` transformer is the
    interval feature for each of three series representations:

    - identity (raw series)
    - first-order differences
    - periodogram

    Parameters
    ----------
    base_estimator, n_estimators, n_intervals, min_interval_length,
    max_interval_length, att_subsample_size, time_limit_in_minutes,
    contract_max_n_estimators, random_state, n_jobs, parallel_backend
        Forwarded unchanged to ``BaseIntervalForest.__init__``; see that class
        for their meaning.
    num_kernels : int or dict, default=500
        If int: the same number of kernels is used for every representation.
        If dict: must contain the keys ``"identity"``, ``"diff"`` and
        ``"periodogram"``; each value is the number of kernels for that
        representation.

    Raises
    ------
    ValueError
        If ``num_kernels`` is a dict that lacks one of the required keys.
    """

    _tags = {
        "capability:multivariate": True,
        "capability:train_estimate": True,
        "capability:contractable": True,
        "capability:multithreading": True,
        "algorithm_type": "interval",
    }

    def __init__(
        self,
        base_estimator=None,
        n_estimators=50,
        n_intervals=(4, "sqrt-div"),
        min_interval_length=3,
        max_interval_length=0.5,
        att_subsample_size=10,
        num_kernels=500,
        time_limit_in_minutes=None,
        contract_max_n_estimators=100,
        random_state=None,
        n_jobs=1,
        parallel_backend=None,
    ):
        # Stored exactly as passed so get_params()/clone round-trip the value.
        self.num_kernels = num_kernels

        # Order here must match the order of `series_transformers` below.
        representations = ("identity", "diff", "periodogram")

        if isinstance(num_kernels, dict):
            missing = [name for name in representations if name not in num_kernels]
            if missing:
                raise ValueError(
                    "num_kernels dict is missing the key(s) "
                    f"{missing}; expected keys {list(representations)}."
                )
            kernels_per_representation = [num_kernels[name] for name in representations]
        else:
            kernels_per_representation = [num_kernels] * len(representations)

        series_transformers = [
            None,  # identity: intervals are taken from the raw series
            FunctionTransformer(func=first_order_differences_3d, validate=False),
            PeriodogramTransformer(),
        ]

        # One independent Rocket instance per representation.
        interval_features = [Rocket(n) for n in kernels_per_representation]

        super().__init__(
            base_estimator=base_estimator,
            n_estimators=n_estimators,
            interval_selection_method="random",
            n_intervals=n_intervals,
            min_interval_length=min_interval_length,
            max_interval_length=max_interval_length,
            interval_features=interval_features,
            series_transformers=series_transformers,
            att_subsample_size=att_subsample_size,
            replace_nan=0,
            time_limit_in_minutes=time_limit_in_minutes,
            contract_max_n_estimators=contract_max_n_estimators,
            random_state=random_state,
            n_jobs=n_jobs,
            parallel_backend=parallel_backend,
        )

    def _fit(self, X, y):
        """Delegate fitting to the parent implementation."""
        return super()._fit(X, y)

    def _predict(self, X):
        """Delegate label prediction to the parent implementation."""
        return super()._predict(X)

    def _predict_proba(self, X):
        """Delegate probability prediction to the parent implementation."""
        return super()._predict_proba(X)

    @classmethod
    def _get_test_params(cls, parameter_set="default"):
        """Return constructor parameters for a named test configuration.

        Parameters
        ----------
        parameter_set : str, default="default"
            ``"small"`` and ``"medium"`` select 10 and 50 kernels respectively;
            any other value selects 500.

        Returns
        -------
        dict
            Keyword arguments for ``IntervalROCKETClassifier``.
        """
        # `cls` must be the first parameter of a classmethod; without it the
        # class object itself was bound to `parameter_set` and the branches
        # below could never match.
        if parameter_set == "small":
            return {"num_kernels": 10}
        elif parameter_set == "medium":
            return {"num_kernels": 50}
        return {"num_kernels": 500}

## Experiment

In [None]:
def run_experiment(dataset_name, num_iteracoes, resultados_path="res_det_4k.csv"):
    """Train and evaluate IntervalROCKETClassifier repeatedly on one dataset.

    Each iteration fits a fresh classifier on the train split, predicts the
    test split, computes accuracy and macro-averaged precision/recall/F1, and
    appends one row to ``resultados_path`` right away, so rows already written
    are kept if a later iteration fails.

    Parameters
    ----------
    dataset_name : str
        Name passed to ``load_classification``.
    num_iteracoes : int
        Number of fit/predict repetitions.
    resultados_path : str, default="res_det_4k.csv"
        CSV file the per-iteration rows are appended to. The header row is
        written only when the file does not exist yet or is empty.

    Returns
    -------
    list of dict
        One dict per iteration with the keys ``dataset``, ``iteration``,
        ``accuracy``, ``precision``, ``recall``, ``f1_score``,
        ``train_time_seconds`` and ``test_time_seconds``.
    """
    # The same splits are requested on every iteration, so load them once
    # instead of re-reading the dataset inside the loop.
    X_train, y_train = load_classification(dataset_name, split="train")
    X_test, y_test = load_classification(dataset_name, split="test")

    all_metrics = []
    # A missing file and an existing-but-empty file both still need a header.
    write_header = (
        not os.path.exists(resultados_path)
        or os.path.getsize(resultados_path) == 0
    )

    for i in range(num_iteracoes):
        clf = IntervalROCKETClassifier(
            n_estimators=50,
            num_kernels=4,
            att_subsample_size=None,
            n_jobs=-1
        )

        # perf_counter is monotonic, so the measured durations are not
        # affected by system clock adjustments during a long run.
        start_train = time.perf_counter()
        clf.fit(X_train, y_train)
        train_time = time.perf_counter() - start_train

        start_test = time.perf_counter()
        y_pred = clf.predict(X_test)
        test_time = time.perf_counter() - start_test

        acc = accuracy_score(y_test, y_pred)
        prec = precision_score(y_test, y_pred, average="macro", zero_division=0)
        rec = recall_score(y_test, y_pred, average="macro", zero_division=0)
        f1 = f1_score(y_test, y_pred, average="macro", zero_division=0)

        result_dict = {
            "dataset": dataset_name,
            "iteration": i + 1,
            "accuracy": acc,
            "precision": prec,
            "recall": rec,
            "f1_score": f1,
            "train_time_seconds": train_time,
            "test_time_seconds": test_time
        }

        # Append immediately so partial results are on disk after each iteration.
        pd.DataFrame([result_dict]).to_csv(
            resultados_path,
            mode="a",
            header=write_header,
            index=False
        )
        write_header = False  # the header is written at most once per call

        all_metrics.append(result_dict)

    return all_metrics

# Single source of truth for the detailed results file. The same path is
# deleted here, written by run_experiment, and read by the summary cell.
# Previously run_experiment fell back to its default path ("res_det_4k.csv"),
# so the file removed here was never the one being written.
RESULTS_PATH = "detailed_results_4kernels.csv"
NUM_ITERATIONS = 3

# Start from a clean file so rows from earlier runs are not mixed in.
if os.path.exists(RESULTS_PATH):
    os.remove(RESULTS_PATH)

# Run the experiments for all datasets
for dataset in CHOSEN_DATASETS:
    print(f"Executando experimento para o dataset: {dataset}")
    run_experiment(
        dataset_name=dataset,
        num_iteracoes=NUM_ITERATIONS,
        resultados_path=RESULTS_PATH,
    )
    print(f"Experimento para {dataset} concluído.\n")

Executando experimento para o dataset: ArrowHead
Experimento para ArrowHead concluído.

Executando experimento para o dataset: Wine
Experimento para Wine concluído.

Executando experimento para o dataset: FreezerSmallTrain
Experimento para FreezerSmallTrain concluído.

Executando experimento para o dataset: OliveOil
Experimento para OliveOil concluído.

Executando experimento para o dataset: FordB
Experimento para FordB concluído.

Executando experimento para o dataset: Car
Experimento para Car concluído.

Executando experimento para o dataset: TwoPatterns
Experimento para TwoPatterns concluído.

Executando experimento para o dataset: InsectWingbeatSound
Experimento para InsectWingbeatSound concluído.

Executando experimento para o dataset: BeetleFly
Experimento para BeetleFly concluído.

Executando experimento para o dataset: Yoga
Experimento para Yoga concluído.

Executando experimento para o dataset: InlineSkate
Experimento para InlineSkate concluído.

Executando experimento para o 

In [None]:
# Load the per-iteration rows from the detailed results CSV.
results_df = pd.read_csv("detailed_results_4kernels.csv")

# Named aggregations: output column -> (source column, statistic).
aggregations = {
    "accuracy_mean": ("accuracy", "mean"),
    "accuracy_std": ("accuracy", "std"),
    "accuracy_min": ("accuracy", "min"),
    "accuracy_max": ("accuracy", "max"),
    "precision_mean": ("precision", "mean"),
    "precision_std": ("precision", "std"),
    "recall_mean": ("recall", "mean"),
    "recall_std": ("recall", "std"),
    "f1_mean": ("f1_score", "mean"),
    "f1_std": ("f1_score", "std"),
    "train_time_mean": ("train_time_seconds", "mean"),
    "train_time_std": ("train_time_seconds", "std"),
    "test_time_mean": ("test_time_seconds", "mean"),
    "test_time_std": ("test_time_seconds", "std"),
}

# One summary row per dataset, with "dataset" restored as a regular column.
per_dataset = results_df.groupby("dataset")
summary = per_dataset.agg(**aggregations).reset_index()

# Persist the per-dataset summary next to the detailed results.
summary.to_csv("4kernels_summary_results.csv", index=False)

print("All experiments have been completed. Results saved.")

Todos os experimentos foram finalizados. Resultados salvos.
