In [None]:
!pip install PyWavelets
!pip install pyts
!pip install aeon

In [2]:
import os

import aeon
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import time

import pywt
from scipy import signal
from pyts.image import MarkovTransitionField
from pyts.image import GramianAngularField
from pyts.image import RecurrencePlot
from PIL import Image
from datetime import datetime

from sklearn.metrics import accuracy_score
from sklearn.linear_model import RidgeClassifierCV

from aeon.transformations.collection.convolution_based import Rocket, MiniRocket
from aeon.datasets import load_from_ts_file
from aeon.datasets import load_classification

from aeon.datasets import load_classification
from aeon.datasets.tsc_datasets import multivariate
import time

In [3]:
def znorm(x):
    """
    Z-normalize a series so that different series share the same scale.

    The result has a mean close to 0 and a standard deviation close to 1.
    A series whose standard deviation is exactly zero (every value equal) is
    returned as all zeros rather than the NaNs a 0/0 division would produce.

    Parameters
    ----------
    x : array-like of numbers
        The series to normalize. Mean and standard deviation are computed
        over all of its elements.

    Returns
    -------
    numpy.ndarray
        The normalized values, with the same shape as ``x``.
    """
    values = np.asarray(x)
    centered = values - np.mean(values)
    std = np.std(values)
    if std == 0:
        # Constant series: there is no spread to scale by, and dividing would
        # turn every point into NaN. The centered values are already zeros.
        return centered
    return centered / std


def transform_series(series, representation):
    """
    Transform a 1-D input series into a 2-D image.

    Supported representations: CWT, MTF, GADF, GASF, RP and FIRTS.
    Background reading: https://pyts.readthedocs.io/en/stable/modules/image.html

    Parameters
    ----------
    series : array-like
        A single series; it is z-normalized with ``znorm`` before the
        transformation is applied.
    representation : str
        One of ``"CWT"``, ``"MTF"``, ``"GADF"``, ``"GASF"``, ``"RP"``,
        ``"FIRTS"``.

    Returns
    -------
    numpy.ndarray
        The 2-D image for the series.

    Raises
    ------
    ValueError
        If ``representation`` is not one of the supported names.
    """
    supported = ("CWT", "MTF", "GADF", "GASF", "RP", "FIRTS")
    if representation not in supported:
        # Fail early with a readable message; previously an unknown name fell
        # through every branch and surfaced as an UnboundLocalError.
        raise ValueError(
            f"Unknown representation {representation!r}; expected one of {supported}"
        )

    series = np.array(znorm(series))

    if representation == "CWT":
        # One scale per time step, Morlet wavelet; only the coefficients are used.
        coeffs, _freqs = pywt.cwt(series, scales=np.arange(1, len(series) + 1), wavelet='morl')
        return coeffs

    # The pyts transformers are fed a batch holding this single series, and
    # the single resulting image is taken back out with [0].
    sample = series.reshape(1, len(series))

    if representation == "MTF":
        mtf = MarkovTransitionField(strategy='normal')
        im_final = mtf.fit_transform(sample)[0]
    elif representation == "GADF":
        gaf = GramianAngularField(method='difference')
        im_final = gaf.fit_transform(sample)[0]
    elif representation == "GASF":
        gaf = GramianAngularField(method='summation')
        im_final = gaf.fit_transform(sample)[0]
    elif representation == "RP":
        rp = RecurrencePlot(threshold='distance')
        im_final = rp.fit_transform(sample)[0]
    else:
        # FIRTS: fusion (element-wise sum) of MTF, GADF and RP. Note the MTF
        # settings here (n_bins=4, uniform) differ from the plain "MTF" branch.
        X_mtf = MarkovTransitionField(n_bins=4, strategy='uniform').fit_transform(sample)
        X_gaf = GramianAngularField(method='difference').fit_transform(sample)
        X_rp = RecurrencePlot(threshold='distance').fit_transform(sample)
        im_final = X_mtf[0] + X_gaf[0] + X_rp[0]
    return im_final


def dimensions_fusion(img_dataset, operation, representation=None):
    """
    Fuse the per-dimension images of every example into one flat feature vector.

    For each example the last image is taken as the starting value and the
    remaining images are folded into it, in order, with the chosen operation
    (e.g. for ``[a, b, c]`` and ``'subtraction'`` the result is ``c - a - b``).
    The fused image is then flattened.

    The input images are never modified: the starting image is copied and
    every step produces a new array. (Accumulating in place would alter the
    caller's images and corrupt any later fusion run on the same data.)

    Parameters
    ----------
    img_dataset : sequence
        One entry per example; each entry is a sequence of 2-D images, one
        per dimension of the original series.
    operation : str
        ``'sum'``, ``'subtraction'``, ``'dot_product'`` or ``'element_wise'``.
    representation : optional
        Accepted because the experiment cell passes it as a third argument;
        it is not used by the fusion itself.

    Returns
    -------
    numpy.ndarray
        One flattened fused image per example.

    Raises
    ------
    ValueError
        If ``operation`` is not one of the supported names.
    """
    fusers = {
        'sum': np.add,
        'subtraction': np.subtract,
        'dot_product': np.dot,
        'element_wise': np.multiply,
    }
    if operation not in fusers:
        raise ValueError(
            f"Unknown operation {operation!r}; expected one of {sorted(fusers)}"
        )
    fuse = fusers[operation]

    new_data = []
    for dataset in img_dataset:
        imgs = list(dataset)
        # np.array(...) copies, so the caller's last image stays untouched.
        img_final = np.array(imgs.pop())
        for img in imgs:
            img_final = fuse(img_final, img)

        new_data.append(img_final.flatten())

    return np.array(new_data)


def load_dataset(dataset_name):
    """
    Load the train and test splits of a dataset.

    The splits are first read from local ``.ts`` files under
    ``{DATA_PATH}/{dataset_name}/``. If that fails for any reason, the error
    is printed and the splits are obtained through ``load_classification``
    instead. The elapsed loading time is always printed, even when the
    fallback itself raises.

    NOTE(review): the notebook defines this same function again in the next
    cell (next to ``DATA_PATH``); that later definition shadows this one.

    Parameters
    ----------
    dataset_name : str
        Dataset name, used both as the folder/file prefix and as the name
        passed to ``load_classification``.

    Returns
    -------
    dict
        Keys ``"X_train"``, ``"y_train"``, ``"X_test"``, ``"y_test"``.
    """
    # Start the clock before the `try` so the `finally` clause can always read it.
    started_at = time.time()
    try:
        print(f"Carregando {dataset_name}")
        # Single quotes inside the replacement field keep this f-string valid
        # on Python versions older than 3.12.
        print(f"Iniciando em {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

        X_train, y_train = load_from_ts_file(f"{DATA_PATH}/{dataset_name}/{dataset_name}_TRAIN.ts")
        X_test, y_test = load_from_ts_file(f"{DATA_PATH}/{dataset_name}/{dataset_name}_TEST.ts")
    except Exception as e:
        # Deliberate best-effort: any local failure triggers the fallback loader.
        print(e)
        print(f"Não foi possível carregar o dataset {dataset_name} armazenados na máquina local")
        print(f"Iniciando download do dataset {dataset_name}")

        X_train, y_train = load_classification(dataset_name, split="Train")
        X_test, y_test = load_classification(dataset_name, split="Test")

        print("Download finalizado com sucesso")
    finally:
        print(f"Tempo de carregamento: {time.time() - started_at} segundos")

    return {
        "X_train": X_train,
        "y_train": y_train,
        "X_test": X_test,
        "y_test": y_test,
    }

In [4]:
# Root folder that holds one sub-folder per dataset with its .ts files.
# The default is a machine-specific absolute path; set the TSC_DATA_PATH
# environment variable to point somewhere else without editing the notebook.
DATA_PATH = os.environ.get("TSC_DATA_PATH", "C:/Users/bruno/puc/iniciacao/datasets/data")


def load_dataset(dataset_name):
    """
    Load the train and test splits of a dataset.

    The splits are first read from local ``.ts`` files under
    ``{DATA_PATH}/{dataset_name}/``. If that fails for any reason, the error
    is printed and the splits are obtained through ``load_classification``
    instead. The elapsed loading time is always printed, even when the
    fallback itself raises.

    NOTE(review): this duplicates the ``load_dataset`` defined in the
    previous cell and shadows it; consider keeping a single definition.

    Parameters
    ----------
    dataset_name : str
        Dataset name, used both as the folder/file prefix and as the name
        passed to ``load_classification``.

    Returns
    -------
    dict
        Keys ``"X_train"``, ``"y_train"``, ``"X_test"``, ``"y_test"``.
    """
    # Start the clock before the `try` so the `finally` clause can always read it.
    started_at = time.time()
    try:
        print(f"Carregando {dataset_name}")
        # Single quotes inside the replacement field keep this f-string valid
        # on Python versions older than 3.12.
        print(f"Iniciando em {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

        X_train, y_train = load_from_ts_file(f"{DATA_PATH}/{dataset_name}/{dataset_name}_TRAIN.ts")
        X_test, y_test = load_from_ts_file(f"{DATA_PATH}/{dataset_name}/{dataset_name}_TEST.ts")
    except Exception as e:
        # Deliberate best-effort: any local failure triggers the fallback loader.
        print(e)
        print(f"Não foi possível carregar o dataset {dataset_name} armazenados na máquina local")
        print(f"Iniciando download do dataset {dataset_name}")

        X_train, y_train = load_classification(dataset_name, split="Train")
        X_test, y_test = load_classification(dataset_name, split="Test")

        print("Download finalizado com sucesso")
    finally:
        print(f"Tempo de carregamento: {time.time() - started_at} segundos")

    return {
        "X_train": X_train,
        "y_train": y_train,
        "X_test": X_test,
        "y_test": y_test,
    }


In [51]:
# Load a single dataset and expose its four splits as notebook-level names.
dataset_name = "AtrialFibrillation"

dataset = load_dataset(dataset_name)
X_train, y_train, X_test, y_test = (
    dataset[split_key]
    for split_key in ("X_train", "y_train", "X_test", "y_test")
)

Carregando AtrialFibrillation
Iniciando em 2025-03-02 16:11:05
Tempo de carregamento: 0.009135007858276367 segundos
(15, 2, 640)
[-0.34086 -0.38038 -0.3458  -0.36556 -0.3458 ]
[0.1482  0.13338 0.10868 0.09386 0.0741 ]
[-0.19266 -0.247   -0.23712 -0.2717  -0.2717 ]
[-0.19266 -0.247   -0.23712 -0.2717  -0.2717 ]


In [5]:
from aeon.datasets.tsc_datasets import multivariate
from sklearn.linear_model import RidgeClassifierCV

import time

# Baseline experiment: no image representation and no convolution step.
# Every example is collapsed by summing over its first axis (presumably the
# channel axis of aeon's (n_channels, n_timepoints) layout -- TODO confirm)
# and the resulting series is fed straight into a Ridge classifier.
#
# NOTE(review): the CSV written at the end uses the same
# `{dataset_name}_results.csv` name as the representation/fusion experiment
# cell further down, so whichever cell runs last overwrites the other's file.

# Column order of the results CSV.
RESULT_COLUMNS = [
    "dataset",
    "representation",
    "representation_transform_time",
    "operation",
    "dimention_fusion_time",
    "accuracy",
    "convolution_algorithm",
    "convolution_time",
    "classification_algorithm",
    "train_time",
    "validation_time",
]

full_results = []


datasets_for_test = list(multivariate)

# for dataset_name in datasets_for_test:
for dataset_name in ["AtrialFibrillation"]:
    # Rows are collected as plain dicts and turned into a DataFrame once,
    # instead of growing a DataFrame one row at a time.
    result_rows = []

    dataset_process_started_at = time.time()
    print(f"Processando dataset {dataset_name}")

    dataset = load_dataset(dataset_name)
    X_train = dataset["X_train"]
    y_train = dataset["y_train"]
    X_test = dataset["X_test"]
    y_test = dataset["y_test"]

    try:
        X_train_transformed = np.array([np.sum(serie, axis=0) for serie in X_train])
        X_test_transformed = np.array([np.sum(serie, axis=0) for serie in X_test])

        # Feed the summed series directly to Ridge.
        classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))

        ridge_train_started_at = time.time()
        print("Treinando o algoritmo Ridge")
        classifier.fit(X_train_transformed, y_train)
        train_time = time.time() - ridge_train_started_at
        print(f"Tempo de treinamento: {train_time} segundos")

        ridge_validation_started_at = time.time()
        print("Validando o algoritmo Ridge")
        accuracy = classifier.score(X_test_transformed, y_test)
        validation_time = time.time() - ridge_validation_started_at
        print(f"Tempo de validação: {validation_time} segundos")

        print(f'Acurácia: {accuracy}\n')

        result_rows.append({
            "dataset": dataset_name,
            "representation": None,
            "representation_transform_time": None,
            "operation": None,
            "dimention_fusion_time": None,
            "accuracy": accuracy,
            "convolution_algorithm": None,
            "convolution_time": None,
            "classification_algorithm": "Ridge",
            "train_time": train_time,
            "validation_time": validation_time,
        })
    except Exception as e:
        # Best-effort: report the failure and still write the (possibly empty) CSV.
        print("Problema com o dataset: " + dataset_name)
        print(e)
    print(f"Tempo de processamento do dataset: {time.time() - dataset_process_started_at} segundos")

    results = pd.DataFrame(result_rows, columns=RESULT_COLUMNS)
    results.to_csv(f'{dataset_name}_results.csv', index=False)


Processando dataset AtrialFibrillation
Carregando AtrialFibrillation
Iniciando em 2025-03-25 14:25:20
Tempo de carregamento: 0.009000778198242188 segundos
Treinando o algoritmo Ridge
Tempo de treinamento: 0.015593528747558594 segundos
Validando o algoritmo Ridge
Tempo de validação: 0.0009989738464355469 segundos
Acurácia: 0.4

Tempo de processamento do dataset: 0.026592254638671875 segundos


In [None]:
from aeon.transformations.collection.convolution_based import Rocket, MiniRocket
from aeon.datasets.tsc_datasets import multivariate
from sklearn.metrics import accuracy_score
from sklearn.linear_model import RidgeClassifierCV

import time

# Full experiment. For every multivariate dataset, image representation and
# fusion operation:
#   1. turn each dimension of each example into a 2-D image,
#   2. fuse the per-dimension images into one flat vector per example,
#   3. score a Ridge classifier on the fused vectors,
#   4. score a Ridge classifier on MiniRocket features of the fused vectors.
# One CSV per dataset is written at the end of that dataset's run.

reps = ['RP', 'MTF', 'GASF', 'GADF', 'FIRTS', 'CWT']
operations = ["sum", "subtraction", "dot_product", "element_wise"]

# Column order of the results CSV.
RESULT_COLUMNS = [
    "dataset",
    "representation",
    "representation_transform_time",
    "operation",
    "dimention_fusion_time",
    "accuracy",
    "convolution_algorithm",
    "convolution_time",
    "classification_algorithm",
    "train_time",
    "validation_time",
]

full_results = []

datasets_for_test = list(multivariate)

for dataset_name in datasets_for_test:
    # Rows are collected as plain dicts and turned into a DataFrame once,
    # instead of growing a DataFrame one row at a time.
    result_rows = []

    dataset_process_started_at = time.time()
    print(f"Processando dataset {dataset_name}")

    dataset = load_dataset(dataset_name)
    X_train = dataset["X_train"]
    y_train = dataset["y_train"]
    X_test = dataset["X_test"]
    y_test = dataset["y_test"]

    for representation in reps:
        representation_transformation_started_at = time.time()
        print(f"Transformando as dimensões na representação {representation}")

        # One list of images per example, one image per dimension.
        transformed_train_series = [
            [transform_series(series, representation) for series in exemple]
            for exemple in X_train
        ]
        transformed_test_series = [
            [transform_series(series, representation) for series in exemple]
            for exemple in X_test
        ]

        representation_transform_time = time.time() - representation_transformation_started_at
        print(f"Tempo de transformação: {representation_transform_time} segundos")

        for operation in operations:
            try:
                fusion_process_started_at = time.time()
                print(f"Fundindo as dimensões na operação {operation}")

                # dimensions_fusion takes (img_dataset, operation). Passing the
                # representation as an extra positional argument raised a
                # TypeError that the `except` below swallowed, so no result
                # row was ever recorded.
                X_train_transformed = dimensions_fusion(transformed_train_series, operation)
                X_test_transformed = dimensions_fusion(transformed_test_series, operation)
                dimention_fusion_time = time.time() - fusion_process_started_at
                print(f"Tempo de fusão: {dimention_fusion_time} segundos")

                # --- Ridge directly on the fused images -------------------
                classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))

                ridge_train_started_at = time.time()
                print("Treinando o algoritmo Ridge")
                classifier.fit(X_train_transformed, y_train)
                train_time = time.time() - ridge_train_started_at
                print(f"Tempo de treinamento: {train_time} segundos")

                ridge_validation_started_at = time.time()
                print("Validando o algoritmo Ridge")
                accuracy = classifier.score(X_test_transformed, y_test)
                validation_time = time.time() - ridge_validation_started_at
                print(f"Tempo de validação: {validation_time} segundos")

                print(f'Acurácia: {accuracy}\n')

                result_rows.append({
                    "dataset": dataset_name,
                    "representation": representation,
                    "representation_transform_time": representation_transform_time,
                    "operation": operation,
                    "dimention_fusion_time": dimention_fusion_time,
                    "accuracy": accuracy,
                    "convolution_algorithm": None,
                    "convolution_time": None,
                    "classification_algorithm": "Ridge",
                    "train_time": train_time,
                    "validation_time": validation_time,
                })

                # --- MiniRocket features + Ridge --------------------------
                # Rocket is imported above but is not evaluated here; only
                # the MiniRocket pipeline is run.
                minirocket_convolution_started_at = time.time()
                print("Processando convolução com o algoritmo MiniRocket")

                algorithm = MiniRocket(num_kernels=10000, n_jobs=-1, random_state=6)
                algorithm.fit(X_train_transformed)

                X_train_convolved = algorithm.transform(X_train_transformed)
                X_test_convolved = algorithm.transform(X_test_transformed)
                minirocket_convolution_time = time.time() - minirocket_convolution_started_at
                print(f"Tempo de convolução: {minirocket_convolution_time} segundos")

                classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))

                ridge_train_with_rocket_started_at = time.time()
                print("Treinando o algoritmo Ridge com a convolução MiniRocket")
                classifier.fit(X_train_convolved, y_train)
                train_time = time.time() - ridge_train_with_rocket_started_at
                print(f"Tempo de treinamento: {train_time} segundos")

                ridge_with_minirocket_validation_started_at = time.time()
                print("Validando o algoritmo Ridge com a convolução MiniRocket")
                accuracy = classifier.score(X_test_convolved, y_test)
                validation_time = time.time() - ridge_with_minirocket_validation_started_at
                print(f"Tempo de validação: {validation_time} segundos")

                print(f'Acurácia: {accuracy}')

                result_rows.append({
                    "dataset": dataset_name,
                    "representation": representation,
                    "representation_transform_time": representation_transform_time,
                    "operation": operation,
                    "dimention_fusion_time": dimention_fusion_time,
                    "accuracy": accuracy,
                    "convolution_algorithm": "MiniRocket",
                    "convolution_time": minirocket_convolution_time,
                    "classification_algorithm": "Ridge",
                    "train_time": train_time,
                    "validation_time": validation_time,
                })
            except Exception as e:
                # Best-effort: report the failure and move on to the next
                # operation so one bad combination does not stop the run.
                print("Problema com o dataset: " + dataset_name)
                print(e)
    print(f"Tempo de processamento do dataset: {time.time() - dataset_process_started_at} segundos")

    results = pd.DataFrame(result_rows, columns=RESULT_COLUMNS)
    results.to_csv(f'{dataset_name}_results.csv', index=False)
