In [2]:
import numpy as np
import pandas as pd


from aeon.transformations.collection.convolution_based import Rocket, MiniRocket
from sklearn.linear_model import RidgeClassifierCV

from utils import config
from utils.utils import transform_series, dimensions_fusion, load_dataset, PAA
from aeon.datasets import load_from_ts_file, load_classification


In [4]:
RESULTS_FILENAME = f'results_final.csv'

In [5]:
try:
    df_results = pd.read_csv(f"{config.RESULTS_FOLDER}/{RESULTS_FILENAME}")
except FileNotFoundError:
    df_results = pd.DataFrame(columns=[
        "dataset",
        "representation",
        "operation",
        "accuracy",
        "convolution_algorithm",
        "classification_algorithm",
    ])


In [6]:
dataset_name = "LSST"
representation = "RP"
operation = "sum"

dataset = load_dataset(dataset_name, config.DATASETS_FOLDER)

X_train, y_train = load_from_ts_file(f"{config.DATASETS_FOLDER}/{dataset_name}/{dataset_name}_TRAIN.ts")
X_test, y_test = load_from_ts_file(f"{config.DATASETS_FOLDER}/{dataset_name}/{dataset_name}_TEST.ts")

# X_train, y_train = load_classification(dataset_name, split="Train")
# X_test, y_test = load_classification(dataset_name, split="Test")

X_train = dataset["X_train"]
y_train = dataset["y_train"]
X_test = dataset["X_test"]
y_test = dataset["y_test"]


In [8]:
np.array(X_train).shape

(2459, 6, 36)

In [17]:
transformed_train_series = []
for exemple in X_train:
    exemple_processed = []
    for series in exemple:
        if len(series) > 300:
            series = PAA(series, 300)
        t = transform_series(series, representation)
        exemple_processed.append(t)
    transformed_train_series.append(exemple_processed)
transformed_test_series = []
for exemple in X_test:
    exemple_processed= []
    for series in exemple:
        if len(series) > 300:
            series = PAA(series, 300)
        t = transform_series(series, representation)
        exemple_processed.append(t)
    transformed_test_series.append(exemple_processed)



In [18]:
print(np.array(transformed_train_series).shape)
print(np.array(transformed_test_series).shape)


(2459, 6, 36, 36)
(2466, 6, 36, 36)


In [14]:
X_train_transformed = dimensions_fusion(transformed_train_series, operation)
X_test_transformed = dimensions_fusion(transformed_test_series, operation)


In [15]:
X_train_transformed.shape

(2459, 1296)

In [13]:
classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
classifier.fit(X_train_transformed, y_train)

accuracy = classifier.score(X_test_transformed, y_test)

new_result_line = {
    "dataset": dataset_name,
    "representation": representation,
    "operation": operation,
    "accuracy": accuracy,
    "convolution_algorithm": None,
    "classification_algorithm": "Ridge",
}
df_results.loc[len(df_results)] = new_result_line
df_results.to_csv(f"{config.RESULTS_FOLDER}/{RESULTS_FILENAME}", index=False)


ValueError: Input X contains NaN.
RidgeClassifierCV does not accept missing values encoded as NaN natively. For supervised learning, you might want to consider sklearn.ensemble.HistGradientBoostingClassifier and Regressor which accept missing values encoded as NaNs natively. Alternatively, it is possible to preprocess the data, for instance by using an imputer transformer in a pipeline or drop samples with missing values. See https://scikit-learn.org/stable/modules/impute.html You can find a list of all estimators that handle NaN values at the following page: https://scikit-learn.org/stable/modules/impute.html#estimators-that-handle-nan-values

In [None]:
algorithm = Rocket(n_kernels=10000, n_jobs=-1, random_state=6)
algorithm.fit(X_train_transformed)

X_train_transformed = algorithm.transform(X_train_transformed)
X_test_transformed = algorithm.transform(X_test_transformed)

classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
classifier.fit(X_train_transformed, y_train)

accuracy = classifier.score(X_test_transformed, y_test)

new_result_line = {
    "dataset": dataset_name,
    "representation": representation,
    "operation": operation,
    "accuracy": accuracy,
    "convolution_algorithm": "Rocket",
    "classification_algorithm": "Ridge",
}
df_results.loc[len(df_results)] = new_result_line
df_results.to_csv(f"{config.RESULTS_FOLDER}/{RESULTS_FILENAME}", index=False)


In [None]:
algorithm = MiniRocket(n_kernels=10000, n_jobs=-1, random_state=6)
algorithm.fit(X_train_transformed)

X_train_transformed = algorithm.transform(X_train_transformed)
X_test_transformed = algorithm.transform(X_test_transformed)

classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
classifier.fit(X_train_transformed, y_train)

accuracy = classifier.score(X_test_transformed, y_test)

new_result_line = {
    "dataset": dataset_name,
    "representation": representation,
    "operation": operation,
    "accuracy": accuracy,
    "convolution_algorithm": "MiniRocket",
    "classification_algorithm": "Ridge",
}
df_results.loc[len(df_results)] = new_result_line
df_results.to_csv(f"{config.RESULTS_FOLDER}/{RESULTS_FILENAME}", index=False)
