In [1]:
import os
import pandas as pd
import numpy as np

# Directory containing the focal recordings
directory = r'E:\sahithi\FOCAL'  # Replace with your actual directory path

# Pre-fill with NaN instead of using np.empty: a slot that never receives a file
# then stays NaN and is removed by the NaN check in the next cell, rather than
# silently holding uninitialised memory.
data_array = np.full((3750, 10240, 2), np.nan)  # Up to 3750 files, each with 10240 time points and 2 columns

# Select the .txt files *before* numbering them, so unrelated directory entries
# cannot leave gaps in (or index past the end of) data_array. Sort the names
# because os.listdir returns entries in arbitrary order and the sample order
# feeds the seeded train/test split further down.
txt_files = sorted(name for name in os.listdir(directory) if name.endswith('.txt'))
assert len(txt_files) <= data_array.shape[0], (
    f"Found {len(txt_files)} .txt files but data_array only has room for {data_array.shape[0]}"
)

for i, filename in enumerate(txt_files):
    file_path = os.path.join(directory, filename)

    # Load both comma-separated columns. Reading only column 0 gives shape
    # (10240, 1), which fails the shape check below.
    data_df = pd.read_csv(file_path, delimiter=',', usecols=[0, 1], header=None)

    # Convert the data to numpy array
    data = data_df.values

    # Ensure the data shape matches expectations
    assert data.shape == (10240, 2), f"File {filename} has unexpected shape {data.shape}"

    # Store data in the array
    data_array[i, :, :] = data

# Print the shape of the final array
print(data_array.shape)


(3750, 10240, 2)


In [2]:
# Flag every recording (entry along axis 0) that contains a NaN anywhere.
# Reducing over both trailing axes yields one boolean per recording; reducing
# over axis=1 alone yields a (n_recordings, 2) mask, so a recording with NaNs in
# both columns would be listed twice in nan_row_indices.
nan_rows = np.isnan(data_array).any(axis=(1, 2))

# Indices of the recordings that are dropped
nan_row_indices = np.where(nan_rows)[0]

# Keep only the NaN-free recordings (boolean indexing returns a new array)
focal_array = data_array[~nan_rows]

# Print the shape of the resulting array
print(focal_array.shape)


(3750, 10240, 2)


In [4]:
# Show which recording indices were flagged by the NaN filter; an empty array
# means nothing was dropped.
nan_row_indices


array([], dtype=int64)

In [5]:
import os
import pandas as pd
import numpy as np

# Directory containing the non-focal recordings
directory = r'E:\sahithi\NON_FOCAL'  # Replace with your actual directory path

# Pre-fill with NaN instead of using np.empty: a slot that never receives a file
# then stays NaN and is removed by the NaN check in the next cell, rather than
# silently holding uninitialised memory.
data_arrayy = np.full((3750, 10240, 2), np.nan)  # Up to 3750 files, each with 10240 time points and 2 columns

# Select the .txt files *before* numbering them, so unrelated directory entries
# cannot leave gaps in (or index past the end of) data_arrayy. Sort the names
# because os.listdir returns entries in arbitrary order and the sample order
# feeds the seeded train/test split further down.
txt_files = sorted(name for name in os.listdir(directory) if name.endswith('.txt'))
assert len(txt_files) <= data_arrayy.shape[0], (
    f"Found {len(txt_files)} .txt files but data_arrayy only has room for {data_arrayy.shape[0]}"
)

for i, filename in enumerate(txt_files):
    file_path = os.path.join(directory, filename)

    # Read both comma-separated columns of the recording
    data_df = pd.read_csv(file_path, delimiter=',', usecols=[0, 1], header=None)

    # Convert the data to numpy array
    data = data_df.values

    # Ensure the data shape matches expectations
    assert data.shape == (10240, 2), f"File {filename} has unexpected shape {data.shape}"

    # Store data in the array
    data_arrayy[i, :, :] = data

# Print the shape of the final array
print(data_arrayy.shape)


(3750, 10240, 2)


In [6]:
# Flag every recording (entry along axis 0) that contains a NaN anywhere.
# Reducing over both trailing axes yields one boolean per recording; reducing
# over axis=1 alone yields a (n_recordings, 2) mask, so a recording with NaNs in
# both columns would be listed twice in nan_row_indices.
nan_rows = np.isnan(data_arrayy).any(axis=(1, 2))

# Indices of the recordings that are dropped
nan_row_indices = np.where(nan_rows)[0]

# Keep only the NaN-free recordings (boolean indexing returns a new array)
non_focal_array = data_arrayy[~nan_rows]

# Print the shape of the resulting array
print(non_focal_array.shape)


(3750, 10240, 2)


In [7]:
import numpy as np

# Non-focal recordings are labelled 1. Size the label vector from the cleaned
# array instead of a hard-coded 3750, so labels stay aligned with the samples
# if the NaN filter drops any recordings.
y_nf = np.ones(len(non_focal_array))
print(y_nf.shape)

(3750,)


In [8]:
import numpy as np

# Focal recordings are labelled 0. Size the label vector from the cleaned array
# instead of a hard-coded 3750, so labels stay aligned with the samples if the
# NaN filter drops any recordings.
y_f = np.zeros(len(focal_array))
print(y_f.shape)

(3750,)


In [9]:
import numpy as np

# Focal recordings first, non-focal recordings second; the label vector built
# later must follow the same order.
array1 = focal_array
array2 = non_focal_array

# np.vstack joins along axis 0 (the recording axis), i.e. vertically: the result
# has len(array1) + len(array2) recordings and unchanged trailing dimensions.
stacked_array = np.vstack((array1, array2))

# The label previously said "horizontally", which contradicted the vstack above.
print("Stacked array vertically:")
print(stacked_array)


Stacked array horizontally:
[[[-23.584467  22.621477]
  [-20.180197  26.547081]
  [-16.631811  28.431639]
  ...
  [ 17.817694  -4.722577]
  [ 17.604982  -3.035015]
  [ 15.054106  -2.973314]]

 [[  1.770672   2.16606 ]
  [  7.448948  11.830342]
  [ 13.643579  22.593559]
  ...
  [-12.643819 -14.031724]
  [-11.053504 -20.490677]
  [-11.453481 -23.759109]]

 [[-22.218569  -6.315594]
  [-20.903902   2.320192]
  [-22.920767   8.273545]
  ...
  [-41.046448 -12.789649]
  [-42.06147  -15.303322]
  [-46.215527 -21.942698]]

 ...

 [[-16.280914 -10.494535]
  [-14.578059  -6.535292]
  [-12.626638  -4.157788]
  ...
  [-21.634005  -9.552231]
  [-22.678474 -10.085828]
  [-22.286791  -9.175525]]

 [[ 11.63331   -1.116699]
  [  8.432248  -9.232866]
  [ -5.501138 -22.984268]
  ...
  [ 29.681234 -21.715002]
  [ 28.138624 -61.248924]
  [ 22.855257 -78.836479]]

 [[-21.539114 -24.517914]
  [-21.258053 -26.164896]
  [-23.109068 -29.641247]
  ...
  [ -1.207698  -4.656437]
  [ -2.204289  -4.593308]
  [ -3.187

In [10]:
# Confirm the combined array holds both classes along axis 0
stacked_array.shape

(7500, 10240, 2)

In [11]:
import numpy as np

# Labels follow the same order as the stacked samples: focal (0) first, then
# non-focal (1).
array1, array2 = y_f, y_nf

# Both inputs are 1-D, so joining them end to end gives one flat label vector
y_labels = np.concatenate([array1, array2])

print("Stacked array horizontally:")
print(y_labels)


Stacked array horizontally:
[0. 0. 0. ... 1. 1. 1.]


In [12]:
# One label per stacked recording is expected here
print(y_labels.shape)

(7500,)


In [13]:
# Index 3750 is the first entry after the 3750 focal labels, so it should be the
# non-focal label 1.0.
y_labels[3750]

1.0

In [14]:
from sklearn.model_selection import train_test_split

# stacked_array and y_labels come from the stacking cells above.
# Hold out 20% of the recordings for evaluation; the fixed random_state makes
# the split repeatable between runs.
split_parts = train_test_split(
    stacked_array,
    y_labels,
    test_size=0.2,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

# Report the size of each partition
print(f"Shape of X_train: {X_train.shape}, Shape of y_train: {y_train.shape}")
print(f"Shape of X_test: {X_test.shape}, Shape of y_test: {y_test.shape}")


Shape of X_train: (6000, 10240, 2), Shape of y_train: (6000,)
Shape of X_test: (1500, 10240, 2), Shape of y_test: (1500,)


In [17]:
# Peek at the first five training labels; seeing both 0 and 1 indicates the
# class-ordered data was shuffled by the split.
y_train[:5]

array([1., 1., 1., 0., 0.])

In [21]:
import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from sklearn.preprocessing import StandardScaler

from aeon.classification import BaseClassifier
from aeon.classification.convolution_based._hydra import _SparseScaler
from aeon.transformations.collection.convolution_based import MultiRocket
from aeon.transformations.collection.convolution_based._hydra import HydraTransformer


class MultiRocketHydraClassifier(BaseClassifier):
    """Linear classifier over concatenated Hydra and MultiRocket features.

    Fitting runs two convolution-based transforms on the same input (Hydra and
    MultiRocket), scales each feature block with its own scaler, joins the two
    blocks column-wise and trains a ``RidgeClassifierCV`` on the result.
    Prediction replays the fitted transforms and scalers before calling the
    ridge model.

    See the individual Hydra and MultiRocket classifiers/transformations for
    details of each feature set.

    Parameters
    ----------
    n_kernels : int, default=8
        Number of kernels per group for the Hydra transform.
    n_groups : int, default=64
        Number of groups per dilation for the Hydra transform.
    class_weight : dict or "balanced", default=None
        Forwarded unchanged to ``RidgeClassifierCV``; consult the scikit-learn
        documentation for the full set of accepted values.
        If not given, all classes are supposed to have weight one.
        The "balanced" mode uses the values of y to automatically adjust weights
        inversely proportional to class frequencies in the input data as
        ``n_samples / (n_classes * np.bincount(y))``.
    n_jobs : int, default=1
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    random_state : int, RandomState instance or None, default=None
        If `int`, random_state is the seed used by the random number generator;
        If `RandomState` instance, random_state is the random number generator;
        If `None`, the random number generator is the `RandomState` instance used
        by `np.random`.

    Attributes
    ----------
    n_classes_ : int
        Number of classes. Extracted from the data.
    classes_ : ndarray of shape (n_classes_)
        Holds the label for each class.

    See Also
    --------
    HydraClassifier
    RocketClassifier

    References
    ----------
    .. [1] Dempster, A., Schmidt, D.F. and Webb, G.I., 2023. Hydra: Competing
        convolutional kernels for fast and accurate time series classification.
        Data Mining and Knowledge Discovery, pp.1-27.

    Examples
    --------
    >>> from aeon.classification.convolution_based import MultiRocketHydraClassifier
    >>> from aeon.testing.data_generation import make_example_3d_numpy
    >>> X, y = make_example_3d_numpy(n_cases=10, n_channels=1, n_timepoints=12,
    ...                              random_state=0)
    >>> clf = MultiRocketHydraClassifier(random_state=0)  # doctest: +SKIP
    >>> clf.fit(X, y)  # doctest: +SKIP
    MultiRocketHydraClassifier(random_state=0)
    >>> clf.predict(X)  # doctest: +SKIP
    array([0, 1, 0, 1, 0, 0, 1, 1, 1, 0])
    """

    _tags = {
        "capability:multivariate": True,
        "capability:multithreading": True,
        "algorithm_type": "convolution",
        "python_dependencies": "torch",
    }

    def __init__(
        self, n_kernels=8, n_groups=64, class_weight=None, n_jobs=1, random_state=None
    ):
        # Constructor arguments are stored verbatim under their own names
        self.n_kernels = n_kernels
        self.n_groups = n_groups
        self.class_weight = class_weight
        self.n_jobs = n_jobs
        self.random_state = random_state

        super().__init__()

    @staticmethod
    def _stack_features(hydra_features, multirocket_features):
        """Join the two scaled feature blocks column-wise, Hydra columns first."""
        return np.concatenate((hydra_features, multirocket_features), axis=1)

    def _fit(self, X, y):
        """Fit both transforms, their scalers and the ridge classifier.

        Parameters
        ----------
        X : collection of time series
            Training series, passed unchanged to both transforms.
        y : array-like
            Class labels for ``X``.

        Returns
        -------
        self
            The fitted estimator.
        """
        # Both transforms share the same parallelism and seeding settings
        shared_settings = {"n_jobs": self.n_jobs, "random_state": self.random_state}

        # Hydra branch: transform first, then scale with the sparse scaler
        self._transform_hydra = HydraTransformer(
            n_kernels=self.n_kernels,
            n_groups=self.n_groups,
            **shared_settings,
        )
        hydra_raw = self._transform_hydra.fit_transform(X)
        self._scale_hydra = _SparseScaler()
        hydra_features = self._scale_hydra.fit_transform(hydra_raw)

        # MultiRocket branch: transform first, then standardise
        self._transform_multirocket = MultiRocket(**shared_settings)
        multirocket_raw = self._transform_multirocket.fit_transform(X)
        self._scale_multirocket = StandardScaler()
        multirocket_features = self._scale_multirocket.fit_transform(multirocket_raw)

        combined = self._stack_features(hydra_features, multirocket_features)

        # Ridge penalty is picked from 10 log-spaced candidates in [1e-3, 1e3]
        candidate_alphas = np.logspace(-3, 3, 10)
        self.classifier = RidgeClassifierCV(
            alphas=candidate_alphas, class_weight=self.class_weight
        )
        self.classifier.fit(combined, y)

        return self

    def _predict(self, X) -> np.ndarray:
        """Predict class labels for ``X`` using the fitted pipeline.

        Parameters
        ----------
        X : collection of time series
            Series to classify, passed unchanged to both fitted transforms.

        Returns
        -------
        np.ndarray
            Output of the fitted ridge classifier's ``predict``.
        """
        hydra_features = self._scale_hydra.transform(
            self._transform_hydra.transform(X)
        )
        multirocket_features = self._scale_multirocket.transform(
            self._transform_multirocket.transform(X)
        )

        combined = self._stack_features(hydra_features, multirocket_features)
        return self.classifier.predict(combined)

ImportError: Dask dataframe requirements are not installed.

Please either conda or pip install as follows:

  conda install dask                     # either conda install
  python -m pip install "dask[dataframe]" --upgrade  # or python -m pip install

In [None]:
# aeon collection estimators expect 3-D input shaped
# (n_cases, n_channels, n_timepoints). X_train / X_test are
# (n_cases, 10240, 2), with time points on axis 1, so swap the last two axes.
# Otherwise the 10240 time points are treated as channels of length-2 series.
# ascontiguousarray materialises the transposed view as a C-ordered copy
# (roughly the size of X_train again in memory).
X_train_aeon = np.ascontiguousarray(np.transpose(X_train, (0, 2, 1)))
X_test_aeon = np.ascontiguousarray(np.transpose(X_test, (0, 2, 1)))

clf = MultiRocketHydraClassifier(random_state=0)
clf.fit(X_train_aeon, y_train)

# `y` holds the predicted labels and is read by the metrics cell below
y = clf.predict(X_test_aeon)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Ground truth comes from the held-out split, predictions from the fitted model
true_labels, predicted_labels = y_test, y

# Share of test recordings classified correctly
accuracy = accuracy_score(y_true=true_labels, y_pred=predicted_labels)

print(f"Accuracy: {accuracy * 100:.2f}%")

# Precision, recall and F1 use scikit-learn's default positive label (1),
# which in this notebook is the non-focal class.
precision = precision_score(y_true=true_labels, y_pred=predicted_labels)
print(f"Precision: {precision:.2f}")

recall = recall_score(y_true=true_labels, y_pred=predicted_labels)
print(f"Recall: {recall:.2f}")

f1 = f1_score(y_true=true_labels, y_pred=predicted_labels)
print(f"F1 Score: {f1:.2f}")