In [1]:
import os
import pandas as pd
import numpy as np

# Directory containing the files
directory = r'E:\sahithi\FOCAL'  # Replace with your actual directory path

# Initialize an empty array to store the data
data_array = np.empty((3750, 10240, 2))  # Assuming 3750 files, each with 10240 time points and 2 columns

# Iterate over files in the directory
for i, filename in enumerate(os.listdir(directory)):
    if filename.endswith('.txt'):
        file_path = os.path.join(directory, filename)
        
        # Read data from file using pandas, load both columns (assuming they are separated by commas)
        data_df = pd.read_csv(file_path, delimiter=',', usecols=[0, 1], header=None)
        
        # Convert the data to numpy array
        data = data_df.values
        
        # Ensure the data shape matches expectations
        assert data.shape == (10240, 2), f"File {filename} has unexpected shape {data.shape}"
        
        # Store data in the array
        data_array[i, :, :] = data

# Print the shape of the final array
print(data_array.shape)


(3750, 10240, 2)


In [3]:
# Find rows that contain any NaN values
nan_rows = np.any(np.isnan(data_array), axis=1)  # Rows containing any NaN values

# Get the indices of rows with NaN values
nan_row_indices = np.where(nan_rows)[0]

# Delete the rows that contain NaN values
focal_array = np.delete(data_array, nan_row_indices, axis=0)

# Print the shape of the resulting array
print(focal_array.shape)


(3750, 10240, 2)


In [5]:
nan_row_indices


array([], dtype=int64)

In [7]:
import os
import pandas as pd
import numpy as np

# Directory containing the files
directory = r'E:\sahithi\NON_FOCAL'  # Replace with your actual directory path

# Initialize an empty array to store the data
data_arrayy = np.empty((3750, 10240, 2))  # Assuming 3750 files, each with 10240 time points and 2 columns

# Iterate over files in the directory
for i, filename in enumerate(os.listdir(directory)):
    if filename.endswith('.txt'):
        file_path = os.path.join(directory, filename)
        
        # Read data from file using pandas, load both columns (assuming they are separated by commas)
        data_df = pd.read_csv(file_path, delimiter=',', usecols=[0, 1], header=None)
        
        # Convert the data to numpy array
        data = data_df.values
        
        # Ensure the data shape matches expectations
        assert data.shape == (10240, 2), f"File {filename} has unexpected shape {data.shape}"
        
        # Store data in the array
        data_arrayy[i, :, :] = data

# Print the shape of the final array
print(data_arrayy.shape)


(3750, 10240, 2)


In [8]:
# Find rows that contain any NaN values
nan_rows = np.any(np.isnan(data_arrayy), axis=1)  # Rows containing any NaN values

# Get the indices of rows with NaN values
nan_row_indices = np.where(nan_rows)[0]

# Delete the rows that contain NaN values
non_focal_array = np.delete(data_arrayy, nan_row_indices, axis=0)

# Print the shape of the resulting array
print(non_focal_array.shape)


(3750, 10240, 2)


In [9]:
import numpy as np

# Create an array with zeros and ones
y_nf = np.ones(3750)
print(y_nf.shape)

(3750,)


In [10]:
import numpy as np

# Create an array with zeros and ones
y_f = np.zeros(3750)
print(y_f.shape)

(3750,)


In [11]:
import numpy as np

# Assuming you have two arrays
array1 = focal_array
array2 = non_focal_array

# Stack them horizontally
stacked_array = np.vstack((array1, array2))

print("Stacked array horizontally:")
print(stacked_array)


Stacked array horizontally:
[[[-23.584467  22.621477]
  [-20.180197  26.547081]
  [-16.631811  28.431639]
  ...
  [ 17.817694  -4.722577]
  [ 17.604982  -3.035015]
  [ 15.054106  -2.973314]]

 [[  1.770672   2.16606 ]
  [  7.448948  11.830342]
  [ 13.643579  22.593559]
  ...
  [-12.643819 -14.031724]
  [-11.053504 -20.490677]
  [-11.453481 -23.759109]]

 [[-22.218569  -6.315594]
  [-20.903902   2.320192]
  [-22.920767   8.273545]
  ...
  [-41.046448 -12.789649]
  [-42.06147  -15.303322]
  [-46.215527 -21.942698]]

 ...

 [[-16.280914 -10.494535]
  [-14.578059  -6.535292]
  [-12.626638  -4.157788]
  ...
  [-21.634005  -9.552231]
  [-22.678474 -10.085828]
  [-22.286791  -9.175525]]

 [[ 11.63331   -1.116699]
  [  8.432248  -9.232866]
  [ -5.501138 -22.984268]
  ...
  [ 29.681234 -21.715002]
  [ 28.138624 -61.248924]
  [ 22.855257 -78.836479]]

 [[-21.539114 -24.517914]
  [-21.258053 -26.164896]
  [-23.109068 -29.641247]
  ...
  [ -1.207698  -4.656437]
  [ -2.204289  -4.593308]
  [ -3.187

In [12]:
stacked_array.shape

(7500, 10240, 2)

In [13]:
import numpy as np

# Assuming you have two arrays
array1 = y_f
array2 = y_nf

# Stack them horizontally
y_labels = np.hstack((array1, array2))

print("Stacked array horizontally:")
print(y_labels)


Stacked array horizontally:
[0. 0. 0. ... 1. 1. 1.]


In [14]:
print(y_labels.shape)

(7500,)


In [15]:
y_labels[3750]

1.0

In [16]:
from sklearn.model_selection import train_test_split

# Assuming data_array and labels are already defined
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(stacked_array, y_labels, test_size=0.2, random_state=42)

# Print shapes of the resulting arrays
print(f"Shape of X_train: {X_train.shape}, Shape of y_train: {y_train.shape}")
print(f"Shape of X_test: {X_test.shape}, Shape of y_test: {y_test.shape}")


Shape of X_train: (6000, 10240, 2), Shape of y_train: (6000,)
Shape of X_test: (1500, 10240, 2), Shape of y_test: (1500,)


In [17]:
y_train[:5]

array([1., 1., 1., 0., 0.])

In [22]:
X_train_reshaped = X_train.transpose(0, 2, 1)

In [18]:
import numpy as np
from sklearn.linear_model import RidgeClassifierCV
from sklearn.preprocessing import StandardScaler

from aeon.classification import BaseClassifier
from aeon.classification.convolution_based._hydra import _SparseScaler
from aeon.transformations.collection.convolution_based import MultiRocket
from aeon.transformations.collection.convolution_based._hydra import HydraTransformer


class MultiRocketHydraClassifier(BaseClassifier):
    """MultiRocket-Hydra Classifier.

    A combination of the Hydra and MultiRocket algorithms. The algorithm concatenates
    the output of both algorithms and trains a linear classifier on the combined
    features.

    See both individual classifier/transformation for more details.

    Parameters
    ----------
    n_kernels : int, default=8
        Number of kernels per group for the Hydra transform.
    n_groups : int, default=64
        Number of groups per dilation for the Hydra transform.
    class_weight{“balanced”, “balanced_subsample”}, dict or list of dicts, default=None
        From sklearn documentation:
        If not given, all classes are supposed to have weight one.
        The “balanced” mode uses the values of y to automatically adjust weights
        inversely proportional to class frequencies in the input data as
        n_samples / (n_classes * np.bincount(y))
        The “balanced_subsample” mode is the same as “balanced” except that weights
        are computed based on the bootstrap sample for every tree grown.
        For multi-output, the weights of each column of y will be multiplied.
        Note that these weights will be multiplied with sample_weight (passed through
        the fit method) if sample_weight is specified.
    n_jobs : int, default=1
        The number of jobs to run in parallel for both `fit` and `predict`.
        ``-1`` means using all processors.
    random_state : int, RandomState instance or None, default=None
        If `int`, random_state is the seed used by the random number generator;
        If `RandomState` instance, random_state is the random number generator;
        If `None`, the random number generator is the `RandomState` instance used
        by `np.random`.

    Attributes
    ----------
    n_classes_ : int
        Number of classes. Extracted from the data.
    classes_ : ndarray of shape (n_classes_)
        Holds the label for each class.

    See Also
    --------
    HydraClassifier
    RocketClassifier

    References
    ----------
    .. [1] Dempster, A., Schmidt, D.F. and Webb, G.I., 2023. Hydra: Competing
        convolutional kernels for fast and accurate time series classification.
        Data Mining and Knowledge Discovery, pp.1-27.

    Examples
    --------
    >>> from aeon.classification.convolution_based import MultiRocketHydraClassifier
    >>> from aeon.testing.data_generation import make_example_3d_numpy
    >>> X, y = make_example_3d_numpy(n_cases=10, n_channels=1, n_timepoints=12,
    ...                              random_state=0)
    >>> clf = MultiRocketHydraClassifier(random_state=0)  # doctest: +SKIP
    >>> clf.fit(X, y)  # doctest: +SKIP
    MultiRocketHydraClassifier(random_state=0)
    >>> clf.predict(X)  # doctest: +SKIP
    array([0, 1, 0, 1, 0, 0, 1, 1, 1, 0])
    """

    _tags = {
        "capability:multivariate": True,
        "capability:multithreading": True,
        "algorithm_type": "convolution",
        "python_dependencies": "torch",
    }

    def __init__(
        self, n_kernels=8, n_groups=64, class_weight=None, n_jobs=1, random_state=None
    ):
        self.n_kernels = n_kernels
        self.n_groups = n_groups
        self.class_weight = class_weight
        self.n_jobs = n_jobs
        self.random_state = random_state

        super().__init__()

    def _fit(self, X, y):
        self._transform_hydra = HydraTransformer(
            n_kernels=self.n_kernels,
            n_groups=self.n_groups,
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        Xt_hydra = self._transform_hydra.fit_transform(X)

        self._scale_hydra = _SparseScaler()
        Xt_hydra = self._scale_hydra.fit_transform(Xt_hydra)

        self._transform_multirocket = MultiRocket(
            n_jobs=self.n_jobs,
            random_state=self.random_state,
        )
        Xt_multirocket = self._transform_multirocket.fit_transform(X)

        self._scale_multirocket = StandardScaler()
        Xt_multirocket = self._scale_multirocket.fit_transform(Xt_multirocket)

        Xt = np.concatenate((Xt_hydra, Xt_multirocket), axis=1)

        self.classifier = RidgeClassifierCV(
            alphas=np.logspace(-3, 3, 10), class_weight=self.class_weight
        )
        self.classifier.fit(Xt, y)

        return self

    def _predict(self, X) -> np.ndarray:
        Xt_hydra = self._transform_hydra.transform(X)
        Xt_hydra = self._scale_hydra.transform(Xt_hydra)

        Xt_multirocket = self._transform_multirocket.transform(X)
        Xt_multirocket = self._scale_multirocket.transform(Xt_multirocket)

        Xt = np.concatenate((Xt_hydra, Xt_multirocket), axis=1)

        return self.classifier.predict(Xt)

In [27]:
# Angus Dempster, Daniel F. Schmidt, Geoffrey I. Webb

# HYDRA: Competing convolutional kernels for fast and accurate time series classification
# https://arxiv.org/abs/2203.13652

# ** EXPERIMENTAL **
# This is an *untested*, *experimental* extension of Hydra to multivariate input.

# todo: cleanup, documentation

import numpy as np
import torch, torch.nn as nn, torch.nn.functional as F

class HydraMultivariate(nn.Module):

    def __init__(self, input_length, num_channels, k = 8, g = 64, max_num_channels = 8):

        super().__init__()

        self.k = k # num kernels per group
        self.g = g # num groups

        max_exponent = np.log2((input_length - 1) / (9 - 1)) # kernel length = 9

        self.dilations = 2 ** torch.arange(int(max_exponent) + 1)
        self.num_dilations = len(self.dilations)

        self.paddings = torch.div((9 - 1) * self.dilations, 2, rounding_mode = "floor").int()

        # if g > 1, assign: half the groups to X, half the groups to diff(X)
        divisor = 2 if self.g > 1 else 1
        _g = g // divisor
        self._g = _g

        self.W = [self.normalize(torch.randn(divisor, k * _g, 1, 9)) for _ in range(self.num_dilations)]

        # combine num_channels // 2 channels (2 < n < max_num_channels)
        num_channels_per = np.clip(num_channels // 2, 2, max_num_channels)
        self.I = [torch.randint(0, num_channels, (divisor, _g, num_channels_per)) for _ in range(self.num_dilations)]

    @staticmethod
    def normalize(W):
        W -= W.mean(-1, keepdims = True)
        W /= W.abs().sum(-1, keepdims = True)
        return W

    # transform in batches of *batch_size*
    def batch(self, X, batch_size = 256):
        num_examples = X.shape[0]
        if num_examples <= batch_size:
            return self(X)
        else:
            Z = []
            batches = torch.arange(num_examples).split(batch_size)
            for i, batch in enumerate(batches):
                Z.append(self(X[batch]))
            return torch.cat(Z)

    def forward(self, X):

        num_examples = X.shape[0]

        if self.g > 1:
            diff_X = torch.diff(X)

        Z = []

        for dilation_index in range(self.num_dilations):

            d = self.dilations[dilation_index].item()
            p = self.paddings[dilation_index].item()

            # diff_index == 0 -> X
            # diff_index == 1 -> diff(X)
            for diff_index in range(min(2, self.g)):

                _Z = F.conv1d(X[:, self.I[dilation_index][diff_index]].sum(2) if diff_index == 0 else diff_X[:, self.I[dilation_index][diff_index]].sum(2),
                              self.W[dilation_index][diff_index], dilation = d, padding = p,
                              groups = self._g) \
                      .view(num_examples, self._g, self.k, -1)

                max_values, max_indices = _Z.max(2)
                count_max = torch.zeros(num_examples, self._g, self.k)

                min_values, min_indices = _Z.min(2)
                count_min = torch.zeros(num_examples, self._g, self.k)

                count_max.scatter_add_(-1, max_indices, max_values)
                count_min.scatter_add_(-1, min_indices, torch.ones_like(min_values))

                Z.append(count_max)
                Z.append(count_min)

        Z = torch.cat(Z, 1).view(num_examples, -1)

        return Z

In [37]:
import sys  
import os  

# Add the parent directory to the Python path  
sys.path.append(os.path.abspath(os.path.join('..')))  

# Now you can import MultiRocket  


In [39]:
from multirocket.multirocket_multivariate import MultiRocket

In [None]:
# Chang Wei Tan, Angus Dempster, Christoph Bergmeir, Geoffrey I Webb
#
# MultiRocket: Multiple pooling operators and transformations for fast and effective time series classification
# https://arxiv.org/abs/2102.00457
import argparse
import os
import platform
import socket
import time
from datetime import datetime

import numba
import numpy as np
import pandas as pd
import psutil
import pytz
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
from sktime.datasets import load_from_tsfile_to_dataframe

from multirocket.multirocket_multivariate import MultiRocket
from utils.data_loader import process_ts_data
from utils.tools import create_directory

pd.set_option('display.max_columns', 500)

itr = 0
num_features = 10000
save = True
num_threads = 0

parser = argparse.ArgumentParser()
parser.add_argument("-d", "--datapath", type=str, required=False, default="E:\sahithi")
parser.add_argument("-p", "--problem", type=str, required=False, default="UWaveGestureLibrary")
parser.add_argument("-i", "--iter", type=int, required=False, default=0)
parser.add_argument("-n", "--num_features", type=int, required=False, default=10240)
parser.add_argument("-t", "--num_threads", type=int, required=False, default=-1)
parser.add_argument("-s", "--save", type=bool, required=False, default=True)
parser.add_argument("-v", "--verbose", type=int, required=False, default=2)

arguments = parser.parse_args()

if __name__ == '__main__':
    data_path = arguments.datapath
    problem = arguments.problem
    num_features = arguments.num_features
    num_threads = arguments.num_threads
    itr = arguments.iter
    save = arguments.save
    verbose = arguments.verbose

    output_path = os.getcwd() + "/output/"
    classifier_name = "MultiRocket_{}".format(num_features)

    data_folder = data_path + problem + "/"

    if os.path.exists(data_folder):
        if num_threads > 0:
            numba.set_num_threads(num_threads)
        output_path = os.getcwd() + "/output/"

        start = time.perf_counter()

        output_dir = "{}/multirocket/resample_{}/{}/{}/".format(
            output_path,
            itr,
            classifier_name,
            problem
        )
        if save:
            create_directory(output_dir)

        print("=======================================================================")
        print("Starting Experiments")
        print("=======================================================================")
        print("Data path: {}".format(data_path))
        print("Output Dir: {}".format(output_dir))
        print("Iteration: {}".format(itr))
        print("Problem: {}".format(problem))
        print("Number of Features: {}".format(num_features))

        # set data folder
        train_file = data_folder + problem + "_TRAIN.ts"
        test_file = data_folder + problem + "_TEST.ts"

        print("Loading data")
        X_train, y_train = load_from_tsfile_to_dataframe(train_file)
        X_test, y_test = load_from_tsfile_to_dataframe(test_file)

        encoder = LabelEncoder()
        y_train = encoder.fit_transform(y_train)
        y_test = encoder.transform(y_test)

        X_train = process_ts_data(X_train, normalise=False)
        X_test = process_ts_data(X_test, normalise=False)

        nb_classes = len(np.unique(np.concatenate((y_train, y_test), axis=0)))

        classifier = MultiRocket(
            num_features=num_features,
            classifier="logistic",
            verbose=verbose
        )
        yhat_train = classifier.fit(
            X_train, y_train,
            predict_on_train=False
        )

        if yhat_train is not None:
            train_acc = accuracy_score(y_train, yhat_train)
        else:
            train_acc = -1

        yhat_test = classifier.predict(X_test)
        test_acc = accuracy_score(y_test, yhat_test)

        # get cpu information
        physical_cores = psutil.cpu_count(logical=False)
        logical_cores = psutil.cpu_count(logical=True)
        cpu_freq = psutil.cpu_freq()
        max_freq = cpu_freq.max
        min_freq = cpu_freq.min
        memory = np.round(psutil.virtual_memory().total / 1e9)

        df_metrics = pd.DataFrame(data=np.zeros((1, 21), dtype=np.float), index=[0],
                                  columns=['timestamp', 'itr', 'classifier',
                                           'num_features',
                                           'dataset',
                                           'train_acc', 'train_time',
                                           'test_acc', 'test_time',
                                           'generate_kernel_time',
                                           'apply_kernel_on_train_time',
                                           'apply_kernel_on_test_time',
                                           'train_transform_time',
                                           'test_transform_time',
                                           'machine', 'processor',
                                           'physical_cores',
                                           "logical_cores",
                                           'max_freq', 'min_freq', 'memory'])
        df_metrics["timestamp"] = datetime.utcnow().replace(tzinfo=pytz.utc).strftime("%Y-%m-%d %H:%M:%S")
        df_metrics["itr"] = itr
        df_metrics["classifier"] = classifier_name
        df_metrics["num_features"] = num_features
        df_metrics["dataset"] = problem
        df_metrics["train_acc"] = train_acc
        df_metrics["train_time"] = classifier.train_duration
        df_metrics["test_acc"] = test_acc
        df_metrics["test_time"] = classifier.test_duration
        df_metrics["generate_kernel_time"] = classifier.generate_kernel_duration
        df_metrics["apply_kernel_on_train_time"] = classifier.apply_kernel_on_train_duration
        df_metrics["apply_kernel_on_test_time"] = classifier.apply_kernel_on_test_duration
        df_metrics["train_transform_time"] = classifier.train_transforms_duration
        df_metrics["test_transform_time"] = classifier.test_transforms_duration
        df_metrics["machine"] = socket.gethostname()
        df_metrics["processor"] = platform.processor()
        df_metrics["physical_cores"] = physical_cores
        df_metrics["logical_cores"] = logical_cores
        df_metrics["max_freq"] = max_freq
        df_metrics["min_freq"] = min_freq
        df_metrics["memory"] = memory

        print(df_metrics)
        if save:
            df_metrics.to_csv(output_dir + 'results.csv', index=False)

In [None]:
import numpy as np
import torch
from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Assuming the MultiRocket class is defined similarly to HydraMultivariate
class MultiRocketMultivariate(nn.Module):
    # Implementation of the MultiRocket model
    # Placeholder for your actual implementation
    def forward(self, X):
        # Extract features (dummy implementation)
        return torch.rand(X.shape[0], 128)  # Replace with actual feature extraction

# Define your datasets
X_train = torch.rand(6000, 10240, 2)  # Example input
y_train = np.random.randint(0, 2, 6000)  # Binary classification example
X_test = torch.rand(1500, 10240, 2)

# Initialize models
hydra_model = HydraMultivariate(input_length=10240, num_channels=2)
multirocket_model = MultiRocketMultivariate()

# Extract features using the Hydra model
hydra_features_train = hydra_model(X_train).detach().numpy()
hydra_features_test = hydra_model(X_test).detach().numpy()

# Extract features using the MultiRocket model
multirocket_features_train = multirocket_model(X_train).detach().numpy()
multirocket_features_test = multirocket_model(X_test).detach().numpy()

# Concatenate features
X_train_combined = np.concatenate((hydra_features_train, multirocket_features_train), axis=1)
X_test_combined = np.concatenate((hydra_features_test, multirocket_features_test), axis=1)

# Train SVM classifier
svm_model = make_pipeline(StandardScaler(), SVC(kernel='linear', random_state=0))
svm_model.fit(X_train_combined, y_train)

# Make predictions
y_pred = svm_model.predict(X_test_combined)

print("Predictions:", y_pred)


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Assuming you have the following true and predicted labels
true_labels = y_test
predicted_labels = y_pred

# Calculate accuracy
accuracy = accuracy_score(true_labels, predicted_labels)

print(f"Accuracy: {accuracy * 100:.2f}%")

precision = precision_score(true_labels, predicted_labels)
print(f"Precision: {precision:.2f}")

# Calculate recall
recall = recall_score(true_labels, predicted_labels)
print(f"Recall: {recall:.2f}")

# Calculate F1-score
f1 = f1_score(true_labels, predicted_labels)
print(f"F1 Score: {f1:.2f}")