In [2]:
# Use the explicit %pip magic so the package is installed into the environment
# of the running kernel rather than relying on IPython's automagic lookup.
# (The recorded run of this cell resolved to torch 2.8.0.)
%pip install torch

Collecting torch
  Downloading torch-2.8.0-cp311-cp311-win_amd64.whl.metadata (30 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Downloading torch-2.8.0-cp311-cp311-win_amd64.whl (241.4 MB)
   ---------------------------------------- 0.0/241.4 MB ? eta -:--:--
   ---------------------------------------- 0.1/241.4 MB 2.6 MB/s eta 0:01:32
   ---------------------------------------- 0.2/241.4 MB 2.6 MB/s eta 0:01:32
   ---------------------------------------- 0.4/241.4 MB 3.1 MB/s eta 0:01:18
   ---------------------------------------- 0.6/241.4 MB 3.5 MB/s eta 0:01:08
   ---------------------------------------- 0.8/241.4 MB 3.4 MB/s eta 0:01:11
   ---------------------------------------- 0.9/241.4 MB 3.4 MB/s eta 0:01:10
   ---------------------------------------- 1.1/241.4 MB 3.6 MB/s eta 0:01:06
   ---------------------------------------- 1.3/241.4 MB 3.6 MB/s eta 0:01:07
   ---------------------------------------- 1.5/241.4 MB 3.

In [3]:
import os
import zipfile
import urllib.request
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.io import arff
import torch
from torch.utils.data import TensorDataset, DataLoader

# Root folder (relative to the working directory) that receives every
# downloaded and unpacked dataset.
DATA_DIR = 'datasets'

# Create the folder up front; an already existing folder is not an error.
os.makedirs(name=DATA_DIR, exist_ok=True)

def download_dataset(dataset_name, url):
    """
    Downloads a zip archive and extracts it below DATA_DIR.

    The archive is stored temporarily as ``DATA_DIR/<dataset_name>.zip`` and
    unpacked into ``DATA_DIR/<dataset_name>``. The temporary zip file is
    removed afterwards, whether or not the download and extraction succeeded,
    so a failed run never leaves a partial or corrupt archive behind.

    Args:
        dataset_name: Name used for the temporary zip file and the target folder.
        url: Location of the zip archive to fetch.

    Returns:
        Path of the directory the archive was extracted into.

    Raises:
        urllib.error.URLError: If the archive cannot be fetched.
        zipfile.BadZipFile: If the downloaded file is not a valid zip archive.
    """
    zip_path = os.path.join(DATA_DIR, f"{dataset_name}.zip")
    extract_path = os.path.join(DATA_DIR, dataset_name)

    # The folder may have been removed since the notebook's setup cell ran.
    os.makedirs(DATA_DIR, exist_ok=True)

    try:
        # Download the dataset
        print(f"Downloading {dataset_name} from {url}...")
        urllib.request.urlretrieve(url, zip_path)

        # Extract the zip file
        print(f"Extracting {dataset_name}...")
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall(extract_path)
    finally:
        # Always discard the temporary archive, including after a failed
        # download or a corrupt zip.
        if os.path.exists(zip_path):
            os.remove(zip_path)

    print(f"Dataset {dataset_name} extracted to {extract_path}.")
    return extract_path

def load_arff_data(file_path):
    """
    Read an ARFF file from disk and return its records as a pandas DataFrame.

    Only the data records are kept; the ARFF metadata returned by the
    loader is discarded.
    """
    print(f"Loading ARFF file: {file_path}")
    records, _ = arff.loadarff(file_path)
    return pd.DataFrame(records)

def preprocess_data(train_df, test_df, batch_size=64, label_col='class'):
    """
    Turns the raw train/test DataFrames into PyTorch tensors and DataLoaders.

    Steps:
    - Splits each frame into feature columns and the label column.
    - Re-encodes the integer labels as contiguous class indices 0..K-1.
    - Standardises the features with statistics fitted on the training set only.
    - Reshapes the features to (samples, time_steps, 1); every non-label
      column is treated as one time step, so the series length is taken
      from the data instead of being hard-coded.
    - Splits the incoming test set 50/50 into validation and test parts.
    - Converts everything to tensors and wraps them in DataLoaders.

    Args:
        train_df: Training DataFrame containing the feature columns and `label_col`.
        test_df: Test DataFrame with the same columns as `train_df`.
        batch_size: Batch size used for all three DataLoaders.
        label_col: Name of the label column (defaults to 'class').

    Returns:
        Tuple ``(train_loader, valid_loader, test_loader,
        X_train, X_valid, X_test, y_train, y_valid, y_test)``.
    """
    # Separate features and labels
    train_features = train_df.drop(columns=[label_col])
    test_features = test_df.drop(columns=[label_col])

    # Map the raw integer labels onto 0..K-1. For labels 1..K this equals the
    # former `label - 1`; for labels that already start at 0 it avoids
    # producing -1, which is not a valid class index.
    train_raw = train_df[label_col].apply(int).to_numpy()
    test_raw = test_df[label_col].apply(int).to_numpy()
    classes = np.unique(np.concatenate([train_raw, test_raw]))
    train_labels = np.searchsorted(classes, train_raw)
    test_labels = np.searchsorted(classes, test_raw)

    # Normalize features (fit on train only so no test statistics leak in)
    scaler = StandardScaler()
    train_features_normalized = scaler.fit_transform(train_features)
    test_features_normalized = scaler.transform(test_features)

    # Reshape the features into 3D arrays (samples, time_steps, dimensions),
    # deriving time_steps from the number of feature columns.
    n_steps = train_features_normalized.shape[1]
    X_train = train_features_normalized.reshape(-1, n_steps, 1)
    X_test = test_features_normalized.reshape(-1, n_steps, 1)

    # Split test data into validation and test sets
    X_valid, X_test, y_valid, y_test = train_test_split(
        X_test, test_labels, test_size=0.50, random_state=42
    )

    # Convert data to PyTorch tensors
    X_train = torch.tensor(X_train, dtype=torch.float32)
    y_train = torch.tensor(train_labels, dtype=torch.int64)

    X_valid = torch.tensor(X_valid, dtype=torch.float32)
    y_valid = torch.tensor(y_valid, dtype=torch.int64)

    X_test = torch.tensor(X_test, dtype=torch.float32)
    y_test = torch.tensor(y_test, dtype=torch.int64)

    # Output dataset shapes
    print(f"X_train shape: {X_train.shape}, y_train shape: {y_train.shape}")
    print(f"X_valid shape: {X_valid.shape}, y_valid shape: {y_valid.shape}")
    print(f"X_test shape: {X_test.shape}, y_test shape: {y_test.shape}")

    # Create DataLoaders
    train_dataset = TensorDataset(X_train, y_train)
    valid_dataset = TensorDataset(X_valid, y_valid)
    test_dataset = TensorDataset(X_test, y_test)

    # NOTE(review): drop_last=True is kept on the validation/test loaders for
    # backward compatibility; iterating them skips the final incomplete batch,
    # so metrics computed from the loaders do not cover every sample.
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, drop_last=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, drop_last=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, drop_last=True)

    # Return both the DataLoaders and the raw tensors
    return train_loader, valid_loader, test_loader, X_train, X_valid, X_test, y_train, y_valid, y_test

# End-to-end example: fetch the SharePriceIncrease archive, load both ARFF
# splits and build the tensors / DataLoaders used by the rest of the notebook.
if __name__ == "__main__":
    # Dataset identifier and the archive it is fetched from (swap both to use
    # a different dataset).
    dataset_name = 'SharePriceIncrease'
    dataset_url = 'https://timeseriesclassification.com/aeon-toolkit/SharePriceIncrease.zip'
    extract_path = download_dataset(dataset_name, dataset_url)

    # The archive contains one ARFF file per split, named after the dataset.
    train_file = os.path.join(extract_path, dataset_name + '_TRAIN.arff')
    test_file = os.path.join(extract_path, dataset_name + '_TEST.arff')

    # Read each split into its own DataFrame (train first, then test).
    train_df = load_arff_data(train_file)
    test_df = load_arff_data(test_file)

    # Build loaders and raw tensors in one go.
    (
        train_loader, valid_loader, test_loader,
        X_train, X_valid, X_test,
        y_train, y_valid, y_test,
    ) = preprocess_data(train_df, test_df)

    # Number of distinct label values present in the training targets.
    n_classes = torch.unique(y_train).numel()

    print(f"Number of classes: {n_classes}")


Downloading SharePriceIncrease from https://timeseriesclassification.com/aeon-toolkit/SharePriceIncrease.zip...
Extracting SharePriceIncrease...
Dataset SharePriceIncrease extracted to datasets\SharePriceIncrease.
Loading ARFF file: datasets\SharePriceIncrease\SharePriceIncrease_TRAIN.arff
Loading ARFF file: datasets\SharePriceIncrease\SharePriceIncrease_TEST.arff
X_train shape: torch.Size([965, 60, 1]), y_train shape: torch.Size([965])
X_valid shape: torch.Size([483, 60, 1]), y_valid shape: torch.Size([483])
X_test shape: torch.Size([483, 60, 1]), y_test shape: torch.Size([483])
Number of classes: 2
