In [1]:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler

In [2]:
def pack_fusefeatures_labels(fused_features, processed_labels):
    """Bundle fused features and their labels into a single dictionary.

    Args:
        fused_features: Feature collection; stored as-is under ``'features'``.
        processed_labels: Label collection; stored as-is under ``'labels'``.

    Returns:
        dict: ``{'features': fused_features, 'labels': processed_labels}``.
        The inputs are referenced, not copied.
    """
    return dict(features=fused_features, labels=processed_labels)

In [3]:
def unpack_fusefeatures_labels(data_package, mode=None, Preprocess=None, scale_path=None):
    """Extract the features and labels from a package dictionary.

    Args:
        data_package: Mapping that provides the ``'features'`` and
            ``'labels'`` keys.
        mode: Ignored by this function. Kept (now optional) so existing
            four-argument call sites keep working.
        Preprocess: Ignored by this function; see ``mode``.
        scale_path: Ignored by this function; see ``mode``.

    Returns:
        tuple: ``(fused_features, processed_labels)``, the very objects stored
        in ``data_package`` (no copy is made).

    Raises:
        KeyError: If ``data_package`` lacks ``'features'`` or ``'labels'``.
    """
    return data_package['features'], data_package['labels']

In [4]:
import joblib
def standard_data(fused_features, processed_labels, mode, Preprocess, scale_path):
    """Optionally scale the features with a scaler persisted at ``scale_path``.

    In ``'train'`` mode a new scaler is fitted on ``fused_features``, saved to
    ``scale_path`` with joblib, and then applied. In ``'test'`` mode the scaler
    previously saved at ``scale_path`` is loaded and applied.

    Args:
        fused_features: Feature data handed to the scaler's ``fit`` /
            ``transform``.
        processed_labels: Labels; returned unchanged.
        mode: ``'train'`` or ``'test'``. Only consulted when ``Preprocess`` is
            not ``None``.
        Preprocess: ``'standardize'`` (``StandardScaler``), ``'normalize'``
            (``MinMaxScaler``), or ``None`` to skip scaling entirely.
        scale_path: File path the scaler is written to (train) or read from
            (test).

    Returns:
        tuple: ``(features, processed_labels)`` where ``features`` is the
        scaler's ``transform`` output, or the untouched input when
        ``Preprocess`` is ``None``.

    Raises:
        ValueError: If ``Preprocess`` is not one of the accepted values, or if
            ``mode`` is not ``'train'``/``'test'`` while scaling is requested.
    """
    # No scaling requested: pass the data straight through. `mode` is
    # deliberately not validated here, matching the previous behavior.
    if Preprocess is None:
        return fused_features, processed_labels

    # Reject an unknown Preprocess before looking at `mode`, so the error
    # precedence is the same as before.
    if Preprocess not in ('standardize', 'normalize'):
        raise ValueError("Preprocess must be 'standardize', 'normalize', or None")

    if mode == 'train':
        # Only build a fresh scaler when it is actually going to be fitted.
        scaler = StandardScaler() if Preprocess == 'standardize' else MinMaxScaler()
        scaler.fit(fused_features)
        joblib.dump(scaler, scale_path)
    elif mode == 'test':
        # SECURITY: joblib.load unpickles the file and can execute arbitrary
        # code; only load scaler files from a trusted source.
        # NOTE: the loaded scaler is used as-is; its type is not checked
        # against `Preprocess`.
        scaler = joblib.load(scale_path)
    else:
        raise ValueError("Mode must be 'train' or 'test'")

    return scaler.transform(fused_features), processed_labels


In [5]:
def load_data_mlp(fused_features, labels, batch_size, test_size=0.2, random_state=42):
    """Split the data into train/test parts and wrap each in a DataLoader.

    Args:
        fused_features: Feature data passed to ``train_test_split``.
        labels: Labels passed to ``train_test_split`` alongside the features.
        batch_size: Batch size used for both loaders.
        test_size: Forwarded to ``train_test_split``; defaults to ``0.2``, the
            value that was previously hard-coded.
        random_state: Forwarded to ``train_test_split``; defaults to ``42``,
            the value that was previously hard-coded.

    Returns:
        tuple: ``(train_loader, test_loader)``. Features are converted to
        ``torch.float32`` tensors and labels to ``torch.long`` tensors. The
        training loader shuffles; the test loader does not.
    """
    features_train, features_test, labels_train, labels_test = train_test_split(
        fused_features,
        labels,
        test_size=test_size,
        random_state=random_state,
    )

    train_dataset = TensorDataset(
        torch.tensor(features_train, dtype=torch.float32),
        torch.tensor(labels_train, dtype=torch.long),
    )
    test_dataset = TensorDataset(
        torch.tensor(features_test, dtype=torch.float32),
        torch.tensor(labels_test, dtype=torch.long),
    )

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    return train_loader, test_loader
    

In [6]:
def load_data_mlp_10fold_cv(fused_features, labels, train_idx, test_idx, batch_size):
    """Build the train/test DataLoaders for one cross-validation fold.

    Args:
        fused_features: Feature container indexed as ``fused_features[idx]``.
        labels: Label container indexed as ``labels[idx]``.
        train_idx: Index selecting the training samples of this fold.
        test_idx: Index selecting the held-out samples of this fold.
        batch_size: Batch size used for both loaders.

    Returns:
        tuple: ``(train_loader, test_loader)``. Features become
        ``torch.float32`` tensors and labels ``torch.long`` tensors. The
        training loader shuffles; the test loader keeps the selected order.
    """
    def _fold_dataset(indices):
        # Pick the rows belonging to this part of the fold and cast them to
        # the dtypes used for features and labels.
        selected_features = torch.tensor(fused_features[indices], dtype=torch.float32)
        selected_labels = torch.tensor(labels[indices], dtype=torch.long)
        return TensorDataset(selected_features, selected_labels)

    return (
        DataLoader(_fold_dataset(train_idx), batch_size=batch_size, shuffle=True),
        DataLoader(_fold_dataset(test_idx), batch_size=batch_size, shuffle=False),
    )


In [7]:
def load_data_for_offline_test(fused_features, labels, batch_size):
    """Wrap a full feature/label set in a single, non-shuffling DataLoader.

    Args:
        fused_features: Feature data; converted to a ``torch.float32`` tensor.
        labels: Labels; converted to a ``torch.long`` tensor.
        batch_size: Batch size of the returned loader.

    Returns:
        DataLoader: Iterates over ``(features, labels)`` batches in the
        original sample order (``shuffle=False``).
    """
    dataset = TensorDataset(
        torch.tensor(fused_features, dtype=torch.float32),
        torch.tensor(labels, dtype=torch.long),
    )
    return DataLoader(dataset, batch_size=batch_size, shuffle=False)
