In [58]:
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split
from sklearn.preprocessing import StandardScaler
import numpy as np

In [59]:
# The checkpoint appears to hold whole pickled dataset objects (they are used
# below via len() and DataLoader), not bare tensors. PyTorch >= 2.6 defaults
# torch.load to weights_only=True, which refuses such objects, so the full
# unpickler is requested explicitly (the keyword exists since PyTorch 1.13).
# NOTE(review): full unpickling can execute arbitrary code -- only load files
# that come from a trusted source.
loaded_datasets_info = torch.load(
    '/Users/jiaming/Desktop/Lab2/datas/saved_datasets.pth',
    weights_only=False,
)
loaded_train_dataset = loaded_datasets_info['train_dataset']
loaded_val_dataset = loaded_datasets_info['val_dataset']
loaded_test_dataset = loaded_datasets_info['test_dataset']

In [60]:
from imblearn.over_sampling import SMOTE

def scale_datasets(train_dataset, val_dataset, test_dataset):
    """Standardize the features of three dataset splits with training-set statistics.

    Each split is read in full through a ``DataLoader``, its features are
    flattened to ``(n_samples, n_features)``, and a ``StandardScaler`` fitted on
    the training features only is applied to all three splits. The scaled
    features are then reshaped back to their original per-split shape.

    Args:
        train_dataset: Dataset yielding ``(features, labels)`` pairs; its
            features are used to fit the scaler.
        val_dataset: Dataset yielding ``(features, labels)`` pairs, scaled with
            the training statistics.
        test_dataset: Dataset yielding ``(features, labels)`` pairs, scaled with
            the training statistics.

    Returns:
        A tuple ``(train, val, test)`` of ``TensorDataset`` objects. Features
        are ``float32`` CPU tensors with the same shape as the collated input
        features; labels are passed through exactly as collated by the
        ``DataLoader``.

    Raises:
        ValueError: If any of the three splits is empty.
    """
    scaler = StandardScaler()

    def get_features_labels(dataset, split_name):
        # Pull the entire split out as one batch so it can be scaled at once.
        n_samples = len(dataset)
        if n_samples == 0:
            # DataLoader would otherwise fail with an unrelated-looking
            # complaint about batch_size=0.
            raise ValueError(f"Cannot scale an empty {split_name} dataset.")
        loader = DataLoader(dataset, batch_size=n_samples)
        features, labels = next(iter(loader))
        return features, labels

    def flatten(features):
        # StandardScaler expects a 2-D (n_samples, n_features) array.
        # detach()/cpu() are no-ops for plain CPU tensors but keep .numpy()
        # from raising on tensors that require grad or live on another device.
        return features.detach().cpu().reshape(features.size(0), -1).numpy()

    def build_scaled_dataset(features, labels):
        scaled = scaler.transform(flatten(features))
        # Restore the original per-sample shape after the 2-D transform.
        scaled = torch.tensor(scaled, dtype=torch.float32).view_as(features)
        return TensorDataset(scaled, labels)

    # Materialize every split first so an empty split is reported before fitting.
    train_split = get_features_labels(train_dataset, "train")
    val_split = get_features_labels(val_dataset, "validation")
    test_split = get_features_labels(test_dataset, "test")

    # Fit on the training features only; validation/test reuse those statistics.
    scaler.fit(flatten(train_split[0]))

    train_dataset_scaled = build_scaled_dataset(*train_split)
    val_dataset_scaled = build_scaled_dataset(*val_split)
    test_dataset_scaled = build_scaled_dataset(*test_split)

    return train_dataset_scaled, val_dataset_scaled, test_dataset_scaled



In [61]:
# Run the train/val/test splits through scale_datasets and keep the scaled copies.
(
    loaded_train_dataset_scaled,
    loaded_val_dataset_scaled,
    loaded_test_dataset_scaled,
) = scale_datasets(
    loaded_train_dataset,
    loaded_val_dataset,
    loaded_test_dataset,
)

In [62]:
# Persist the three scaled splits together in a single file, keyed by split name.
torch.save(
    dict(
        train_dataset=loaded_train_dataset_scaled,
        val_dataset=loaded_val_dataset_scaled,
        test_dataset=loaded_test_dataset_scaled,
    ),
    '/Users/jiaming/Desktop/Lab2/datas/saved_datasets_scaled.pth',
)