In [2]:
import pandas as pd

In [3]:
from sklearn.model_selection import train_test_split

In [4]:
import random
import numpy as np
import torch
import os

# One seed shared by every RNG this notebook imports, so reruns are reproducible.
# The same value is passed as `random_state` to train_test_split further down.
SEED = 64
random.seed(SEED)        # Python's built-in RNG
torch.manual_seed(SEED)  # PyTorch default generator
np.random.seed(SEED)     # NumPy global RNG; returns None, so the cell shows no output

In [7]:
def save_dataset_splits(name, data_dir="ultra_dense/datasets", output_dir=".", seed=None):
    """Split ``<data_dir>/<name>.csv`` into train/val/test and save the parts as ``.npy``.

    The CSV is first split 85 % / 15 % (train / hold-out); the hold-out is then
    split 2/3 / 1/3 into validation and test, i.e. roughly 85 / 10 / 5 overall.
    Both splits use the same ``random_state`` so the result is deterministic.

    The last two columns of the CSV are treated as labels and every other
    column as a feature. Two directories are written under ``output_dir``:

    * ``<name>_X`` -- labels taken from the second-to-last column
    * ``<name>_Y`` -- labels taken from the last column

    Each directory receives ``N_train.npy``, ``N_val.npy``, ``N_test.npy``
    (the features, identical in both directories) and ``y_train.npy``,
    ``y_val.npy``, ``y_test.npy`` (that directory's label column).

    Parameters
    ----------
    name : str
        Dataset name; used as the CSV file stem and as the output directory prefix.
    data_dir : str, optional
        Directory containing ``<name>.csv``. Defaults to the location the
        notebook originally hard-coded.
    output_dir : str, optional
        Directory in which ``<name>_X`` and ``<name>_Y`` are created.
        Defaults to the current working directory.
    seed : int or None, optional
        ``random_state`` for both splits. ``None`` (default) falls back to the
        notebook-level ``SEED`` constant.

    Returns
    -------
    None
        The function is called for its side effect of writing files.
    """
    # Resolve the seed lazily so the notebook-level constant is only needed
    # when the caller does not supply one.
    if seed is None:
        seed = SEED

    df = pd.read_csv(os.path.join(data_dir, f"{name}.csv"))

    # Two-stage split: carve off the hold-out, then divide it into val and test.
    train_df, holdout_df = train_test_split(df, test_size=0.15, random_state=seed)
    val_df, test_df = train_test_split(holdout_df, test_size=1/3, random_state=seed)
    splits = {"train": train_df, "val": val_df, "test": test_df}

    # Convert the feature columns once per split; the same arrays are written
    # to both output directories. (No reset_index is needed: the saved arrays
    # hold values only, in row order.)
    features = {
        split: part.iloc[:, :-2].to_numpy() for split, part in splits.items()
    }

    # (directory suffix, positional index of the label column)
    label_targets = (("X", -2), ("Y", -1))

    for suffix, label_col in label_targets:
        target_dir = os.path.join(output_dir, f"{name}_{suffix}")
        os.makedirs(target_dir, exist_ok=True)

        for split, part in splits.items():
            np.save(os.path.join(target_dir, f"N_{split}.npy"), features[split])
            np.save(
                os.path.join(target_dir, f"y_{split}.npy"),
                part.iloc[:, label_col].to_numpy(),
            )

In [8]:
# Write the train/val/test .npy splits for the "DIS_lab_LoS_8" CSV.
save_dataset_splits("DIS_lab_LoS_8")

In [9]:
# Write the train/val/test .npy splits for the "DIS_lab_LoS_16" CSV.
save_dataset_splits("DIS_lab_LoS_16")

In [None]:
# Write the train/val/test .npy splits for the "ULA_lab_LoS_16" CSV.
save_dataset_splits("ULA_lab_LoS_16")

In [None]:
# Write the train/val/test .npy splits for the "ULA_lab_LoS_8" CSV.
save_dataset_splits("ULA_lab_LoS_8")

In [None]:
# Write the train/val/test .npy splits for the "URA_lab_LoS_16" CSV.
save_dataset_splits("URA_lab_LoS_16")

In [None]:
# Write the train/val/test .npy splits for the "URA_lab_LoS_8" CSV.
save_dataset_splits("URA_lab_LoS_8")