In [1]:
import warnings

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, TensorDataset

In [2]:
# Locations of the three CSV inputs, relative to the notebook's working directory
lr_train_path = "data/lr_train.csv"
hr_train_path = "data/hr_train.csv"
lr_test_path = "data/lr_test.csv"

# Read every CSV into its own DataFrame (same order as the paths above)
lr_train, hr_train, lr_test = (
    pd.read_csv(csv_path)
    for csv_path in (lr_train_path, hr_train_path, lr_test_path)
)

# Materialise each frame as a float32 NumPy matrix:
#   X_train <- lr_train (training inputs)
#   y_train <- hr_train (training targets)
#   X_test  <- lr_test  (test inputs; no targets are loaded for the test split)
X_train, y_train, X_test = (
    frame.to_numpy(dtype=np.float32)
    for frame in (lr_train, hr_train, lr_test)
)

In [3]:
# PCA (optional cell)
# Reduces the input features to 100 principal components. The projection is
# fitted on the training inputs only and then applied to BOTH the training and
# the test inputs, so the two matrices keep the same number of columns.
from sklearn.decomposition import PCA

# Rebuild both matrices from the DataFrames so that re-running this cell always
# starts from the raw features instead of re-projecting already-reduced data.
X_train = lr_train.to_numpy(dtype=np.float32)
X_test = lr_test.to_numpy(dtype=np.float32)

# Initialize PCA. random_state is pinned because scikit-learn may select a
# randomized SVD solver for wide inputs, which would otherwise make the
# components differ between runs.
pca = PCA(n_components=100, random_state=0)

print("Shape before PCA:", X_train.shape)

# Fit on the training inputs only (no information from the test set is used) ...
X_train = pca.fit_transform(X_train)
# ... and project the test inputs with the same fitted components.
# NOTE(review): assumes lr_test has the same feature columns as lr_train;
# pca.transform raises if the column counts differ.
X_test = pca.transform(X_test)

print("Shape after PCA:", X_train.shape)


Shape before PCA: (167, 12720)
Shape after PCA: (167, 100)


In [4]:
# Convert NumPy arrays to PyTorch tensors.
# torch.as_tensor reuses the NumPy buffer instead of copying it and returns an
# existing tensor unchanged, so re-running this cell (where the names already
# hold tensors) does not trigger the copy-construct warning that
# torch.tensor(<tensor>) emits.
X_train = torch.as_tensor(X_train)
y_train = torch.as_tensor(y_train)
X_test = torch.as_tensor(X_test)

# Sanity check: the per-sample feature shape of the train and test inputs should
# agree (presumably both are fed to the same model). A mismatch typically means
# a preprocessing step such as the optional PCA cell was applied to only one of
# them. This only warns, so the rest of the cell still runs.
if X_train.shape[1:] != X_test.shape[1:]:
    warnings.warn(
        f"Feature shape mismatch: X_train has {tuple(X_train.shape[1:])} "
        f"but X_test has {tuple(X_test.shape[1:])}; "
        "apply the same preprocessing (e.g. PCA) to both."
    )

# Create PyTorch Dataset pairing each input row with its target row
train_dataset = TensorDataset(X_train, y_train)

# Define batch size
batch_size = 32

# Create train DataLoader (reshuffled every epoch)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

# Create test DataLoader (without labels); order is kept so outputs line up
# with the rows of the test inputs
test_loader = DataLoader(X_test, batch_size=batch_size, shuffle=False)

# Check batch shapes by peeking at the first training batch only
for batch in train_loader:
    X_batch, y_batch = batch
    print("X_batch Shape:", X_batch.shape)
    print("y_batch Shape:", y_batch.shape)
    break

X_batch Shape: torch.Size([32, 100])
y_batch Shape: torch.Size([32, 35778])
