#### Unzip dataset folder

In [1]:
import zipfile

# Archive to unpack and the folder that receives its contents
zip_file_path = "D:/RESEARCH/BCI/BCI_dataset.zip" # This file contains 2000 pair images.
output_dir = "D:/RESEARCH/BCI"

# Read the archive and unpack every member into output_dir;
# the context manager closes the archive even if extraction fails
with zipfile.ZipFile(zip_file_path, mode='r') as zip_ref:
    zip_ref.extractall(path=output_dir)

print(f"Files extracted to {output_dir}")


Files extracted to D:/RESEARCH/BCI


#### Take a subset of the dataset to reduce its size. The dataset is already paired.

In [6]:
import os
import shutil
import random

In [7]:
# Paths to the original dataset
base_dir = "D:/RESEARCH/BCI/BCI_dataset"
trainA_dir = os.path.join(base_dir, "trainA")
trainB_dir = os.path.join(base_dir, "trainB")

# Paths for the modified dataset
modified_base_dir = "D:/RESEARCH/BCI/BCI_modified"
modified_trainA_dir = os.path.join(modified_base_dir, "trainA")
modified_trainB_dir = os.path.join(modified_base_dir, "trainB")
modified_testA_dir = os.path.join(modified_base_dir, "testA")
modified_testB_dir = os.path.join(modified_base_dir, "testB")

# Create directories for the modified dataset (no error if they already exist)
for modified_dir in (modified_trainA_dir, modified_trainB_dir, modified_testA_dir, modified_testB_dir):
    os.makedirs(modified_dir, exist_ok=True)

# Get the list of image pairs. Only regular files are kept: a sub-directory
# (e.g. `.ipynb_checkpoints`) would skew the counts and cannot be copied with shutil.copy.
trainA_files = sorted(
    name for name in os.listdir(trainA_dir)
    if os.path.isfile(os.path.join(trainA_dir, name))
)
trainB_files = sorted(
    name for name in os.listdir(trainB_dir)
    if os.path.isfile(os.path.join(trainB_dir, name))
)
# NOTE(review): pairs are matched purely by position in the two sorted listings and
# only the counts are verified here - confirm that file names in trainA and trainB
# correspond one-to-one.
assert len(trainA_files) == len(trainB_files), "Mismatch in trainA and trainB file counts."

# Sample 1,000 pairs
sample_size = 1000
sample_seed = 42  # fixed seed so the same subset is drawn on every run
if sample_size > len(trainA_files):
    raise ValueError(
        f"Cannot sample {sample_size} pairs: only {len(trainA_files)} pairs found in {base_dir}."
    )
# A dedicated Random instance keeps the draw reproducible without touching the global
# random state. Without a seed, every re-run picked a different subset and, because the
# output folders are reused, added further files to them.
sample_indices = random.Random(sample_seed).sample(range(len(trainA_files)), sample_size)

# Split indices into 80% train and 20% test (the two slices are disjoint)
train_split = int(0.8 * sample_size)
train_indices = sample_indices[:train_split]
test_indices = sample_indices[train_split:]

# Function to copy files to the new dataset
def copy_files(indices, srcA, srcB, destA, destB, filesA=None, filesB=None):
    """Copy the image pairs selected by ``indices`` into the destination folders.

    Args:
        indices: Iterable of integer positions into ``filesA`` / ``filesB``.
        srcA: Folder holding the A-side source files.
        srcB: Folder holding the B-side source files.
        destA: Folder that receives the A-side copies (created if missing).
        destB: Folder that receives the B-side copies (created if missing).
        filesA: Sequence of A-side file names. Defaults to the notebook-level
            ``trainA_files`` so existing five-argument calls keep working.
        filesB: Sequence of B-side file names. Defaults to the notebook-level
            ``trainB_files``.

    Raises:
        IndexError: If an index is outside the file-name sequences.
        OSError: If a source file cannot be read or a destination cannot be written.
    """
    # Fall back to the module-level listings only when the caller did not supply any.
    if filesA is None:
        filesA = trainA_files
    if filesB is None:
        filesB = trainB_files

    # Make the function usable on its own rather than relying on earlier setup code.
    os.makedirs(destA, exist_ok=True)
    os.makedirs(destB, exist_ok=True)

    for idx in indices:
        # The same position in both listings identifies one pair.
        fileA = filesA[idx]
        fileB = filesB[idx]
        shutil.copy(os.path.join(srcA, fileA), os.path.join(destA, fileA))
        shutil.copy(os.path.join(srcB, fileB), os.path.join(destB, fileB))

# Copy train and test files
copy_files(train_indices, trainA_dir, trainB_dir, modified_trainA_dir, modified_trainB_dir)
copy_files(test_indices, trainA_dir, trainB_dir, modified_testA_dir, modified_testB_dir)

# Report the number of pairs actually copied instead of a hard-coded figure,
# so the message stays correct if the sample size is changed.
total_pairs = len(train_indices) + len(test_indices)
print(f"Modified dataset created with {total_pairs:,} pairs ({2 * total_pairs:,} images) in {modified_base_dir}.")
print(f"Train: {len(train_indices)} pairs, Test: {len(test_indices)} pairs.")


Modified dataset created with 1,000 pairs (2,000 images) in D:/RESEARCH/BCI/BCI_modified.
Train: 800 pairs, Test: 200 pairs.
