In [None]:
import numpy as np
import glob
import os

import re

# Specify the directory where your .npy files are located
directory = '/content/drive/MyDrive/extracted_arrays'


def natural_sort_key(path):
    """Return a sort key that compares the integers embedded in `path` numerically.

    Plain string sorting places '..._0_10.npy' before '..._0_2.npy'. Splitting on
    digit runs yields alternating text / number chunks (text first, possibly
    empty), so converting the odd-indexed chunks to int keeps every key
    comparable position-by-position while ordering numbers by value.

    Args:
        path: File path or file name (str).

    Returns:
        list: Alternating str and int chunks of `path`.
    """
    chunks = re.split(r'(\d+)', path)
    return [int(chunk) if index % 2 else chunk for index, chunk in enumerate(chunks)]


# Use glob to get all .npy files in the directory, in numeric-aware order
# (so *_0_2.npy is loaded before *_0_10.npy)
npy_files = sorted(glob.glob(os.path.join(directory, '*.npy')), key=natural_sort_key)

# Initialize a list to hold the loaded arrays
arrays = []

# Loop over each file and load the array
for file_path in npy_files:
    # Load the array from the .npy file
    array = np.load(file_path)
    arrays.append(array)

    # Optionally, print the name of the file being loaded
    print(f"Loaded {file_path}")

# Now 'arrays' is a list containing all the loaded arrays, in the same order as 'npy_files'

Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_0.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_1.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_10.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_11.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_12.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_13.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_14.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_15.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_16.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_17.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_18.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_19.npy
Loaded /content/drive/MyDrive/extracted_arrays/SW_PMTEST_001_array_0_2.npy
Loaded /content

In [None]:
import torch
import numpy as np

# Assuming 'arrays' is your list of NumPy arrays


def _pad_to(arr, target_shape):
    """Zero-pad `arr` at the end of every axis until it reaches `target_shape`."""
    pad_widths = [(0, target_shape[axis] - arr.shape[axis]) for axis in range(arr.ndim)]
    return np.pad(arr, pad_widths, mode='constant')


# Largest extent along each axis; the number of axes is taken from the first array
num_axes = arrays[0].ndim
max_shape = tuple(max(arr.shape[axis] for arr in arrays) for axis in range(num_axes))

# Bring every array up to the common shape by appending zeros
padded_arrays = [_pad_to(arr, max_shape) for arr in arrays]

# Stack the uniform arrays into a single NumPy array
data_np = np.array(padded_arrays)

# Float tensor with an extra axis inserted at position 1
# (assumes each array is 2-D, giving (num_samples, 1, height, width) — TODO confirm)
data_tensor = torch.from_numpy(data_np).float().unsqueeze(1)

In [None]:
# Change the path to where your repository is located in your Google Drive
%cd /content/drive/MyDrive/pytorch-cyclegan-and-pix2pix

# Install dependencies if you have a requirements.txt file
!pip install -r requirements.txt

/content/drive/MyDrive/pytorch-cyclegan-and-pix2pix
Collecting dominate>=2.4.0 (from -r requirements.txt (line 3))
  Downloading dominate-2.9.1-py2.py3-none-any.whl.metadata (13 kB)
Collecting visdom>=0.1.8.8 (from -r requirements.txt (line 4))
  Downloading visdom-0.2.4.tar.gz (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m34.2 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading dominate-2.9.1-py2.py3-none-any.whl (29 kB)
Building wheels for collected packages: visdom
  Building wheel for visdom (setup.py) ... [?25l[?25hdone
  Created wheel for visdom: filename=visdom-0.2.4-py3-none-any.whl size=1408196 sha256=c3510c557765c991318d00b17c38a47bc90b07e86f6e3f0abe91a2b4bf899251
  Stored in directory: /root/.cache/pip/wheels/42/29/49/5bed207bac4578e4d2c0c5fc0226bfd33a7e2953ea56356855
Successfully built visdom
Installing collected packages: dominate, visdom
Successfully installed dominate-2.9.1 

In [None]:
%%bash
# Create the four dataset folders (train/test for domains A and B) in one call;
# -p creates parents as needed and does not fail when a folder already exists
mkdir -p dataset/trainA dataset/trainB dataset/testA dataset/testB

In [None]:
import os
import numpy as np
import shutil
import random

# Directories
# NOTE: the four dataset paths are relative, so they resolve against the current working directory
npy_dir = '/content/drive/MyDrive/extracted_arrays'
trainA_dir = 'dataset/trainA'
trainB_dir = 'dataset/trainB'
testA_dir = 'dataset/testA'
testB_dir = 'dataset/testB'

# Create directories if they don't exist
for dataset_dir in (trainA_dir, trainB_dir, testA_dir, testB_dir):
    os.makedirs(dataset_dir, exist_ok=True)

# Seed for reproducibility
random.seed(42)

# List all .npy files.
# os.listdir returns entries in arbitrary order, so sort them: a fixed seed only
# reproduces the same shuffle when the input order is also fixed.
npy_files = sorted(f for f in os.listdir(npy_dir) if f.endswith('.npy'))

# Functions to split data
def split_files(files, train_ratio=0.8):
    """Randomly split `files` into a training part and a testing part.

    The input is copied before shuffling, so the caller's sequence is left
    untouched (and any iterable, not just a list, is accepted).

    Args:
        files: Iterable of items (file names) to split.
        train_ratio: Fraction of items placed in the first (training) list;
            the split index is ``int(len(files) * train_ratio)``.

    Returns:
        tuple[list, list]: ``(train_files, test_files)``.
    """
    shuffled = list(files)
    random.shuffle(shuffled)
    split_index = int(len(shuffled) * train_ratio)
    return shuffled[:split_index], shuffled[split_index:]

# Separate files into Domain A and Domain B
domainA_files, domainB_files = [], []
domain_buckets = {0: domainA_files, 1: domainB_files}

for filename in npy_files:
    # Assuming filename format: 'base_name_array_i_j.npy' — the second-to-last
    # underscore-separated field is the domain index i
    domain_index = int(filename.split('_')[-2])
    bucket = domain_buckets.get(domain_index)
    if bucket is not None:  # indices other than 0 and 1 are ignored
        bucket.append(filename)

# Split each domain's files into training and testing sets
trainA_files, testA_files = split_files(domainA_files)
trainB_files, testB_files = split_files(domainB_files)

# Copy files to the respective directories, one (file list, destination) pair at a time
copy_plan = (
    (trainA_files, trainA_dir),
    (testA_files, testA_dir),
    (trainB_files, trainB_dir),
    (testB_files, testB_dir),
)
for file_group, target_dir in copy_plan:
    for filename in file_group:
        shutil.copy(os.path.join(npy_dir, filename), os.path.join(target_dir, filename))

print("Files have been organized into training and testing sets.")

Files have been organized into training and testing sets.


**Prepare training:** Change the path to the repository and install dependencies.

In [None]:
# Change the path to where your repository is located in your Google Drive
%cd /content/drive/MyDrive/pytorch-cyclegan-and-pix2pix

# Install dependencies if you have a requirements.txt file
!pip install -r requirements.txt

/content/drive/MyDrive/pytorch-cyclegan-and-pix2pix
Collecting dominate>=2.4.0 (from -r requirements.txt (line 3))
  Downloading dominate-2.9.1-py2.py3-none-any.whl.metadata (13 kB)
Collecting visdom>=0.1.8.8 (from -r requirements.txt (line 4))
  Downloading visdom-0.2.4.tar.gz (1.4 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.4/1.4 MB[0m [31m19.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading dominate-2.9.1-py2.py3-none-any.whl (29 kB)
Building wheels for collected packages: visdom
  Building wheel for visdom (setup.py) ... [?25l[?25hdone
  Created wheel for visdom: filename=visdom-0.2.4-py3-none-any.whl size=1408196 sha256=83f02da9602d4ced207dca9a1c949f72238f0eed37c161cea8e2a26f0f0cb41a
  Stored in directory: /root/.cache/pip/wheels/42/29/49/5bed207bac4578e4d2c0c5fc0226bfd33a7e2953ea56356855
Successfully built visdom
Installing collected packages: dominate, visdom
Successfully installed dominate-2.9.1 

# Training

-   `python train.py --dataroot ./datasets/horse2zebra --name horse2zebra --model cycle_gan`

Change the `--dataroot` and `--name` to your own dataset's path and model's name. Use `--gpu_ids 0,1,..` to train on multiple GPUs and `--batch_size` to change the batch size. I've found that a batch size of 16 fits onto 4 V100s and can finish training an epoch in ~90s.

Once your model has trained, copy over the last checkpoint to a format that the testing model can automatically detect:

Use `cp ./checkpoints/horse2zebra/latest_net_G_A.pth ./checkpoints/horse2zebra/latest_net_G.pth` if you want to transform images from class A to class B and `cp ./checkpoints/horse2zebra/latest_net_G_B.pth ./checkpoints/horse2zebra/latest_net_G.pth` if you want to transform images from class B to class A.


In [None]:
!python train.py --dataroot  /content/drive/MyDrive/pytorch-cyclegan-and-pix2pix/dataset --name npy-cyclegan --model cycle_gan --display_id -1 --input_nc 1 --output_nc 1

python3: can't open file '/content/train.py': [Errno 2] No such file or directory


**Continue training:** Use `--continue_train` to resume your previous training.

In [None]:
# Train the cycle_gan model on the dataset folder with one input and one output channel.
# --save_epoch_freq 1 is passed (per the flag name, a checkpoint frequency in epochs).
# No --continue_train flag is given here, so this command does not resume an earlier run.
# NOTE(review): train.py is a relative path — the working directory must be the repository root.
!python train.py --dataroot  /content/drive/MyDrive/pytorch-cyclegan-and-pix2pix/dataset --name npy-cyclegan --model cycle_gan --display_id -1 --input_nc 1 --output_nc 1 --save_epoch_freq 1

----------------- Options ---------------
               batch_size: 1                             
                    beta1: 0.5                           
          checkpoints_dir: ./checkpoints                 
           continue_train: False                         
                crop_size: 256                           
                 dataroot: /content/drive/MyDrive/pytorch-cyclegan-and-pix2pix/dataset	[default: None]
             dataset_mode: unaligned                     
                direction: AtoB                          
              display_env: main                          
             display_freq: 400                           
               display_id: -1                            	[default: 1]
            display_ncols: 4                             
             display_port: 8097                          
           display_server: http://localhost              
          display_winsize: 256                           
                    epoch: lat

If a GPU is available, this should return a list of GPU devices. If it returns an empty list, the GPU is not accessible.

In [None]:
import tensorflow as tf

# Ask TensorFlow which physical GPU devices it can see (an empty list means none).
# NOTE(review): training in this notebook is launched through PyTorch scripts;
# consider confirming GPU visibility from PyTorch as well.
tf.config.list_physical_devices(device_type='GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

In [None]:
# Resume training of the 'npy-cyclegan' run: --continue_train is passed, together with
# --gpu_ids 0 and --save_epoch_freq 1; --display_id is 0 here.
# NOTE(review): presumably this picks up the checkpoints saved under ./checkpoints/npy-cyclegan — confirm.
# NOTE(review): train.py is a relative path — the working directory must be the repository root.
!python train.py --dataroot  /content/drive/MyDrive/pytorch-cyclegan-and-pix2pix/dataset --name npy-cyclegan --model cycle_gan --display_id 0 --input_nc 1 --output_nc 1 --gpu_ids 0 --save_epoch_freq 1 --continue_train

----------------- Options ---------------
               batch_size: 1                             
                    beta1: 0.5                           
          checkpoints_dir: ./checkpoints                 
           continue_train: True                          	[default: False]
                crop_size: 256                           
                 dataroot: /content/drive/MyDrive/pytorch-cyclegan-and-pix2pix/dataset	[default: None]
             dataset_mode: unaligned                     
                direction: AtoB                          
              display_env: main                          
             display_freq: 400                           
               display_id: 0                             	[default: 1]
            display_ncols: 4                             
             display_port: 8097                          
           display_server: http://localhost              
          display_winsize: 256                           
             

# Testing

-   `python test.py --dataroot datasets/horse2zebra/testA --name horse2zebra_pretrained --model test --no_dropout`

Change the `--dataroot` and `--name` to be consistent with your trained model's configuration.

> from https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix:
> The option --model test is used for generating results of CycleGAN only for one side. This option will automatically set --dataset_mode single, which only loads the images from one set. On the contrary, using --model cycle_gan requires loading and generating results in both directions, which is sometimes unnecessary. The results will be saved at ./results/. Use --results_dir {directory_path_to_save_result} to specify the results directory.

> For your own experiments, you might want to specify --netG, --norm, --no_dropout to match the generator architecture of the trained model.

In [None]:
# Run the one-directional test model on the horse2zebra sample set.
# NOTE(review): this still uses the template's dataroot (datasets/horse2zebra/testA) and
# model name (horse2zebra_pretrained); to evaluate the model trained above, --dataroot and
# --name must match that training run (and presumably --input_nc/--output_nc too) — confirm.
!python test.py --dataroot datasets/horse2zebra/testA --name horse2zebra_pretrained --model test --no_dropout

# Visualize

In [None]:
import matplotlib.pyplot as plt

# Path (relative to the working directory) of the generated "fake" result image
fake_image_path = './results/horse2zebra_pretrained/test_latest/images/n02381460_1010_fake.png'

# Read the PNG and display it
img = plt.imread(fake_image_path)
plt.imshow(img)

In [None]:
import matplotlib.pyplot as plt

# Path (relative to the working directory) of the corresponding "real" input image
real_image_path = './results/horse2zebra_pretrained/test_latest/images/n02381460_1010_real.png'

# Read the PNG and display it
img = plt.imread(real_image_path)
plt.imshow(img)

In [None]:
# Extract category A and B arrays
# NOTE(review): `data` is not defined in any cell visible here — confirm where it comes from.
# Take the first two entries along the leading axis as categories A and B
# (presumably data is indexed as data[i, j, :, :] — TODO confirm)
category_A, category_B = data[0], data[1]

# Integer class labels: 0 for every category-A sample, 1 for every category-B sample
labels_A = np.full(len(category_A), 0, dtype=np.int64)
labels_B = np.full(len(category_B), 1, dtype=np.int64)

Cloning into 'pytorch-cyclegan-and-pix2pix'...
Host key verification failed.
fatal: Could not read from remote repository.

Please make sure you have the correct access rights
and the repository exists.


In [None]:
# Combine data and labels
from sklearn.utils import shuffle

# Stack both categories (and their labels) along the first axis
features = np.concatenate([category_A, category_B])
targets = np.concatenate([labels_A, labels_B])

# Shuffle samples and labels together with a fixed seed
X, y = shuffle(features, targets, random_state=42)

Cloning into 'pytorch-cyclegan-and-pix2pix'...
fatal: could not read Password for 'https://shirlyn_xu-admin@bitbucket.org': No such device or address


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for testing, stratified on the labels, with a fixed seed
split_parts = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)
X_train, X_test, y_train, y_test = split_parts

In [None]:
import torch

# Features: float32 tensors with an extra axis inserted at position 1
# (assumes X_* are (N, H, W), giving (N, 1, H, W) — TODO confirm)
X_train_tensor = torch.tensor(X_train, dtype=torch.float32).unsqueeze(1)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).unsqueeze(1)

# Labels: int64 (torch.long) tensors
y_train_tensor = torch.tensor(y_train, dtype=torch.long)
y_test_tensor = torch.tensor(y_test, dtype=torch.long)

In [None]:
from torch.utils.data import TensorDataset, DataLoader

# Number of samples per batch, shared by both loaders
batch_size = 32

# Pair each feature tensor with its label tensor
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Training batches are reshuffled on each pass; test batches keep their order
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

In [None]:
import torch.nn as nn
import torch.nn.functional as F

class SimpleCNN(nn.Module):
    """Minimal CNN classifier: one 3x3 convolution, 2x2 max-pooling, one linear layer.

    Args:
        input_size: Optional ``(H, W)`` of the input images. When omitted, the
            size is read from dims 2 and 3 of the notebook-level
            ``X_train_tensor`` (the original behavior), so that tensor must exist.
        in_channels: Number of input channels (default 1).
        num_classes: Number of output classes (default 2).
    """

    def __init__(self, input_size=None, in_channels=1, num_classes=2):
        super().__init__()
        if input_size is None:
            # Backward-compatible fallback: infer the image size from the training tensor
            input_size = (X_train_tensor.shape[2], X_train_tensor.shape[3])
        H, W = input_size

        # padding=1 with a 3x3 kernel keeps H and W unchanged through the convolution
        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        # After 2x2 pooling each spatial dimension is floor-halved
        self.fc1 = nn.Linear(16 * (H // 2) * (W // 2), num_classes)

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for input (batch, C, H, W)."""
        x = self.pool(F.relu(self.conv1(x)))
        x = x.view(x.size(0), -1)  # Flatten everything except the batch dimension
        x = self.fc1(x)
        return x

# Instantiate the model. With no arguments, SimpleCNN.__init__ sizes its linear layer
# from the notebook-level X_train_tensor, so that tensor must already be defined.
model = SimpleCNN()

In [None]:
import torch.optim as optim

# Adam over all model parameters, learning rate 1e-3
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)

# Cross-entropy loss on the model's raw class scores
criterion = nn.CrossEntropyLoss()

In [None]:
num_epochs = 10  # Set the number of epochs


def run_training_epoch(net, loader, loss_fn, opt):
    """Run one optimisation pass over `loader` and return the sum of the batch losses."""
    net.train()  # Set model to training mode
    loss_total = 0.0
    for batch_inputs, batch_labels in loader:
        opt.zero_grad()                                        # Reset accumulated gradients
        batch_loss = loss_fn(net(batch_inputs), batch_labels)  # Forward pass + loss
        batch_loss.backward()                                  # Backward pass
        opt.step()                                             # Update weights
        loss_total += batch_loss.item()
    return loss_total


for epoch in range(num_epochs):
    running_loss = run_training_epoch(model, train_loader, criterion, optimizer)
    # Report the mean loss per batch for this epoch
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {running_loss/len(train_loader)}')

In [None]:
model.eval()  # Set model to evaluation mode
correct, total = 0, 0

with torch.no_grad():  # No gradients are needed for evaluation
    for batch_inputs, batch_labels in test_loader:
        batch_scores = model(batch_inputs)
        # Index of the highest score along dim 1 is the predicted class
        predicted = torch.max(batch_scores, 1).indices
        total += batch_labels.size(0)
        correct += (predicted == batch_labels).sum().item()

# Percentage of test samples classified correctly
accuracy = 100 * correct / total
print(f'Accuracy on the test set: {accuracy:.2f}%')