In [None]:
import torch
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import default_collate
from torchvision.utils import make_grid
from einops import rearrange
from PIL import Image
import pickle
from tqdm.notebook import tqdm
import os
from torchvision import transforms
import shutil

In [None]:
base_path = './SSL_embeddings/Real'
batch_size = 64
def reverse(class_names):
    for class_name in tqdm(class_names, desc="Processing Classes"):
        for i in range(1,6):
            file_path = f"{base_path}/{class_name}/{class_name}_image_embeddings_batch_{i}.pkl"
            with open(file_path, "rb") as f:
                items = pickle.load(f)
            for start_idx in tqdm(range(0, len(items), batch_size), desc="Batches", leave=False):
                batch = default_collate(items[start_idx:start_idx + batch_size])
                images = batch['image_array']
                for j, img in enumerate(images):
                    # print(img.shape)
                    img = img.permute(2,0,1)
                    # print(img.shape)
                    image = transforms.ToPILImage()(img)
                    image.save(f"{base_path}/{class_name}/{class_name}_{i}_{start_idx + j}.jpg", "JPEG")

In [None]:
reverse(["ADI", "BACK", "DEB", "LYM", "MUC", "MUS", "NORM", "STR", "TUM"])

In [None]:
def train_valid_spilit(class_names):
    for class_name in tqdm(class_names, desc="Processing Classes"):
        for i in range(1,6):
            # file_path = f"{base_path}/{class_name}/{class_name}_image_embeddings_batch_{i}.pkl"
            # with open(file_path, "rb") as f:
            #     items = pickle.load(f)
            for start_idx in range(0, 1000):
                if start_idx < 800:
                    img_move = f"{base_path}/{class_name}/{class_name}_{i}_{start_idx}.jpg"
                    if not os.path.exists(img_move):
                        print(f"Source file '{img_move}' does not exist.")
                        return
                    
                    # Check if the destination directory exists, if not, create it
                    destination_dir = os.path.dirname(f"{base_path}/train/{class_name}")
                    if not os.path.exists(destination_dir):
                        os.makedirs(destination_dir)
                    try:
                        shutil.move(img_move, destination_dir)
                        print(f"File '{img_move}' moved to '{destination_dir}' successfully.")
                    except Exception as e:
                        print(f"Error occurred while moving file: {e}")
                else:
                    img_move = f"{base_path}/{class_name}/{class_name}_{i}_{start_idx}.jpg"
                    if not os.path.exists(img_move):
                        print(f"Source file '{img_move}' does not exist.")
                        return
                    
                    # Check if the destination directory exists, if not, create it
                    destination_dir = os.path.dirname(f"{base_path}/val/{class_name}")
                    if not os.path.exists(destination_dir):
                        os.makedirs(destination_dir)
                    try:
                        shutil.move(img_move, destination_dir)
                        print(f"File '{img_move}' moved to '{destination_dir}' successfully.")
                    except Exception as e:
                        print(f"Error occurred while moving file: {e}")


In [None]:
train_valid_spilit(["ADI", "BACK", "DEB", "LYM", "MUC", "MUS", "NORM", "STR", "TUM"])

In [23]:
def fold_restructure(class_names):
    for class_name in tqdm(class_names, desc="Processing Classes"):
        for i in range(1,6):
            for start_idx in range(0, 1000):
                if start_idx < 800:
                    img_move = f"{base_path}/train/{class_name}_{i}_{start_idx}.jpg"
                    if not os.path.exists(img_move):
                        print(f"Source file '{img_move}' does not exist.")
                        return
                    
                    # Check if the destination directory exists, if not, create it
                    destination_dir = os.path.dirname(f"{base_path}/train/{class_name}/")
                    if not os.path.exists(destination_dir):
                        os.makedirs(destination_dir)
                    try:
                        shutil.move(img_move, destination_dir)
                        print(f"File '{img_move}' moved to '{destination_dir}' successfully.")
                    except Exception as e:
                        print(f"Error occurred while moving file: {e}")
                else:
                    img_move = f"{base_path}/val/{class_name}_{i}_{start_idx}.jpg"
                    if not os.path.exists(img_move):
                        print(f"Source file '{img_move}' does not exist.")
                        return
                    
                    # Check if the destination directory exists, if not, create it
                    destination_dir = os.path.dirname(f"{base_path}/val/{class_name}/")
                    if not os.path.exists(destination_dir):
                        os.makedirs(destination_dir)
                    try:
                        shutil.move(img_move, destination_dir)
                        print(f"File '{img_move}' moved to '{destination_dir}' successfully.")
                    except Exception as e:
                        print(f"Error occurred while moving file: {e}")

In [24]:
fold_restructure(["ADI", "BACK", "DEB", "LYM", "MUC", "MUS", "NORM", "STR", "TUM"])

Processing Classes:   0%|          | 0/9 [00:00<?, ?it/s]

File './SSL_embeddings/Real/train/ADI_1_0.jpg' moved to './SSL_embeddings/Real/train/ADI' successfully.
File './SSL_embeddings/Real/train/ADI_1_1.jpg' moved to './SSL_embeddings/Real/train/ADI' successfully.
File './SSL_embeddings/Real/train/ADI_1_2.jpg' moved to './SSL_embeddings/Real/train/ADI' successfully.
File './SSL_embeddings/Real/train/ADI_1_3.jpg' moved to './SSL_embeddings/Real/train/ADI' successfully.
File './SSL_embeddings/Real/train/ADI_1_4.jpg' moved to './SSL_embeddings/Real/train/ADI' successfully.
File './SSL_embeddings/Real/train/ADI_1_5.jpg' moved to './SSL_embeddings/Real/train/ADI' successfully.
File './SSL_embeddings/Real/train/ADI_1_6.jpg' moved to './SSL_embeddings/Real/train/ADI' successfully.
File './SSL_embeddings/Real/train/ADI_1_7.jpg' moved to './SSL_embeddings/Real/train/ADI' successfully.
File './SSL_embeddings/Real/train/ADI_1_8.jpg' moved to './SSL_embeddings/Real/train/ADI' successfully.
File './SSL_embeddings/Real/train/ADI_1_9.jpg' moved to './SSL_e

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)

