# Testset implementation

This notebook builds the test set on which the CNN will be evaluated.

In [None]:
import torch
import torchvision
import numpy as np
import os
from PIL import Image

In [None]:
from utils import to_pil

### Directory structure generation for numbers concatenated on the horizontal axis

In [None]:
# Root folder of the horizontally concatenated test set
directory_name = "data/TestWMNIST"

# Create the directory together with any missing parent folder: os.mkdir fails
# when "data" does not exist yet, and the MNIST download that populates
# "./data" only runs in a later cell. exist_ok is left at its default (False),
# so an already existing directory is still reported through FileExistsError.
try:
    os.makedirs(directory_name)
    print(f"Directory '{directory_name}' created successfully.")
except FileExistsError:
    print(f"Directory '{directory_name}' already exists.")
except PermissionError:
    print(f"Permission denied: Unable to create '{directory_name}'.")
except Exception as e:
    # Best-effort cell: report any other failure instead of stopping the notebook
    print(f"An error occurred: {e}")

In [None]:
# Inclusive range of numbers (= class labels) that get their own folder
smallest_number = 0
largest_number = 999

for i in range(smallest_number, largest_number + 1):
    # One folder per class, named after the number it contains
    directory_name = "data/TestWMNIST/" + str(i)

    # os.makedirs also creates "data/TestWMNIST" (and "data") when missing, so
    # this cell no longer emits 1000 errors if the root folder was not created
    # beforehand. An existing class folder still raises FileExistsError.
    try:
        os.makedirs(directory_name)
        print(f"Directory '{directory_name}' created successfully.")
    except FileExistsError:
        print(f"Directory '{directory_name}' already exists.")
    except PermissionError:
        print(f"Permission denied: Unable to create '{directory_name}'.")
    except Exception as e:
        # Best-effort: report the failure and continue with the next class
        print(f"An error occurred: {e}")

### Horizontal concatenated number generation

In [None]:
# Folder that receives the horizontally concatenated images
save_dir = "data/TestWMNIST"

# MNIST split selected by train=False; no transform is applied, and the data
# is downloaded into ./data when it is not already there
ds = torchvision.datasets.MNIST(
    root='./data', train=False, transform=None, download=True
)

# For every digit class 0-9, the positions in ds.targets that carry that label
# ex: label_indices[0] lists all the indexes of class 0 in the MNIST dataset
label_indices = {
    digit: torch.where(ds.targets == digit)[0]
    for digit in range(10)
}

# function to get 1 generated number (1, 2 or 3 digit number depending on idx)
# j represents the counter inside a class
def get_1_channel_number(idx, j):
    """Generate one image of the number ``idx`` and save it as a PNG.

    For every decimal digit of ``idx`` a random sample of that digit is drawn
    from ``ds`` (via ``label_indices``); the samples are concatenated along
    dim=1 and the result is written to ``<save_dir>/<idx>/<j>.png``.

    Args:
        idx: Non-negative integer to render; also the name of the class folder.
        j: Counter of the sample inside its class, used as the file name.

    Returns:
        None. The image is written to disk.
    """
    digit_images = []
    for d in str(idx):
        indices = label_indices[int(d)]
        # Pick one random occurrence of this digit in the dataset
        chosen_idx = indices[torch.randint(len(indices), (1,)).item()]
        digit_images.append(ds.data[chosen_idx])
    # dim=1 is the column axis of the 2-D digit images, so the digits end up
    # side by side (horizontal concatenation)
    concat_img = torch.cat(digit_images, dim=1)
    # NOTE(review): to_pil comes from utils and is not visible here; presumably
    # it turns the tensor into a PIL image - confirm.
    img = to_pil(concat_img)
    # Build the path from the idx parameter, not from a global loop variable,
    # so the function works no matter how the caller names its loop index
    img.save(os.path.join(save_dir, str(idx), str(j) + ".png"))
    
# Generate the images of every class 0..999. The number of samples per class
# depends on the digit count so that the three groups have a similar size:
# 10 * 1500 = 15000, 90 * 167 = 15030 and 900 * 17 = 15300 images.
for i in range(0, 1000):
    # The branches must be mutually exclusive (if / elif / else): with two
    # independent `if`s, one-digit classes also ran the two-digit branch and
    # regenerated (overwrote) their files 0..166 for nothing.
    if i < 10:
        samples_per_class = 1500
    elif i < 100:
        samples_per_class = 167
    else:
        samples_per_class = 17
    for j in range(samples_per_class):
        get_1_channel_number(i, j)

### Directory structure generation for numbers concatenated on the channel axis

In [None]:
# Root folder of the channel-concatenated test set
directory_name = "data/TestCMNIST"

# Create the directory together with any missing parent folder (os.mkdir fails
# when "data" does not exist). exist_ok is left at its default (False), so an
# already existing directory is still reported through FileExistsError.
try:
    os.makedirs(directory_name)
    print(f"Directory '{directory_name}' created successfully.")
except FileExistsError:
    print(f"Directory '{directory_name}' already exists.")
except PermissionError:
    print(f"Permission denied: Unable to create '{directory_name}'.")
except Exception as e:
    # Best-effort cell: report any other failure instead of stopping the notebook
    print(f"An error occurred: {e}")

In [None]:
# Inclusive range of numbers (= class labels) that get their own folder
smallest_number = 0
largest_number = 999

for i in range(smallest_number, largest_number + 1):
    # One folder per class, named after the number it contains
    directory_name = "data/TestCMNIST/" + str(i)

    # os.makedirs also creates "data/TestCMNIST" (and "data") when missing, so
    # a missing root folder no longer makes every iteration fail. An existing
    # class folder still raises FileExistsError.
    try:
        os.makedirs(directory_name)
        print(f"Directory '{directory_name}' created successfully.")
    except FileExistsError:
        print(f"Directory '{directory_name}' already exists.")
    except PermissionError:
        print(f"Permission denied: Unable to create '{directory_name}'.")
    except Exception as e:
        # Best-effort: report the failure and continue with the next class
        print(f"An error occurred: {e}")

### Channel concatenated number generation

In [None]:
# Folder that receives the channel-concatenated images
save_dir = "data/TestCMNIST"

# Maximum length of a number, in digits
max_digits = 3

# MNIST split selected by train=False; no transform is applied, and the data
# is downloaded into ./data when it is not already there
ds = torchvision.datasets.MNIST(
    root='./data', train=False, transform=None, download=True
)

# For every digit class 0-9, the positions in ds.targets that carry that label
# ex: label_indices[0] lists all the indexes of class 0 in the MNIST dataset
label_indices = {
    digit: torch.where(ds.targets == digit)[0]
    for digit in range(10)
}

# function to get 1 generated number (1, 2 or 3 digit number depending on idx)
# j represents the counter inside a class
def get_3_channel_number(idx, j):
    """Generate one multi-channel image of the number ``idx`` and save it as a PNG.

    Each decimal digit of ``idx`` becomes one channel: a random sample of that
    digit is drawn from ``ds`` (via ``label_indices``). Numbers with fewer than
    ``max_digits`` digits are padded with all-zero channels, so every image
    has ``max_digits`` channels. The result is written to
    ``<save_dir>/<idx>/<j>.png``.

    Args:
        idx: Non-negative integer to render; also the name of the class folder.
        j: Counter of the sample inside its class, used as the file name.

    Returns:
        None. The image is written to disk.
    """
    digits = str(idx)
    digit_images = []
    for d in digits:
        indices = label_indices[int(d)]
        # Pick one random occurrence of this digit in the dataset
        chosen_idx = indices[torch.randint(len(indices), (1,)).item()]
        # Add a leading channel axis so the digits can be stacked on dim=0
        digit_images.append(ds.data[chosen_idx].unsqueeze(0))
    # Padding is needed to always return numbers with max_digits channels,
    # otherwise 1 and 2 digit numbers would have fewer channels.
    # The padding uses the dtype of the digit data: torch.zeros defaults to a
    # float dtype, which would give 1- and 2-digit numbers a different tensor
    # dtype than 3-digit ones once the planes are concatenated.
    for _ in range(len(digits), max_digits):
        digit_images.append(torch.zeros((1, 28, 28), dtype=ds.data.dtype))
    concat_img = torch.cat(digit_images, dim=0)
    # NOTE(review): to_pil comes from utils and is not visible here; presumably
    # it accepts a (channels, 28, 28) tensor - confirm.
    img = to_pil(concat_img)
    # Build the path from the idx parameter, not from a global loop variable,
    # so the function works no matter how the caller names its loop index
    img.save(os.path.join(save_dir, str(idx), str(j) + ".png"))
    

# Generate the images of every class 0..999; the number of samples per class
# depends on how many digits the number has
for i in range(0, 1000):
    if i >= 100:
        samples_per_class = 17
    elif i >= 10:
        samples_per_class = 167
    else:
        samples_per_class = 1500
    for j in range(0, samples_per_class):
        get_3_channel_number(i, j)

### Computing mean and standard deviation of the generated dataset (both horizontal and channel concatenated)
This step is optional: since training is performed using the usual MNIST mean and std as normalization parameters, it is better to use them for testing as well.

In [None]:
# Root folder of the horizontally concatenated test set
dataset_path = "./data/TestWMNIST"

to_tensor = torchvision.transforms.ToTensor()

# Lazily enumerate every PNG file found anywhere below dataset_path
png_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk(dataset_path)
    for name in names
    if name.lower().endswith(".png")
)

# One flattened tensor per image, each image read as grayscale ("L")
pixels = [
    to_tensor(Image.open(png_path).convert("L")).flatten()
    for png_path in png_paths
]

# Merge the pixel values of all images into a single 1-D array
all_pixels = np.concatenate(pixels)

# Statistics over every pixel of every image
mean = np.mean(all_pixels, axis=0)
std = np.std(all_pixels, axis=0)

print("Mean:", mean)
print("Standard deviation:", std)


In [None]:
# Root folder of the channel-concatenated test set
dataset_path = "./data/TestCMNIST"

to_tensor = torchvision.transforms.ToTensor()

# Lazily enumerate every PNG file found anywhere below dataset_path
png_paths = (
    os.path.join(folder, name)
    for folder, _, names in os.walk(dataset_path)
    for name in names
    if name.lower().endswith(".png")
)

# One flattened tensor per image; the images are opened in their stored mode,
# so the values of all channels end up in the same flat vector
pixels = [
    to_tensor(Image.open(png_path)).flatten()
    for png_path in png_paths
]

# Merge the pixel values of all images into a single 1-D array
all_pixels = np.concatenate(pixels)

# Statistics over every pixel of every image, all channels pooled together
meanD = np.mean(all_pixels, axis=0)
stdD = np.std(all_pixels, axis=0)

print("Mean:", meanD)
print("Standard deviation:", stdD)