# Testset implementation

This notebook builds the test set on which the CNN will be evaluated.

In [None]:
import torch
import torchvision
import torchvision.transforms as transforms
from matplotlib import pyplot as plt
from torch.utils.data import Dataset
import random
from PIL import Image
import numpy as np

In [17]:
from utils_datasets import to_pil

Directory structure generation for numbers concatenated along the horizontal axis

In [None]:
import os

# Root folder of the horizontally-concatenated test set.
directory_name = "data/TestWMNIST"

# Create the directory.
# os.makedirs is used instead of os.mkdir so that a missing parent folder
# ("data") is created as well; os.mkdir would raise FileNotFoundError in that
# case and the folder would never be created. Without exist_ok, makedirs
# still raises FileExistsError when the target is already present, so the
# "already exists" message below keeps working.
try:
    os.makedirs(directory_name)
    print(f"Directory '{directory_name}' created successfully.")
except FileExistsError:
    print(f"Directory '{directory_name}' already exists.")
except PermissionError:
    print(f"Permission denied: Unable to create '{directory_name}'.")
except Exception as e:
    # Best-effort notebook cell: report anything unexpected and carry on.
    print(f"An error occurred: {e}")

In [None]:
# Inclusive range of numbers that get their own class folder.
smallest_number = 0
largest_number = 999

# One sub-folder per number: data/TestWMNIST/0 ... data/TestWMNIST/999.
for i in range(smallest_number, largest_number + 1):
    # Specify the directory name
    directory_name = f"data/TestWMNIST/{i}"

    # Create the directory. os.makedirs also creates any missing parent
    # folder, so this cell works even if the root folder is absent; it still
    # raises FileExistsError when the class folder is already there.
    try:
        os.makedirs(directory_name)
        print(f"Directory '{directory_name}' created successfully.")
    except FileExistsError:
        print(f"Directory '{directory_name}' already exists.")
    except PermissionError:
        print(f"Permission denied: Unable to create '{directory_name}'.")
    except Exception as e:
        # Best-effort notebook cell: report anything unexpected and carry on.
        print(f"An error occurred: {e}")

Horizontally concatenated number generation

In [None]:
from PIL import Image

# MNIST test split (train=False), downloaded into ./data when missing.
# No transform is attached: the generation code reads ds.data / ds.targets
# directly.
ds = torchvision.datasets.MNIST(root='./data', train=False, transform=None, download=True)

# Lookup table: digit (0-9) -> 1-D tensor with the dataset positions whose
# label equals that digit.
label_indices = {
    digit: torch.nonzero(ds.targets == digit, as_tuple=True)[0]
    for digit in range(10)
}

# Root folder that receives the horizontally-concatenated images.
save_dir = "data/TestWMNIST"

def get_1_channel_number(idx, j):
    """Render the number ``idx`` as one image and save it as sample ``j``.

    For every decimal digit of ``idx`` a random MNIST image with that label
    is drawn (via the module-level ``label_indices`` and ``ds``); the digit
    images are concatenated along dim 1 and written to
    ``<save_dir>/<idx>/<j>.png``.

    Args:
        idx: Non-negative integer to render; also the name of the class
            sub-folder the image is stored in.
        j: Sample index, used as the file name (without extension).

    Returns:
        None. The image is written to disk.
    """
    digit_images = []
    for d in str(idx):
        indices = label_indices[int(d)]
        # Pick one random occurrence of this digit in the dataset.
        chosen_idx = indices[torch.randint(len(indices), (1,)).item()]
        digit_images.append(ds.data[chosen_idx])
    # dim=1 places the digit images side by side
    # (assumes each ds.data[k] is a 2-D height x width tensor - TODO confirm).
    concat_img = torch.cat(digit_images, dim=1)
    img = to_pil(concat_img)
    # Use the `idx` argument for the class folder. The previous code read the
    # global loop variable `i`, which only worked when the caller's loop
    # variable happened to be named `i` and equal to `idx`.
    img.save(os.path.join(save_dir, str(idx), str(j) + ".png"))
    
# Generate the samples for every number 0..999.
# The per-number sample count depends on the number of digits so that each
# digit-length group ends up with a similar total:
#   1 digit : 10  numbers x 1500 = 15000 images
#   2 digits: 90  numbers x  167 = 15030 images
#   3 digits: 900 numbers x   17 = 15300 images
for i in range(0, 1000):
    if i < 10:
        n_samples = 1500
    # `elif`, not `if`: with a plain `if`, numbers below 10 were generated
    # 1500 times and then files 0..166 were regenerated and overwritten.
    elif i < 100:
        n_samples = 167
    else:
        n_samples = 17
    for j in range(n_samples):
        get_1_channel_number(i, j)

Directory structure generation for numbers concatenated along the channel axis

In [None]:
import os

# Root folder of the channel-concatenated test set.
directory_name = "data/TestDMNIST"

# Create the directory.
# os.makedirs is used instead of os.mkdir so that a missing parent folder
# ("data") is created as well; os.mkdir would raise FileNotFoundError in that
# case and the folder would never be created. Without exist_ok, makedirs
# still raises FileExistsError when the target is already present, so the
# "already exists" message below keeps working.
try:
    os.makedirs(directory_name)
    print(f"Directory '{directory_name}' created successfully.")
except FileExistsError:
    print(f"Directory '{directory_name}' already exists.")
except PermissionError:
    print(f"Permission denied: Unable to create '{directory_name}'.")
except Exception as e:
    # Best-effort notebook cell: report anything unexpected and carry on.
    print(f"An error occurred: {e}")

In [None]:
# Inclusive range of numbers that get their own class folder.
smallest_number = 0
largest_number = 999

# One sub-folder per number: data/TestDMNIST/0 ... data/TestDMNIST/999.
for i in range(smallest_number, largest_number + 1):
    # Specify the directory name
    directory_name = f"data/TestDMNIST/{i}"

    # Create the directory. os.makedirs also creates any missing parent
    # folder, so this cell works even if the root folder is absent; it still
    # raises FileExistsError when the class folder is already there.
    try:
        os.makedirs(directory_name)
        print(f"Directory '{directory_name}' created successfully.")
    except FileExistsError:
        print(f"Directory '{directory_name}' already exists.")
    except PermissionError:
        print(f"Permission denied: Unable to create '{directory_name}'.")
    except Exception as e:
        # Best-effort notebook cell: report anything unexpected and carry on.
        print(f"An error occurred: {e}")

Channel-concatenated number generation

In [None]:
from PIL import Image

# MNIST test split (train=False), downloaded into ./data when missing.
# No transform is attached: the generation code reads ds.data / ds.targets
# directly.
ds = torchvision.datasets.MNIST(root='./data', train=False, transform=None, download=True)

# Every generated image gets this many channels, one per digit position.
max_digits = 3

# Lookup table: digit (0-9) -> 1-D tensor with the dataset positions whose
# label equals that digit.
label_indices = {
    digit: torch.nonzero(ds.targets == digit, as_tuple=True)[0]
    for digit in range(10)
}

# Root folder that receives the channel-concatenated images.
save_dir = "data/TestDMNIST"

def get_3_channel_number(idx, j):
    """Render the number ``idx`` as a multi-channel image and save it as sample ``j``.

    For every decimal digit of ``idx`` a random MNIST image with that label
    is drawn (via the module-level ``label_indices`` and ``ds``) and becomes
    one channel. Numbers with fewer than ``max_digits`` digits are padded
    with all-zero channels after the digit channels. The result is written
    to ``<save_dir>/<idx>/<j>.png``.

    Args:
        idx: Non-negative integer to render; also the name of the class
            sub-folder the image is stored in.
        j: Sample index, used as the file name (without extension).

    Returns:
        None. The image is written to disk.
    """
    digits = str(idx)
    digit_images = []
    for d in digits:
        indices = label_indices[int(d)]
        # Pick one random occurrence of this digit in the dataset.
        chosen_idx = indices[torch.randint(len(indices), (1,)).item()]
        # Add a leading channel axis so the digits can be stacked on dim 0.
        digit_images.append(ds.data[chosen_idx].unsqueeze(0))
    # Pad the missing digit positions with empty channels. zeros_like keeps
    # the dtype (and shape) of the real digit channels; the previous
    # torch.zeros((28, 28)) was float32, which made torch.cat produce a
    # float tensor for 1- and 2-digit numbers but an integer tensor for
    # 3-digit ones, so to_pil received inconsistent inputs.
    for _ in range(len(digits), max_digits):
        digit_images.append(torch.zeros_like(digit_images[0]))
    concat_img = torch.cat(digit_images, dim=0)
    img = to_pil(concat_img)
    # Use the `idx` argument for the class folder. The previous code read the
    # global loop variable `i`, which only worked when the caller's loop
    # variable happened to be named `i` and equal to `idx`.
    img.save(os.path.join(save_dir, str(idx), str(j) + ".png"))
    

# Generate the samples for every number 0..999.
# Sample count per number by digit length:
#   1 digit -> 1500, 2 digits -> 167, 3 digits -> 17.
for i in range(0, 1000):
    n_samples = 1500 if i < 10 else (167 if i < 100 else 17)
    for j in range(n_samples):
        get_3_channel_number(i, j)

Computing mean and variance of the generated dataset (both horizontal and channel concatenated)

In [None]:
# Folder holding the horizontally-concatenated test images.
dataset_path = "./data/TestWMNIST"

to_tensor = torchvision.transforms.ToTensor()

pixels = []  # one flat float32 array per image
i = 0        # counts every file visited, PNG or not; printed as progress

for root, _, files in os.walk(dataset_path):
    for file in files:
        print(i)
        i += 1
        # Only PNG files contribute to the statistics.
        if not file.lower().endswith(".png"):
            continue
        img_path = os.path.join(root, file)
        # "L" selects Pillow's single-channel grayscale mode.
        img = to_tensor(Image.open(img_path).convert("L"))
        arr = np.array(img, dtype=np.float32)
        pixels.append(arr.flatten())

# Pool the pixels of all images into a single 1-D array.
all_pixels = np.concatenate(pixels)

# Global statistics over every pixel of the dataset.
# NOTE: despite its name, `std` holds the variance, not the standard deviation.
mean = all_pixels.mean(axis=0)
std = all_pixels.var(axis=0)

print("Mean:", mean)
print("Variance:", std)


In [None]:
import os
import numpy as np
from PIL import Image

# Folder holding the channel-concatenated test images.
dataset_path = "./data/TestDMNIST"

to_tensor = torchvision.transforms.ToTensor()

pixels = []  # one flat float32 array per image (all channels together)
i = 0        # counts every file visited, PNG or not; printed as progress

for root, _, files in os.walk(dataset_path):
    for file in files:
        print(i)
        i += 1
        # Only PNG files contribute to the statistics.
        if not file.lower().endswith(".png"):
            continue
        img_path = os.path.join(root, file)
        # The image is read in its stored mode (no conversion).
        img = to_tensor(Image.open(img_path))
        arr = np.array(img, dtype=np.float32)
        pixels.append(arr.flatten())

# Pool the pixels of all images and channels into a single 1-D array, so the
# statistics below are one scalar each, not per-channel values.
all_pixels = np.concatenate(pixels)

# NOTE: despite its name, `stdD` holds the variance, not the standard deviation.
meanD = all_pixels.mean(axis=0)
stdD = all_pixels.var(axis=0)

print("Mean:", meanD)
print("Variance:", stdD)
