# Creazione dataset

## generazione immagini

In [6]:
!pip install Pillow numpy pandas torch torchvision

Collecting torchvision
  Downloading torchvision-0.19.1-cp38-cp38-manylinux1_x86_64.whl (7.0 MB)
[K     |████████████████████████████████| 7.0 MB 1.0 MB/s eta 0:00:01
Installing collected packages: torchvision
Successfully installed torchvision-0.19.1
You should consider upgrading via the '/home/kevin/.pyenv/versions/3.8.10/bin/python3.8 -m pip install --upgrade pip' command.[0m


In [1]:
dataset_dir = f"./digit_dataset"

In [2]:
from PIL import Image, ImageDraw, ImageFont
import os, numpy as np, pandas as pd, torch
import random
from torch.utils.data.dataset import Dataset
import torchvision.transforms as transforms # Import the transforms module

predisporre all'interno della cartella `./fonts` i font da cui verranno generati i caratteri

In [3]:
# Parametri
chars_to_generate = np.array(list("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"))

img_size = (28, 28)

fonts_path = [os.path.join(f"./fonts", file) for file in os.listdir(f"./fonts") if file.endswith(".ttf")]
fonts_name = [os.path.splitext(os.path.basename(file))[0] for file in fonts_path]

font_size = 20

colors = np.array(["white", "black", "red", "blue", "green", "yellow"])
text_colors = colors
background_colors = colors

In [4]:
# Crea la directory principale
if not os.path.exists(dataset_dir):
    os.makedirs(dataset_dir)

In [51]:
dataset_file = os.path.join(dataset_dir, "dataset.txt")

# Apri dataset.txt una sola volta in modalità scrittura
with open(dataset_file, "w", encoding="utf-8") as f:
	for path in fonts_path:
		font_name = os.path.splitext(os.path.basename(path))[0]
		print(font_name)

		for char in chars_to_generate:
			char_dir = os.path.join(dataset_dir, char)
			if not os.path.exists(char_dir):
				os.makedirs(char_dir)

			for text_color in text_colors:
				for background_color in background_colors:
					if text_color == background_color:
						continue

					font = ImageFont.truetype(path, font_size)

					# Crea una nuova immagine per il carattere
					img = Image.new('RGB', img_size, color=background_color)
					draw = ImageDraw.Draw(img)

					# Centra il testo nell'immagine
					try:
						bbox = draw.textbbox((0, 0), char, font=font)
						text_width = bbox[2] - bbox[0]
						text_height = bbox[3] - bbox[1]
						x = (img_size[0] - text_width) / 2 - bbox[0]
						y = (img_size[1] - text_height) / 2 - bbox[1]
					except AttributeError:
						text_width, text_height = draw.textsize(char, font=font)
						x = (img_size[0] - text_width) / 2
						y = (img_size[1] - text_height) / 2

					# Disegna il carattere
					draw.text((x, y), char, fill=text_color, font=font)

					# Salva l'immagine nella cartella del carattere
					img_filename = f"{font_name}_{background_color}_{text_color}.png"
					img_path = os.path.join(char_dir, img_filename)
					img.save(img_path)

					# Scrivi una riga nel dataset.txt
					f.write(f"{char}/{img_filename},{char}\n")

print(f"Dataset generato in: {dataset_dir}")

Roboto-Medium
Muli
Sansation-Light
Oswald-Heavy
PTN57F
Oswald-DemiBold
Antonio-Light
Lato-Black
PTN77F
Sansation-Regular
JosefinSans-Bold
PTS75F
Lato-Semibold
Oswald-Bold
Oswald-Light
OpenSans-Regular
PTC55F
Antonio-Regular
Muli-Light
RobotoCondensed-Light
Muli-Bold
Lato-Heavy
Roboto-Thin
JosefinSans-Thin
Lato-Thin
Lato-Bold
RobotoCondensed-Regular
Caviar_Dreams_Bold
Roboto-Black
JosefinSans-Regular
OpenSans-Light
OpenSans-ExtraBold
Oswald-ExtraLight
Roboto-Regular
JosefinSans-SemiBold
PTC75F
Oswald-Stencil
Antonio-Bold
Muli-ExtraLight
Roboto-Bold
Oswald-Medium
JosefinSans-Light
OpenSans-Bold
Lato-Medium
PTS56F
PTS55F
Courier Prime
Oswald-Regular
Lato-Light
Lato-Regular
OpenSans-Semibold
Muli-SemiBold
RobotoCondensed-Bold
Roboto-Light
Lato-Hairline
ostrich-regular
CaviarDreams
Courier Prime Bold
Sansation-Bold
Dataset generato in: ./digit_dataset


dividiamo il file `dataset.txt` in due file `train.txt` e `test.txt`

In [52]:
train_file = os.path.join(dataset_dir, "train.txt")
test_file = os.path.join(dataset_dir, "test.txt")

# Leggi tutte le righe
with open(dataset_file, "r", encoding="utf-8") as f:
    lines = f.readlines()

# Mischia le righe casualmente
random.shuffle(lines)

# Dividi 80% train e 20% test
split_idx = int(0.8 * len(lines))
train_lines = lines[:split_idx]
test_lines = lines[split_idx:]

# Scrivi i file train.txt e test.txt
with open(train_file, "w", encoding="utf-8") as f:
    f.writelines(train_lines)

with open(test_file, "w", encoding="utf-8") as f:
    f.writelines(test_lines)

creare un file `classes.txt` che conterrà in ogni riga i nomi dei file dataset.

Quindi avremo questo contenuto:
```
train.txt
test.txt
```

**TODO**: da capire se dobbiamo mettere il validation

## pytorch dataset

In [56]:
class DigitDataset(Dataset):
	def __init__(self, imgs_path, txt_path, transform=None):
		self.imgs_path = imgs_path
		self.images = np.loadtxt(txt_path, dtype=str, delimiter=',')
		self.transform = transform

	def __getitem__(self, index):
		# carica il path e indice dell'immagine
		f, c = self.images[index]

		im = Image.open(f'{self.imgs_path}/{f}')

		if self.transform is not None:
			im = self.transform(im)

		return {
			"image": im,
			"label": c
		}

	def __len__(self):
		return len(self.images)

In [57]:
train_path = f'{dataset_dir}/train.txt'

dataset = DigitDataset(imgs_path=dataset_dir, txt_path=train_path, transform=transforms.ToTensor())

In [58]:
sample = dataset[0]
print(sample['image'].shape)
print(sample['label'])

torch.Size([3, 28, 28])
S


## data normalization

In [60]:
x_pixels_count = int(sample['image'].shape[1])
y_pixels_count = int(sample['image'].shape[2])
x_pixels_count, y_pixels_count

(28, 28)

media

In [61]:
m = np.zeros(3)
for sample in dataset:
  m += sample['image'].sum(1).sum(1).numpy()

m = m / (len(dataset) * x_pixels_count * y_pixels_count)

In [62]:
"media", m

('media', array([0.49894239, 0.41612808, 0.33276246]))

varianza

In [63]:
s = np.zeros(3)
for sample in dataset:
  s += ((sample['image'] - torch.Tensor(m).view(3,1,1)) ** 2).sum(1).sum(1).numpy()

s = np.sqrt(s / (len(dataset) * x_pixels_count * y_pixels_count))

In [64]:
"varianza", s

('varianza', array([0.49364248, 0.44286803, 0.46521507]))

normalizziamo

In [65]:
transform = transforms.Compose([
    transforms.Resize(28),
    transforms.ToTensor(),
    transforms.Normalize(m, s),
    transforms.Lambda(lambda x: x.view(-1))
])

dataset = DigitDataset(imgs_path=dataset_dir, txt_path=train_path, transform=transform)
print(dataset[0]['image'].shape)
print(dataset[0]['label'])

torch.Size([2352])
S
