In [1]:
# pip install trdg

# Imports
import copy
import torch
import random
import pathlib

import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation

import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

from torchvision import transforms
from torchvision.datasets import ImageFolder

from tqdm.auto import tqdm
from IPython.display import HTML, display

from trdg.generators import GeneratorFromStrings
from PIL import Image
import os
import csv
import string

In [2]:
# @title Set random seed
# @markdown Executing `set_seed(seed=seed)` you are setting the seed

# For DL its critical to set the random seed so that students can have a
# baseline to compare their results to expected results.
# Read more here: https://pytorch.org/docs/stable/notes/randomness.html

# Call `set_seed` function in the exercises to ensure reproducibility.
import random
import torch

def set_seed(seed=None, seed_torch=True):
  """
  Function that controls randomness. NumPy and random modules must be imported.

  Args:
    seed : Integer
      A non-negative integer that defines the random state. Default is `None`.
    seed_torch : Boolean
      If `True` sets the random seed for pytorch tensors, so pytorch module
      must be imported. Default is `True`.

  Returns:
    Nothing.
  """
  if seed is None:
    seed = np.random.choice(2 ** 32)
  random.seed(seed)
  np.random.seed(seed)
  if seed_torch:
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

  print(f'Random seed {seed} has been set.')


# In case that `DataLoader` is used
def seed_worker(worker_id):
  """
  DataLoader will reseed workers following randomness in
  multi-process data loading algorithm.

  Args:
    worker_id: integer
      ID of subprocess to seed. 0 means that
      the data will be loaded in the main process
      Refer: https://pytorch.org/docs/stable/data.html#data-loading-randomness for more details

  Returns:
    Nothing
  """
  worker_seed = torch.initial_seed() % 2**32
  np.random.seed(worker_seed)
  random.seed(worker_seed)

In [3]:
# @title Set device (GPU or CPU). Execute `set_device()`
# especially if torch modules used.

# Inform the user if the notebook uses GPU or CPU.

def set_device():
  """
  Set the device. CUDA if available, CPU otherwise

  Args:
    None

  Returns:
    Nothing
  """
  device = "cuda" if torch.cuda.is_available() else "cpu"
  if device != "cuda":
    print("WARNING: For this notebook to perform best, "
        "if possible, in the menu under `Runtime` -> "
        "`Change runtime type.`  select `GPU` ")
  else:
    print("GPU is enabled in this notebook.")

  return device

In [4]:
SEED = 2021
set_seed(seed=SEED)
DEVICE = set_device()

Random seed 2021 has been set.


In [6]:
# %cd "/content/drive/MyDrive/Colab Notebooks/OCR_proj/trdg"

[Errno 2] No such file or directory: '/content/drive/MyDrive/Colab Notebooks/OCR_proj/trdg'
/content


In [5]:
# Define output directory and CSV file path
output_dir = "ocr_dataset4"
os.makedirs(output_dir, exist_ok=True)
csv_file = os.path.join(output_dir, "labels.csv")

# Function to generate random words
def generate_random_word(length=10):
    letters = string.ascii_lowercase + ' '
    return ''.join(random.choice(letters) for i in range(length))

# Function to save images with labels
def save_handwritten_text_images(output_dir, csv_file, num_samples=10):
    # Generate random words
    random_words = [generate_random_word(10) for _ in range(num_samples)]

    # Create generator for handwritten text
    generator = GeneratorFromStrings(
        random_words,
        blur=0,  # No blur
        random_blur=False,
        distorsion_type=0,  # No distortion
        size=32,  # Font size
        language="en"  # Language set to English
    )

    labels = []
    fixed_width = 256
    fixed_height = 56

    for count, (img, lbl) in enumerate(tqdm(generator, total=num_samples, desc="Creating Datas")):
        if count >= num_samples:
            break
        img = img.convert("L")  # Convert image to grayscale
        # Resize the image to fixed dimensions
        img = img.resize((fixed_width, fixed_height), Image.ANTIALIAS)
        # Save image with related filename
        img_filename = os.path.join(output_dir, f"{count+1}.png")
        img.save(img_filename)
        # print(f"Saved {img_filename} with label {lbl}")
        labels.append(lbl)

    # Save labels to CSV
    with open(csv_file, mode='w', newline='') as file:
        writer = csv.writer(file)
        writer.writerow(["Label"])
        for label in labels:
            writer.writerow([label])

# Generate and save images
num_samples = 1000 ## USER 20 000
save_handwritten_text_images(output_dir, csv_file, num_samples=num_samples)

Creating Datas:   0%|          | 0/1000 [00:00<?, ?it/s]

In [11]:
# apply corruptions to the preprocess
from torchvision import transforms
from PIL import Image
import torch
import matplotlib.pyplot as plt

num_samples = 100

# 0 : rotation
# 1 : affine tsf
# 2 Gaussian blur
# ...

## USER
tsf = 0 # type of corruption
p = 0.1 # intensity of corruption
##

float_to_odd_number = lambda float_value: (lambda n: n if n % 2 != 0 else n + 1 if n < float_value else n - 1)(int(round(float_value)))

corr_list = [transforms.RandomRotation(degrees=p*20), transforms.RandomAffine(degrees=p*20), transforms.GaussianBlur(float_to_odd_number(p*20))]


# compose transformations including the specified corruption
transform = transforms.Compose([
                                 corr_list[tsf],
                                 transforms.ToTensor()
                                ])

names = [str(x) + '.png' for x in range(1, num_samples+1)]
paths = [os.path.join(output_dir, name) for name in names]
img_dataset = []

for path in paths:
  img = Image.open(path)
  # Data Augmentation using transforms
  img_dataset.append(transform(img))
img_dataset = torch.stack(img_dataset)

# for image in img_dataset[:10]:
#   print(image.shape)
#   print(torch.min(image), torch.max(image))
#   plt.imshow(image.permute(1, 2, 0), cmap='gray')
#   plt.show()

In [12]:
# @title Dataloaders for the Dataset
## Dataloaders for the Dataset
batch_size = 128
# classes = ('cat', 'dog', 'wild')

# train_transform = transforms.Compose([
#      transforms.ToTensor(),
#      transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
#      ])
# data_path = pathlib.Path('.')/'afhq' # Using pathlib to be compatible with all OS's
# img_dataset = ImageFolder(data_path/'train', transform=train_transform)


####################################################
g_seed = torch.Generator()
g_seed.manual_seed(SEED)


## Dataloaders for the  Original Dataset
print(type(img_dataset))
img_train_data, img_val_data, img_test_data = torch.utils.data.random_split(img_dataset,
                                                                           [0.2*num_samples, 0.1*num_samples, 0.7*num_samples])

# Creating train_loader and Val_loader
train_loader = torch.utils.data.DataLoader(img_train_data,
                                           batch_size=batch_size,
                                           worker_init_fn=seed_worker,
                                           num_workers=2,
                                           generator=g_seed)
val_loader = torch.utils.data.DataLoader(img_val_data,
                                         batch_size=1000,
                                         num_workers=2,
                                         worker_init_fn=seed_worker,
                                         generator=g_seed)

# Creating test dataset
# test_transform = transforms.Compose([
    #  transforms.ToTensor(),
    #  transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    #  ])
# img_test_dataset = ImageFolder(data_path/'val', transform=test_transform)

<class 'torch.Tensor'>


TypeError: randperm() received an invalid combination of arguments - got (float, generator=torch._C.Generator), but expected one of:
 * (int n, *, torch.Generator generator, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
 * (int n, *, Tensor out, torch.dtype dtype, torch.layout layout, torch.device device, bool pin_memory, bool requires_grad)
