In [None]:
import kagglehub

# Download the latest version of the "nikitricky/dafont" dataset through kagglehub.
# `path` holds whatever dataset_download returns; the recorded run printed a
# directory under /root/.cache/kagglehub after a ~7 GB download and extraction.
path = kagglehub.dataset_download("nikitricky/dafont")

print("Path to dataset files:", path)

Downloading from https://www.kaggle.com/api/v1/datasets/download/nikitricky/dafont?dataset_version_number=1...


100%|██████████| 7.23G/7.23G [03:37<00:00, 35.7MB/s]

Extracting files...





Path to dataset files: /root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1


!pip install -q torchvision matplotlib


In [None]:
!pip install -q torchvision matplotlib


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m1.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m115.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m87.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m49.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m15.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
import os
import torch
import torchvision.transforms.functional as TF
from PIL import Image, ImageDraw, ImageFont
from multiprocessing import Pool, cpu_count
from tqdm import tqdm

# === CONFIGURATION ===
# Root directory that is walked recursively for .ttf/.otf files.
FONTS_DIR = "/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1"
# Destination root; one sub-folder per character is created below.
OUTPUT_DIR = "/content/Sequential_English_Characters_Image_0-9_A-Z"
# Side length, in pixels, of each square grayscale output image.
IMG_SIZE = 128
# Size passed to ImageFont.truetype when a font is loaded.
FONT_SIZE = 100
# Characters to render: A-Z, then a-z, then 0-9 (62 in total).
CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
# Torch device: CUDA when available, otherwise CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === SETUP OUTPUT FOLDERS ===
# exist_ok=True makes these calls safe to repeat when the cell is re-run.
# NOTE: upper- and lower-case letters get separate folders ("A" vs "a"), which
# requires a case-sensitive filesystem.
os.makedirs(OUTPUT_DIR, exist_ok=True)
for char in CHARS:
    os.makedirs(os.path.join(OUTPUT_DIR, char), exist_ok=True)

# === FILTER VALID FONTS ===
def is_valid_font(font_path):
    """Report whether Pillow can load ``font_path`` at ``FONT_SIZE``.

    Args:
        font_path: Path to a candidate font file.

    Returns:
        bool: True when ``ImageFont.truetype`` succeeds, False when it raises
        an ordinary exception.
    """
    try:
        ImageFont.truetype(font_path, FONT_SIZE)
    except Exception:
        # Catch Exception rather than using a bare ``except``: a bare clause
        # would also swallow KeyboardInterrupt/SystemExit, so a long scan
        # could not be interrupted cleanly.
        return False
    return True

def get_all_valid_fonts():
    """Collect every loadable ``.ttf``/``.otf`` file below ``FONTS_DIR``.

    The directory tree is walked recursively, files are selected by a
    case-insensitive extension match, and each candidate is kept only if
    ``is_valid_font`` accepts it.

    Returns:
        list[str]: Paths of the accepted fonts, in sorted order.
    """
    # os.walk yields directory entries in arbitrary, filesystem-dependent
    # order. Sorting makes the result reproducible, which matters to any
    # caller that derives names from a font's position in the list.
    candidates = sorted(
        os.path.join(root, name)
        for root, _dirs, files in os.walk(FONTS_DIR)
        for name in files
        if name.lower().endswith(('.ttf', '.otf'))
    )
    return [candidate for candidate in candidates if is_valid_font(candidate)]

# Scan the dataset once and keep the result at module level; later cells
# iterate over `valid_fonts`. Every candidate file is opened with
# ImageFont.truetype during this scan.
valid_fonts = get_all_valid_fonts()
print(f"✅ Total Valid Fonts Found: {len(valid_fonts)}")

# === RENDER EACH FONT WITH INDEX ===
def render_font(args):
    """Render each character of ``CHARS`` for one font as ``<index+1>.png``.

    For every character a white ``IMG_SIZE`` x ``IMG_SIZE`` grayscale image is
    created, the glyph is drawn in black centred on its bounding box, and the
    image is written to ``OUTPUT_DIR/<char>/<index+1>.png``. Images that
    already exist are left untouched.

    Args:
        args: ``(index, font_path)`` tuple. ``index`` is the zero-based
            position of the font; file names start from 1.

    Returns:
        bool: True if the font was processed without error, False if any
        exception occurred (the font is then skipped from the failing
        character onwards). Earlier versions returned None; callers that
        ignore the result are unaffected.
    """
    index, font_path = args
    try:
        font = ImageFont.truetype(font_path, FONT_SIZE)
        img_name = f"{index+1}.png"  # Start from 1

        for char in CHARS:
            save_path = os.path.join(OUTPUT_DIR, char, img_name)

            if os.path.exists(save_path):
                continue  # Skip if exists

            image = Image.new('L', (IMG_SIZE, IMG_SIZE), color=255)
            draw = ImageDraw.Draw(image)

            # Centre the glyph: offset by the bbox origin so the inked area,
            # not the text origin, sits in the middle of the canvas.
            bbox = font.getbbox(char)
            w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
            x = (IMG_SIZE - w) / 2 - bbox[0]
            y = (IMG_SIZE - h) / 2 - bbox[1]
            draw.text((x, y), char, font=font, fill=0)

            # Save the PIL image directly. Converting it to a tensor, moving
            # it to a device and converting back performs no computation, and
            # the device transfer inside a pool worker is an extra failure
            # point that the handler below would hide.
            image.save(save_path)

    except Exception:
        # Best effort: a broken font must not abort the whole run, but report
        # the failure through the return value instead of hiding it.
        return False
    return True

# === MULTIPROCESSING ===
if __name__ == "__main__":
    font_args = list(enumerate(valid_fonts))  # (index, font_path)
    with Pool(processes=cpu_count()) as pool:
        # One worker per CPU. Results are consumed only to drive the progress
        # bar. A chunksize above the default of 1 batches the many small
        # per-font tasks and cuts inter-process overhead on this long iterable.
        list(tqdm(pool.imap_unordered(render_font, font_args, chunksize=64), total=len(valid_fonts)))


✅ Total Valid Fonts Found: 143733


100%|██████████| 143733/143733 [04:18<00:00, 556.39it/s]


In [None]:
# Preview only the first few entries; displaying the whole list would write
# every font path into the notebook output.
valid_fonts[:10]

['/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1/fonts/fonts/fontself/Fontself.otf',
 '/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1/fonts/fonts/fontself/Fontself.ttf',
 '/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1/fonts/fonts/fine_todey/FineTodeyDemoRegular.ttf',
 '/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1/fonts/fonts/fine_todey/FineTodeyDemoInline.ttf',
 '/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1/fonts/fonts/corporate_suit/corpoaratesdw.ttf',
 '/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1/fonts/fonts/quickens/QUICKENS.ttf',
 '/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1/fonts/fonts/quentine_antelope/QuentineAntelope.otf',
 '/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1/fonts/fonts/learchitect/LeArchitect.ttf',
 '/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1/fonts/fonts/woodcutter_summer_shadows/woodcutter Summer Shadows vers2.ttf',
 '/root/

In [None]:

def get_all_valid_fonts():
    """Collect every loadable ``.ttf``/``.otf`` file below ``FONTS_DIR``.

    The directory tree is walked recursively, files are selected by a
    case-insensitive extension match, and each candidate is kept only if
    ``is_valid_font`` accepts it.

    Returns:
        list[str]: Paths of the accepted fonts, in sorted order.
    """
    # os.walk yields directory entries in arbitrary, filesystem-dependent
    # order. Sorting makes the result reproducible across runs.
    candidates = sorted(
        os.path.join(root, name)
        for root, _dirs, files in os.walk(FONTS_DIR)
        for name in files
        if name.lower().endswith(('.ttf', '.otf'))
    )
    return [candidate for candidate in candidates if is_valid_font(candidate)]

# Rebuild the module-level font list. This cell has no imports or constants of
# its own: it relies on `os`, `FONTS_DIR` and `is_valid_font` already being
# defined by an earlier cell.
valid_fonts = get_all_valid_fonts()
print(f"✅ Total Valid Fonts Found: {len(valid_fonts)}")


In [None]:
import os
import torch
import torchvision.transforms.functional as TF
from PIL import Image, ImageDraw, ImageFont
from multiprocessing import Pool, cpu_count
from tqdm import tqdm
import random

# === CONFIGURATION ===
# Root directory that is walked recursively for .ttf/.otf files.
FONTS_DIR = "/root/.cache/kagglehub/datasets/nikitricky/dafont/versions/1"
# Destination root for the images named after their font; one sub-folder per
# character is created below.
OUTPUT_DIR = "/content/UniqueName_English_Characters_Image_0-9_A-Z"
# Side length, in pixels, of each square grayscale output image.
IMG_SIZE = 128
# Size passed to ImageFont.truetype when a font is loaded.
FONT_SIZE = 100
# Characters to render: A-Z, then a-z, then 0-9 (62 in total).
CHARS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'
# Torch device: CUDA when available, otherwise CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# === SETUP OUTPUT FOLDERS ===
# exist_ok=True makes these calls safe to repeat when the cell is re-run.
# NOTE: upper- and lower-case letters get separate folders ("A" vs "a"), which
# requires a case-sensitive filesystem.
os.makedirs(OUTPUT_DIR, exist_ok=True)
for char in CHARS:
    os.makedirs(os.path.join(OUTPUT_DIR, char), exist_ok=True)

# === FILTER VALID FONTS ===
def is_valid_font(font_path):
    """Report whether Pillow can load ``font_path`` at ``FONT_SIZE``.

    Args:
        font_path: Path to a candidate font file.

    Returns:
        bool: True when ``ImageFont.truetype`` succeeds, False when it raises
        an ordinary exception.
    """
    try:
        ImageFont.truetype(font_path, FONT_SIZE)
    except Exception:
        # Catch Exception rather than using a bare ``except``: a bare clause
        # would also swallow KeyboardInterrupt/SystemExit, so a long scan
        # could not be interrupted cleanly.
        return False
    return True

# === RENDER EACH FONT ===
def render_font(font_path):
    """Render each character of ``CHARS`` for one font, named after the font.

    For every character a white ``IMG_SIZE`` x ``IMG_SIZE`` grayscale image is
    created, the glyph is drawn in black centred on its bounding box, and the
    image is written to
    ``OUTPUT_DIR/<char>/<font_name>_<char>_<id>.png``. ``<id>`` is a six-digit
    number derived deterministically from the full font path, so re-running
    the function finds and skips images it has already produced.

    Args:
        font_path: Path to the font file to render.

    Returns:
        bool: True if the font was processed without error, False if any
        exception occurred (the font is then skipped from the failing
        character onwards). Earlier versions returned None; callers that
        ignore the result are unaffected.
    """
    import zlib  # stdlib; imported locally so this block is self-contained

    try:
        font_name = os.path.splitext(os.path.basename(font_path))[0].replace(" ", "_").replace("/", "_")
        font = ImageFont.truetype(font_path, FONT_SIZE)

        # Stable id in [100000, 999999] computed from the path. A random id
        # changes on every run, so the existence check below could never
        # match and each re-run would add a second copy of every image.
        # os.fsencode handles paths with undecodable bytes.
        font_id = 100000 + zlib.crc32(os.fsencode(font_path)) % 900000

        for char in CHARS:
            image_name = f"{font_name}_{char}_{font_id}.png"
            save_path = os.path.join(OUTPUT_DIR, char, image_name)

            if os.path.exists(save_path):
                continue  # Avoid duplicates

            image = Image.new('L', (IMG_SIZE, IMG_SIZE), color=255)
            draw = ImageDraw.Draw(image)

            # Centre the glyph: offset by the bbox origin so the inked area,
            # not the text origin, sits in the middle of the canvas.
            bbox = font.getbbox(char)
            w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
            x = (IMG_SIZE - w) / 2 - bbox[0]
            y = (IMG_SIZE - h) / 2 - bbox[1]
            draw.text((x, y), char, font=font, fill=0)

            # Save the PIL image directly. Converting it to a tensor, moving
            # it to a device and converting back performs no computation, and
            # the device transfer inside a pool worker is an extra failure
            # point that the handler below would hide.
            image.save(save_path)

    except Exception:
        # Best effort: a broken font must not abort the whole run, but report
        # the failure through the return value instead of hiding it.
        return False
    return True

# === MULTIPROCESSING ===
if __name__ == "__main__":
    with Pool(processes=cpu_count()) as pool:
        # One worker per CPU. Results are consumed only to drive the progress
        # bar. A chunksize above the default of 1 batches the many small
        # per-font tasks and cuts inter-process overhead on this long iterable.
        list(tqdm(pool.imap_unordered(render_font, valid_fonts, chunksize=64), total=len(valid_fonts)))


 77%|███████▋  | 110140/143733 [03:14<00:55, 606.93it/s]

In [None]:
import os

# Count the image files written for the character "0" in the unique-name dataset.
folder_path = "/content/UniqueName_English_Characters_Image_0-9_A-Z/0"
file_count = sum(
    1
    for entry in os.listdir(folder_path)
    if entry.endswith(('.png', '.jpg', '.jpeg', '.webp'))
)

print(f"Total images in '0': {file_count}")


In [None]:
import os

# Count the image files written for the character "0" in the sequential dataset.
folder_path = "/content/Sequential_English_Characters_Image_0-9_A-Z/0"
file_count = sum(
    1
    for entry in os.listdir(folder_path)
    if entry.endswith(('.png', '.jpg', '.jpeg', '.webp'))
)

print(f"Total images in '0': {file_count}")
