# Download & Import Packages

In [None]:
!pip install opencv-python-headless

Collecting opencv-python-headless
  Using cached opencv_python_headless-4.8.0.74-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (49.1 MB)
Installing collected packages: opencv-python-headless
Successfully installed opencv-python-headless-4.8.0.74


In [None]:
from PIL import Image, ImageDraw, ImageFont
import math
import random
import string
import requests
import numpy as np
import math
import os
import cv2
import imageio
import shutil
from concurrent.futures import ThreadPoolExecutor

In [None]:
# switch to the project folder: the word list, font, DICOM and custom_dataset paths below are all relative
# NOTE: the target is itself relative, so re-running this cell resolves it against the new working directory
os.chdir("../Text Detection Project")

# Import Word Lists for Text Generation

Scraped from Wikipedia, GitHub, and other online resources

In [None]:
# import English word list (10000 entries) for random text generation
# explicit UTF-8 so the result does not depend on the platform's default encoding;
# splitlines() keeps the last word even when the file has no trailing newline
with open('Word Lists/English Word List.txt', encoding='utf-8') as f:
    English_list = f.read().splitlines()

In [None]:
# import Portuguese word list (4927 entries) for random text generation
# explicit UTF-8 so accented words decode the same on every platform;
# splitlines() keeps the last word even when the file has no trailing newline
with open('Word Lists/Portuguese Word List.txt', encoding='utf-8') as f:
    Portuguese_list = f.read().splitlines()

In [None]:
# import Chinese word list (56064 entries) for random text generation
# explicit UTF-8 so the characters decode the same on every platform;
# splitlines() keeps the last word even when the file has no trailing newline
with open('Word Lists/Chinese Word List.txt', encoding='utf-8') as f:
    Chinese_list = f.read().splitlines()

# Helper Functions for Random Text Generation

In [None]:
# generate random text block
# supports English, Portuguese, and Chinese
def generate_random_text(length, language):
    """Return `length` randomly chosen words of `language`, joined by single spaces.

    Words are drawn independently with random.choice, so repeats are possible.
    Supported languages are 'English', 'Portuguese' and 'Chinese'; any other
    value yields the literal string 'Language not supported'.
    """
    # pick the vocabulary first, bail out early for unknown languages
    if language == 'English':
        vocabulary = English_list
    elif language == 'Portuguese':
        vocabulary = Portuguese_list
    elif language == 'Chinese':
        vocabulary = Chinese_list
    else:
        return 'Language not supported'

    picks = [random.choice(vocabulary) for _ in range(length)]
    return ' '.join(picks)

In [None]:
# generate a random color in RGB format
def generate_random_color():
    """Return a random (r, g, b) tuple, each channel an integer in 0-255 inclusive."""
    # channels are drawn in r, g, b order
    return tuple(random.randint(0, 255) for _ in range(3))

In [None]:
# generate a random grayscale color in RGB format
def generate_random_grayscale_color():
    """Return a gray (v, v, v) tuple where v is one random integer in 0-255 inclusive."""
    level = random.randint(0, 255)
    return (level,) * 3

In [None]:
# generate a number from Poisson distribution
def generate_poisson(lower_bound = 1, upper_bound = 10, lambd = 2):
    """Draw a Poisson(lambd) sample restricted to [lower_bound, upper_bound].

    Samples are redrawn until one falls inside the inclusive bounds
    (rejection sampling), so bounds far from `lambd` make the call slow.

    Args:
        lower_bound: smallest accepted value (inclusive).
        upper_bound: largest accepted value (inclusive).
        lambd: rate passed to np.random.poisson.

    Returns:
        The first accepted sample, as returned by np.random.poisson.

    Raises:
        ValueError: if the bounds can never be satisfied, which previously
            made the loop spin forever.
    """
    if lower_bound > upper_bound:
        raise ValueError(
            f'lower_bound ({lower_bound}) must not exceed upper_bound ({upper_bound})')
    if upper_bound < 0:
        # Poisson samples are never negative
        raise ValueError(f'upper_bound ({upper_bound}) must be non-negative')
    if lambd == 0 and lower_bound > 0:
        # a rate of zero only ever produces 0
        raise ValueError('lambd == 0 only produces 0, which is below lower_bound')

    while True:
        sample = np.random.poisson(lambd)
        if lower_bound <= sample <= upper_bound:
            return sample

# Main Function for Random Text Generation

In [None]:
# overlay random text on image
# customize randomness
def overlay_random_text(
    input, output,
    language = 'English',
    font_name =  'Arial Unicode.ttf',
    font_size = 20, # pixels
    text_length = 10, # number of words
    fill_color = (0, 0, 0), # default black
    stroke_color = (255, 255, 255), # default white
    stroke_width = 1, # pixels
    rotation_degree = 0):
    """Draw one random text string onto an image, save it, and return its bounding box.

    The text is rendered on a transparent layer the size of the image at a
    random anchor, the layer is rotated by `rotation_degree`, and the result is
    pasted onto the image. The anchor can be anywhere in the image, including
    its right/bottom edge, so the text may be clipped or not visible at all.

    Args:
        input: path of the source image (converted to RGB if needed).
        output: path the annotated image is saved to.
        language: language passed to generate_random_text.
        font_name: font file name, looked up under 'Fonts/'.
        font_size: font size in pixels.
        text_length: number of random words to draw.
        fill_color: RGB text color.
        stroke_color: RGB outline color.
        stroke_width: outline width in pixels.
        rotation_degree: rotation applied to the text layer, in degrees.

    Returns:
        (center_x, center_y, width, height) of the axis-aligned box around the
        visible text pixels, each divided by the image width/height. All four
        values are 0.0 when no text pixel is visible.
    """
    with Image.open(input) as img:

        # makes sure image is in RGB mode
        if img.mode != 'RGB':
            img = img.convert('RGB')

        font = ImageFont.truetype('Fonts/' + font_name, font_size)
        text = generate_random_text(text_length, language)

        # generate random coordinate (top-left)
        x, y = random.randint(0, img.width), random.randint(0, img.height)

        # generate transparent image to temporarily place text
        temp = Image.new('RGBA', (img.width, img.height), (0, 0, 0, 0))  # transparent black background

        # draw text
        temp_draw = ImageDraw.Draw(temp)
        temp_draw.text((x, y), text, font=font, fill=fill_color, stroke_width=stroke_width, stroke_fill=stroke_color)

        # rotate
        temp = temp.rotate(rotation_degree, fillcolor=(0, 0, 0, 0))

        # find the rows and columns that contain at least one text pixel
        data = np.array(temp)
        colored = np.any(data[:, :, :] != [0, 0, 0, 0], axis=2)  # find pixels that are not transparent black
        rows = np.where(np.any(colored, axis=1))[0]
        cols = np.where(np.any(colored, axis=0))[0]

        # # uncomment when checking manual bounding boxes
        # # draw bounding box
        # ImageDraw.Draw(img).rectangle((cols[0], rows[0], cols[-1], rows[-1]), outline = 'red')

        # paste text on to the image
        img.paste(temp, temp)

        # save image
        img.save(output)
        print(f'Image saved in {output}')

        # no visible text: report an empty box
        if rows.size == 0:
            return 0.0, 0.0, 0.0, 0.0

        # pixel index k covers [k, k + 1), so the box around the inclusive
        # indices first..last spans [first, last + 1)
        left, right = cols[0], cols[-1] + 1
        top, bottom = rows[0], rows[-1] + 1

        # calculate center_x, center_y, bbox_width, bbox_height
        # then normalize to (0-1)
        center_x_norm = (left + right) / 2 / img.width
        center_y_norm = (top + bottom) / 2 / img.height
        bbox_width_norm = (right - left) / img.width
        bbox_height_norm = (bottom - top) / img.height

        return center_x_norm, center_y_norm, bbox_width_norm, bbox_height_norm

# Supplemental Functions

In [None]:
# create a GIF from the images
def create_gif(image_dir, gif_path, duration = 1):
    """Combine every file in `image_dir` into a GIF written to `gif_path`.

    Args:
        image_dir: directory whose entries are all read as frames.
        gif_path: destination path of the GIF.
        duration: forwarded unchanged to imageio.mimsave.
    """
    # os.listdir returns entries in arbitrary order; sort so the frames
    # follow file-name order on every run
    frames = [
        imageio.v2.imread(os.path.join(image_dir, filename))
        for filename in sorted(os.listdir(image_dir))
    ]

    imageio.mimsave(gif_path, frames, duration = duration)

In [None]:
# create a mp4 video from the images
def create_video(image_dir, video_path, fps=1):
    """Write the images in `image_dir` to an mp4 video at `video_path`.

    Frames are written in file-name order, one at a time, so the whole
    directory never has to fit in memory. The video size is taken from the
    first readable image.
    NOTE(review): frames whose size differs from the first one are passed to
    the writer unchanged; confirm how cv2.VideoWriter treats them.

    Args:
        image_dir: directory containing the frame images.
        video_path: destination path of the video.
        fps: frames per second of the output.

    Raises:
        ValueError: if the directory contains no image cv2 can read.
    """
    video = None
    try:
        # os.listdir order is arbitrary; sort for a deterministic frame order
        for filename in sorted(os.listdir(image_dir)):
            file_path = os.path.join(image_dir, filename)
            frame = cv2.imread(file_path)
            if frame is None:
                # cv2.imread signals an unreadable file by returning None
                print(f'Skipping unreadable file {file_path}')
                continue

            if video is None:
                height, width = frame.shape[:2]
                video = cv2.VideoWriter(video_path, cv2.VideoWriter_fourcc(*'mp4v'), fps, (width, height))

            video.write(frame)
    finally:
        # release the writer even if reading or writing a frame fails
        if video is not None:
            video.release()

    if video is None:
        raise ValueError(f'No readable images found in {image_dir}')

# Preprocess Images (112120 Images Total)

In [None]:
%%time
# crop images to 640 by 640 to get rid of noise (burn in text that we don't have labels for)

source_folder = 'DICOM/Original'
destination_folder = 'DICOM/Cropped'

def process_image_crop(file, subfolder_path, image_counter):
    """Crop one source image to its central 640 by 640 window and save it as PNG.

    Args:
        file: file name inside `subfolder_path`.
        subfolder_path: folder containing the file.
        image_counter: number used for the output name 'image NNNNNN.png'
            inside `destination_folder`.

    Directories are skipped. Any error is printed instead of raised so one
    bad file does not stop the batch.
    """
    source = os.path.join(subfolder_path, file)

    # Skip directories
    if os.path.isdir(source):
        return

    destination = os.path.join(destination_folder, f"image {image_counter:06d}.png")

    try:
        # the context manager closes the source file even if crop/save fails
        with Image.open(source) as img:
            # fixed box (left, top, right, bottom), assuming 1024 by 1024 input
            img_cropped = img.crop((192, 192, 832, 832))  # 640 by 640
            img_cropped.save(destination)
    except Exception as e:
        # name the folder this task was submitted with; the global subfolder_name
        # keeps changing in the submitting loop and may already point elsewhere
        print(f"Could not process file {file} in {os.path.basename(subfolder_path)}: {str(e)}")

# Use ThreadPoolExecutor to run the function in multiple threads
NUM_THREADS = 10
image_counter = 1  # Counter to rename images

with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
    # Loop through each subfolder named "images 1" to "images 12"
    for i in range(1, 13):
        subfolder_name = f"images {i}"
        subfolder_path = os.path.join(source_folder, subfolder_name)

        # List all files in the subfolder
        # os.listdir order is arbitrary; sort so every run assigns the same
        # number to the same source file (the later train/val split is by number)
        files = sorted(os.listdir(subfolder_path))

        for file in files:
            executor.submit(process_image_crop, file, subfolder_path, image_counter)
            image_counter += 1  # Increment the counter for the next image name


CPU times: user 4h 47min 27s, sys: 2min 19s, total: 4h 49min 47s
Wall time: 1h 18min 9s


# Define Main Function Parameters

In [None]:
# languages a sample can be rendered in; one is picked per image with random.choice
languages = ['English', 'Portuguese', 'Chinese']
# font files used for English and Portuguese text
# (overlay_random_text loads them from the 'Fonts/' folder)
English_Portuguese_font_list = [
    'Arial Unicode.ttf',
    'Geneva.ttf',
    'Keyboard.ttf',
    'Monaco.ttf',
    'NewYork.ttf',
    'NewYorkItalic.ttf',
    'OpenSans-Light.ttf',
    'OpenSans_Condensed-Bold.ttf',
    'OpenSans_Condensed-BoldItalic.ttf',
    'OpenSans_Condensed-ExtraBold.ttf',
    'OpenSans_Condensed-ExtraBoldItalic.ttf',
    'OpenSans_Condensed-Italic.ttf',
    'OpenSans_Condensed-Light.ttf',
    'OpenSans_Condensed-LightItalic.ttf',
    'OpenSans_Condensed-Medium.ttf',
    'OpenSans_Condensed-MediumItalic.ttf',
    'OpenSans_Condensed-Regular.ttf',
    'OpenSans_Condensed-SemiBold.ttf',
    'OpenSans_Condensed-SemiBoldItalic.ttf',
    'OpenSans_SemiCondensed-Bold.ttf',
    'OpenSans_SemiCondensed-BoldItalic.ttf',
    'OpenSans_SemiCondensed-ExtraBold.ttf',
    'OpenSans_SemiCondensed-ExtraBoldItalic.ttf',
    'OpenSans_SemiCondensed-Italic.ttf',
    'OpenSans_SemiCondensed-Light.ttf',
    'OpenSans_SemiCondensed-LightItalic.ttf',
    'OpenSans_SemiCondensed-Medium.ttf',
    'OpenSans_SemiCondensed-MediumItalic.ttf',
    'OpenSans_SemiCondensed-Regular.ttf',
    'OpenSans_SemiCondensed-SemiBold.ttf',
    'OpenSans_SemiCondensed-SemiBoldItalic.ttf',
    'OpenSans-Bold.ttf',
    'OpenSans-BoldItalic.ttf',
    'OpenSans-ExtraBold.ttf',
    'OpenSans-ExtraBoldItalic.ttf',
    'OpenSans-Italic.ttf',
    'OpenSans-LightItalic.ttf',
    'OpenSans-Medium.ttf',
    'OpenSans-MediumItalic.ttf',
    'OpenSans-Regular.ttf',
    'OpenSans-SemiBold.ttf',
    'OpenSans-SemiBoldItalic.ttf',
    'Roboto-ThinItalic.ttf',
    'Roboto-Thin.ttf',
    'Roboto-Regular.ttf',
    'Roboto-MediumItalic.ttf',
    'Roboto-Medium.ttf',
    'Roboto-LightItalic.ttf',
    'Roboto-Light.ttf',
    'Roboto-Italic.ttf',
    'Roboto-BoldItalic.ttf',
    'Roboto-Bold.ttf',
    'Roboto-BlackItalic.ttf',
    'Roboto-Black.ttf',
    'SFCompact.ttf',
    'SFCompactItalic.ttf',
    'SFCompactRounded.ttf',
    'SFNS.ttf',
    'SFNSItalic.ttf',
    'SFNSMono.ttf',
    'SFNSMonoItalic.ttf',
    'SFNSRounded.ttf'
]
# font files used when the sampled language is Chinese
# works for google colab only
Chinese_font_list = [
    'HanyiSentyPagoda Regular.ttf',
    'HanyiSentyPine Regular.ttf',
    'HanyiSentyWen Regular.ttf'
]
# (min, max) font size, both ends inclusive (sampled with random.randint)
font_size_range = (15,50)
# (min, max) stroke width, both ends inclusive (sampled with random.randint)
stroke_width_range = (0,1)
# candidate rotation angles in degrees: small offsets around 0, 90 and 270
rotation_degrees = [
    -2, -1, 0, 1, 2,
    88, 89, 90, 91, 92,
    268, 269, 270, 271, 272
]

## Generate Training Set (100,000 Images)

In [None]:
%%time
# use index 1 - 100,000 images
input_path = 'DICOM/Cropped'

def process_image_train(i):
    """Generate training sample `i`: overlay random text on cropped image `i` and write its label.

    Reads 'image NNNNNN.png' from `input_path`, saves the annotated image under
    'custom_dataset/images/train/' and a one-line label
    '0 center_x center_y bbox_width bbox_height' under 'custom_dataset/labels/train/'.
    """
    # paths, all numbered with six zero-padded digits
    source_image = os.path.join(input_path, f"image {i:06d}.png")
    annotated_image = f'custom_dataset/images/train/train {i:06d}.jpg'
    label_path = f'custom_dataset/labels/train/train {i:06d}.txt'

    # sample the rendering parameters
    language = random.choice(languages)
    font_pool = Chinese_font_list if language == 'Chinese' else English_Portuguese_font_list
    settings = dict(
        input = source_image,
        output = annotated_image,
        language = language,
        font_name = random.choice(font_pool),
        font_size = random.randint(font_size_range[0], font_size_range[1]),
        text_length = generate_poisson(),
        fill_color = generate_random_grayscale_color(),
        stroke_color = generate_random_grayscale_color(),
        stroke_width = random.randint(stroke_width_range[0], stroke_width_range[1]),
        rotation_degree = random.choice(rotation_degrees))

    # render the text and get the normalized bounding box back
    center_x, center_y, bbox_width, bbox_height = overlay_random_text(**settings)

    # label format: class, center_x, center_y, bbox_width, bbox_height
    with open(label_path, 'w') as label_file:
        label_file.write(f'0 {center_x} {center_y} {bbox_width} {bbox_height}')

# Use ThreadPoolExecutor to run the function in multiple threads
NUM_THREADS = 10

with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
    # executor.map only re-raises worker exceptions when its result iterator is
    # consumed, so failures used to vanish silently; keep the futures and
    # report every failed image instead
    futures = {index: executor.submit(process_image_train, index) for index in range(1, 100001)}
    for index, future in futures.items():
        error = future.exception()  # waits for this task to finish
        if error is not None:
            print(f'Could not generate training image {index}: {error!r}')


## Generate Validation Set (12120 Images)

In [None]:
%%time
# use index 100,001 - 112120 images
input_path = 'DICOM/Cropped'

def process_image_val(i):
    """Generate one validation sample from cropped image `i` and write its label.

    Reads 'image NNNNNN.png' (numbered by `i`) from `input_path`; outputs are
    numbered by `i - 100000`: the annotated image goes to
    'custom_dataset/images/val/' and the one-line label
    '0 center_x center_y bbox_width bbox_height' to 'custom_dataset/labels/val/'.
    """
    # validation files restart their numbering after the training range
    val_index = i - 100000

    # paths, all numbered with six zero-padded digits
    source_image = os.path.join(input_path, f"image {i:06d}.png")
    annotated_image = f'custom_dataset/images/val/val {val_index:06d}.jpg'
    label_path = f'custom_dataset/labels/val/val {val_index:06d}.txt'

    # sample the rendering parameters
    language = random.choice(languages)
    font_pool = Chinese_font_list if language == 'Chinese' else English_Portuguese_font_list
    settings = dict(
        input = source_image,
        output = annotated_image,
        language = language,
        font_name = random.choice(font_pool),
        font_size = random.randint(font_size_range[0], font_size_range[1]),
        text_length = generate_poisson(),
        fill_color = generate_random_grayscale_color(),
        stroke_color = generate_random_grayscale_color(),
        stroke_width = random.randint(stroke_width_range[0], stroke_width_range[1]),
        rotation_degree = random.choice(rotation_degrees))

    # render the text and get the normalized bounding box back
    center_x, center_y, bbox_width, bbox_height = overlay_random_text(**settings)

    # label format: class, center_x, center_y, bbox_width, bbox_height
    with open(label_path, 'w') as label_file:
        label_file.write(f'0 {center_x} {center_y} {bbox_width} {bbox_height}')

# Use ThreadPoolExecutor to run the function in multiple threads
NUM_THREADS = 10

with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
    # executor.map only re-raises worker exceptions when its result iterator is
    # consumed, so failures used to vanish silently; keep the futures and
    # report every failed image instead
    futures = {index: executor.submit(process_image_val, index) for index in range(100001, 112121)}
    for index, future in futures.items():
        error = future.exception()  # waits for this task to finish
        if error is not None:
            print(f'Could not generate validation image {index}: {error!r}')

# Generate Testing Set

In [None]:
# generate 100 test images from original images
# crop to 640 by 640 (random location)

def process_image_test(i):
    """Generate test sample `i` from a random 640 by 640 crop of a random original image.

    A file is picked at random from `input_files`, cropped at a random
    location, stored temporarily as 'temp_cropped_image_<i>.png', and passed to
    overlay_random_text. The annotated image and its one-line label
    '0 center_x center_y bbox_width bbox_height' are written under
    'custom_dataset/images/test/' and 'custom_dataset/labels/test/'.
    Errors are printed instead of raised; the temporary file is always removed.
    """
    temp_path = f'temp_cropped_image_{i}.png'
    # bound before the try block: the error message below prints it, and a
    # failure before a font is chosen must not raise UnboundLocalError
    font_name = None
    try:
        # input
        random_file = random.choice(input_files)

        # the context manager closes the source file once the crop is saved
        with Image.open(os.path.join(input_path, random_file)) as input_image:
            # Crop random 640x640 from the image
            left = random.randint(0, input_image.width - 640)
            upper = random.randint(0, input_image.height - 640)
            right = left + 640
            lower = upper + 640
            cropped_image = input_image.crop((left, upper, right, lower))

            # Save cropped image to a temporary path
            cropped_image.save(temp_path)

        # output
        output = 'custom_dataset/images/test/test ' + str(i).zfill(6) + '.jpg'
        # language
        language = random.choice(languages)
        # font name
        if language == 'Chinese':
            font_name = random.choice(Chinese_font_list)
        else:
            font_name = random.choice(English_Portuguese_font_list)

        # font size
        font_size = random.randint(font_size_range[0], font_size_range[1])
        # text length
        text_length = generate_poisson()
        # fill color
        fill_color = generate_random_grayscale_color()
        # stroke color
        stroke_color = generate_random_grayscale_color()
        # stroke width
        stroke_width = random.randint(stroke_width_range[0], stroke_width_range[1])
        # rotation degree
        rotation_degree = random.choice(rotation_degrees)

        # call main function
        center_x, center_y, bbox_width, bbox_height = overlay_random_text(
            input = temp_path,
            output = output,
            language = language,
            font_name = font_name,
            font_size = font_size,
            text_length = text_length,
            fill_color = fill_color,
            stroke_color = stroke_color,
            stroke_width = stroke_width,
            rotation_degree = rotation_degree)

        # save label to txt file with format (class, center_x, center_y, bbox_width, bbox_height)
        filename =  'custom_dataset/labels/test/test ' + str(i).zfill(6) + '.txt'
        with open(filename, 'w') as file:
            file.write(f'0 {center_x} {center_y} {bbox_width} {bbox_height}')

    except Exception as e:
        print(f"Error processing image {i} with font {font_name}: {e}")
    finally:
        # Ensure the temporary cropped image is removed
        if os.path.exists(temp_path):
            os.remove(temp_path)

# Use ThreadPoolExecutor to run the function in multiple threads
NUM_THREADS = 10

input_path = 'DICOM/Original/images 1'
input_files = os.listdir(input_path)

with ThreadPoolExecutor(max_workers=NUM_THREADS) as executor:
    # executor.map only re-raises worker exceptions when its result iterator is
    # consumed; keep the futures so anything that escapes process_image_test
    # is reported instead of being dropped
    futures = {index: executor.submit(process_image_test, index) for index in range(1, 101)}
    for index, future in futures.items():
        error = future.exception()  # waits for this task to finish
        if error is not None:
            print(f'Could not generate test image {index}: {error!r}')