In [None]:
import cv2
import numpy as np
import zipfile
import os
import gdown
from matplotlib import pyplot as plt
from tqdm import tqdm
import shutil

# Creating Textures - CVL Dataset

## Extracting Lines

In [None]:
# Download the archive identified by `file_id` from Google Drive and unpack it
# into a local folder. NOTE(review): "file_id" looks like a placeholder for the
# real Drive id -- confirm before running.
file_id = "file_id"
download_url = f"https://drive.google.com/uc?id={file_id}"
archive_path = "binary_cvl.zip"
extract_dir = "cvl_dataset"

gdown.download(download_url, archive_path, quiet=False)

# Extract every member of the downloaded zip into `extract_dir`.
with zipfile.ZipFile(archive_path, 'r') as archive:
    archive.extractall(extract_dir)

## Extracting Words

In [None]:
def segment_words(lines, word_threshold=44000, min_consecutive=20):
    """Cut each line image into word crops using column-wise intensity sums.

    For every line, the pixel values of each column are summed. A column whose
    sum is below ``word_threshold`` is treated as containing ink; any other
    column is treated as a gap. A word starts at the first ink column and is
    closed once ``min_consecutive`` gap columns in a row have followed it. The
    crop ends right after the last ink column (the trailing gap is excluded).

    A word that is still open when the line ends (fewer than
    ``min_consecutive`` trailing gap columns, or ink touching the right edge)
    is emitted as well instead of being dropped.

    Parameters
    ----------
    lines : iterable of 2-D numpy arrays
        Line images indexed as ``line[row, column]``.
    word_threshold : int, optional
        Column sums strictly below this value count as ink. The default of
        44000 is presumably tuned for white-background (255) line images of
        this dataset -- TODO confirm.
    min_consecutive : int, optional
        Number of consecutive gap columns (>= 1) that terminates a word.

    Returns
    -------
    list of numpy arrays
        Word crops ``line[:, start:end]`` in line order, then left to right.
    """
    cropped_words = []
    for line in lines:
        column_sums = np.sum(line, axis=0)
        spans = []
        word_start = None
        gap_run = 0  # gap columns seen since the last ink column of the open word

        for col, value in enumerate(column_sums):
            if value < word_threshold:
                # Ink column: extends (or opens) the current word.
                gap_run = 0
                if word_start is None:
                    word_start = col
            elif word_start is not None:
                gap_run += 1
                if gap_run >= min_consecutive:
                    # The gap began at col - min_consecutive + 1, which is the
                    # exclusive end of the word.
                    spans.append((word_start, col - min_consecutive + 1))
                    word_start = None
                    gap_run = 0

        # Flush a word that runs up to (or close to) the right edge of the line;
        # without this the last word of such a line would be lost.
        if word_start is not None:
            spans.append((word_start, len(column_sums) - gap_run))

        cropped_words.extend(line[:, start:end] for start, end in spans)

    return cropped_words

## Generating Texture - Line Filled

In [None]:
def generate_texture(cropped_words, file_name="unknown"):
    canvas_height = 950
    canvas_width = 950
    if not cropped_words:
        print(f"[WARNING] No cropped words for file: {file_name}")
        return np.ones((canvas_height, canvas_width), dtype=np.uint8) * 255

    row_spacing = -100
    column_spacing = 0

    rows = []
    current_row = []
    current_width = 0

    for word in cropped_words:
        word_height, word_width = word.shape

        while word_width > 0:
            remaining_space = canvas_width - current_width

            if remaining_space >= word_width:
                current_row.append(word)
                current_width += word_width + column_spacing
                break
            else:
                if remaining_space > 0:
                    word_part = word[:, :remaining_space]
                    current_row.append(word_part)
                    word = word[:, remaining_space:]
                    word_width = word.shape[1]
                rows.append(current_row)
                current_row = []
                current_width = 0

    if not rows:
        print(f"[WARNING] No rows could be constructed from the cropped words in file: {file_name}")
        return np.ones((canvas_height, canvas_width), dtype=np.uint8) * 255

    if current_row:
        rows.append(current_row)


    used_width = sum(w.shape[1] for w in rows[-1]) + column_spacing * (len(rows[-1]) - 1)
    remaining_space = canvas_width - used_width
    for w in rows[0]:
      if remaining_space <= 0:
          break
      h, w_w = w.shape
      if w_w + column_spacing <= remaining_space:
          rows[-1].append(w)
          remaining_space -= (w_w + column_spacing)
      else:
          slice_width = remaining_space
          rows[-1].append(w[:, :slice_width])
          remaining_space = 0


    texture = np.ones((canvas_height, canvas_width), dtype=np.uint8) * 255

    y_offset = 0
    row_index = 0

    while y_offset < canvas_height:
        row = rows[row_index % len(rows)]  
        x_offset = 0
        max_row_height = max(word.shape[0] for word in row)

        if y_offset + max_row_height > canvas_height:
            break

        for word in row:
            word_height, word_width = word.shape
            if y_offset + word_height <= canvas_height and x_offset + word_width <= canvas_width:
                word_text = np.ones((canvas_height, canvas_width), dtype=np.uint8) * 255
                word_text[y_offset:y_offset + word_height, x_offset:x_offset + word_width] = word
                texture = cv2.bitwise_not(cv2.bitwise_or(cv2.bitwise_not(texture), cv2.bitwise_not(word_text)))
            x_offset += word_width + column_spacing

        y_offset += max_row_height + row_spacing
        row_index += 1

    crop_size = 900

    h, w = texture.shape
    y_start = (h - crop_size) // 2
    x_start = (w - crop_size) // 2
    y_end   = y_start + crop_size
    x_end   = x_start + crop_size

    texture = texture[
        y_start : y_end,
        x_start : x_end
    ]

    return texture

In [None]:
def split_texture(texture):
    """Slice the top-left 900x900 area of ``texture`` into a 2x2 grid of tiles.

    Each tile is requested as a 450x450 slice; tiles are returned in row-major
    order (top-left, top-right, bottom-left, bottom-right). Because plain
    slicing is used, tiles are clipped when ``texture`` is smaller than
    900 pixels along an axis.
    """
    tile = 450
    origins = [(row * tile, col * tile) for row in range(2) for col in range(2)]
    return [texture[top:top + tile, left:left + tile] for top, left in origins]

## Creating Textures for all samples

In [None]:
from tqdm import tqdm

# Build textures for every PNG under input_root/<writer>/ and save the four
# patches of each texture to output_root/<writer id>/<base id>/.
# NOTE(review): segment_lines is not defined in the visible part of this
# notebook; it must be defined (returning a 2-tuple whose first item is the
# list of line images) before this cell is run -- confirm.
input_root = 'cvl_dataset/data/binary_cvl'
output_root = 'texture_cvl'

for writer_id in tqdm(sorted(os.listdir(input_root))):
    writer_path = os.path.join(input_root, writer_id)
    if not os.path.isdir(writer_path):
        continue

    for filename in sorted(os.listdir(writer_path)):
        if not filename.endswith(".png"):
            continue

        image_path = os.path.join(writer_path, filename)

        # Best effort: a failure on one image is reported and the run continues.
        try:
            image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable file by returning None
                # rather than raising; surface that explicitly.
                raise ValueError("cv2.imread returned None (missing or unreadable image)")

            cleaned_lines, _ = segment_lines(image)

            cropped_words = segment_words(cleaned_lines)

            texture = generate_texture(cropped_words, file_name=image_path)

            patches = split_texture(texture)

            # The output folder is derived from the file name (text before the
            # first "." and, for the writer, before the first "_"), not from
            # the directory name, so the loop variable is left untouched.
            base_id = filename.split(".")[0]
            output_writer_id = base_id.split("_")[0]

            save_dir = os.path.join(output_root, output_writer_id, base_id)
            os.makedirs(save_dir, exist_ok=True)

            for i, patch in enumerate(patches):
                patch_filename = f"{base_id}_T{i+1}.png"
                patch_path = os.path.join(save_dir, patch_filename)
                # cv2.imwrite reports failure through its return value.
                if not cv2.imwrite(patch_path, patch):
                    print(f"[ERROR] Could not write: {patch_path}")

        except Exception as e:
            print(f"[ERROR] Failed processing: {image_path}")
            print(f"        {type(e).__name__}: {e}")

 48%|████▊     | 149/309 [01:25<01:24,  1.88it/s]



 63%|██████▎   | 194/309 [01:51<00:54,  2.09it/s]



100%|██████████| 309/309 [02:53<00:00,  1.78it/s]


## Upload Textures to Drive

In [None]:
# Package the generated textures and export them from the Colab runtime.
# The statements below are order-dependent: the archive must exist before it
# is downloaded or copied, and Drive must be mounted before the copy.
import shutil
# Zip the contents of the "texture_cvl" directory into "texture_cvl.zip".
shutil.make_archive("texture_cvl", 'zip', "texture_cvl")

# Offer the archive as a browser download (Google Colab only).
from google.colab import files
files.download("texture_cvl.zip")

# Mount Google Drive at /content/drive (Google Colab only).
from google.colab import drive
drive.mount('/content/drive')

# NOTE(review): "PATH" looks like a placeholder for the real Drive folder --
# confirm before running. IPython substitutes {target_directory} into the
# shell command below.
target_directory = "/content/drive/MyDrive/PATH"
!cp texture_cvl.zip "{target_directory}/"