## This notebook can be used to preprocess SVGs.

This notebook requires a GUI to work.

It should be run using the environment created from the requirements.txt file in the SVGRepresentation folder.

The following code expects the folder 'SVG_Data' to be in the same directory as the cloned project repository.
```
parent/
    ├── SVG_Data
    └── SVG_LogoGenerator
```
If this is not the case on your machine, make sure to
* set the correct path to the `SVG_Data` folder (the `datafolder` variable) in the first code cell of this notebook.

Outputs will be written to the `SVG_Data` folder under `SVGs_transforms_removed`
```
parent/
    ├── SVG_Data
    │   └── SVGs_transforms_removed
    │       ├── WorldVector_no_transforms
    │       └── WorldVector_preprocessed
    └── SVG_LogoGenerator
```


In [20]:
# Location of the SVG_Data folder. This is a relative path, so it is resolved
# against the notebook's current working directory; adjust it if SVG_Data does
# not sit next to the cloned repository.
datafolder = "../../../SVG_Data/"

In [21]:
import os
import sys

sys.path.insert(0, "../deepsvg")
sys.path.insert(0, "../../animate_logos_main_adapted")

from inkscape_batch_remove import inkscape_remove_transform
from concurrent import futures
import glob
from tqdm import tqdm

from SVGRepresentation.deepsvg.deepsvg.svglib.svg import SVG

from animate_logos_main_adapted.src.preprocessing.sort_paths import Selector

## Helpers:

In [22]:
# Size of the thread pool used by process_directory.
NUM_WORKERS = 32

# Passed to remove_paths_mse as `mse_threshold`, which forwards it to
# Selector.truncate_svgs as `coverage_percent`.
MSE_MIN_ACCURACY = 0.98

# Upper bounds checked by should_process_svg: SVGs with more path groups or a
# larger total_length() than these limits are skipped.
SVG_MAX_GROUPS = 30
SVG_MAX_COMMANDS = 200

In [23]:
def create_dir(directory: str) -> str:
    """Make sure ``directory`` exists and hand the same path back.

    Missing parent directories are created as well, and an already existing
    directory is not an error. Returning the path lets callers create and
    assign a folder in a single expression.
    """
    os.makedirs(name=directory, exist_ok=True)
    return directory

In [24]:
# Raw input SVGs. This folder is only read from, so it is not created here.
input_folder_path = os.path.abspath(os.path.join(datafolder, 'raw/WorldVector/Worldvector'))
# Output folders; create_dir makes them on first run and returns the path.
# process_svg writes the intermediate SVG to the first and the final SVG to the second.
no_transforms_path = create_dir(os.path.abspath(os.path.join(datafolder, 'SVGs_transforms_removed/WorldVector_no_transforms')))
output_folder_path = create_dir(os.path.abspath(os.path.join(datafolder, 'SVGs_transforms_removed/WorldVector_preprocessed')))

# Scratch space used by remove_paths_mse for the Selector's intermediate files.
temp_folder_path = create_dir(os.path.abspath(os.path.join(datafolder, 'temp')))


In [25]:
def remove_paths_mse(svg_file: str, mse_threshold: float) -> str:
    """Run the project's ``Selector`` on one SVG and return the truncated file's path.

    Args:
        svg_file: Path to the input SVG. Its parent directory is handed to the
            ``Selector`` as the SVG source folder, and its base name (without
            extension) identifies the logo.
        mse_threshold: Forwarded to ``Selector.truncate_svgs`` as
            ``coverage_percent``.

    Returns:
        ``<temp>/truncated_svgs/<name>_truncated.svg``. The function does not
        check that the file exists; it assumes ``truncate_svgs`` writes its
        result under that name (TODO confirm against the Selector source).
    """
    # Use the directory the file actually lives in instead of the global
    # input folder. Otherwise a file from another folder (e.g. the single-file
    # test run) would be looked up in the wrong place.
    svg_directory = os.path.dirname(os.path.abspath(svg_file))
    # splitext is robust against extensions that are not exactly four characters.
    logo_name = os.path.splitext(os.path.basename(svg_file))[0]
    logo_names = [logo_name]

    truncated_svgs_directory = create_dir(os.path.join(temp_folder_path, 'truncated_svgs'))
    path_selection_directory = create_dir(os.path.join(temp_folder_path, 'path_selection'))

    selector = Selector(
        svg_directory,
        path_selection_directory,
        truncated_svgs_directory
    )
    selector.delete_paths_in_logos(logo_names)
    selector.truncate_svgs(svg_directory, logo_names, coverage_percent=mse_threshold)

    return os.path.join(truncated_svgs_directory, f'{logo_name}_truncated.svg')

In [26]:
def read_file_content(file_path: str, encoding: str = "utf-8") -> str:
    """Return the complete text content of ``file_path``.

    Args:
        file_path: File to read.
        encoding: Text encoding used for decoding. Defaults to UTF-8 so the
            result does not depend on the platform's locale encoding.

    Returns:
        The decoded file content as a single string.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the bytes are not valid in ``encoding``.
    """
    with open(file_path, "r", encoding=encoding) as f:
        return f.read()

In [27]:
def write_file_content(file_path: str, content: str, encoding: str = "utf-8") -> None:
    """Write ``content`` to ``file_path``, replacing any existing file.

    Args:
        file_path: Destination file.
        content: Text to write.
        encoding: Text encoding used for the output. Defaults to UTF-8 so the
            bytes on disk do not depend on the platform's locale encoding.

    Raises:
        OSError: If the file cannot be opened for writing.
        UnicodeEncodeError: If ``content`` cannot be represented in ``encoding``.
    """
    with open(file_path, "w", encoding=encoding) as f:
        f.write(content)

In [28]:
def preprocess_svg(svg_content: str) -> SVG:
    """Parse SVG markup and run the notebook's fixed preprocessing pipeline.

    The steps are applied in this order: ``fill_(False)``, ``to_path()``,
    ``normalize()``, ``zoom(0.9)``, ``canonicalize()`` and finally
    ``simplify_heuristic()``, whose return value is the result. The exact
    semantics of each step are defined by deepsvg's ``SVG`` class.

    Args:
        svg_content: SVG document as a string.

    Returns:
        The object returned by ``simplify_heuristic()``.
    """
    parsed = SVG.from_str(svg_content)

    # These calls are used for their effect on `parsed`; their return values
    # are ignored here, so the order matters.
    parsed.fill_(False)
    parsed.to_path()
    parsed.normalize()
    parsed.zoom(0.9)
    parsed.canonicalize()

    return parsed.simplify_heuristic()

In [29]:
def should_process_svg(svg: SVG) -> bool:
    """Tell whether ``svg`` is within the configured size limits.

    An SVG qualifies when its ``total_length()`` does not exceed
    ``SVG_MAX_COMMANDS`` and it has at most ``SVG_MAX_GROUPS`` path groups.
    """
    # Check the command budget first; the group count is only examined
    # when that passes.
    if svg.total_length() > SVG_MAX_COMMANDS:
        return False
    group_count = len(svg.svg_path_groups)
    return group_count <= SVG_MAX_GROUPS

In [30]:
def has_transform(svg: str) -> bool:
    """Report whether the SVG markup contains the text ``transform``.

    This is a plain, case-sensitive substring search over the whole document,
    so it matches the word anywhere in the markup, not only as an attribute name.
    """
    return svg.find("transform") != -1

In [31]:
def process_svg(input_file, no_transforms_folder, output_folder):
    """Preprocess one SVG file and store the result in ``output_folder``.

    Steps:
      1. Skip the file if an output with the same name already exists.
      2. Run ``remove_paths_mse`` and preprocess its result.
      3. Drop the file if it exceeds the limits in ``should_process_svg``.
      4. Write the truncated markup to ``no_transforms_folder``. If it contains
         ``transform``, let ``inkscape_remove_transform`` rewrite that file in
         place and preprocess the rewritten markup instead.
      5. Save the preprocessed SVG to ``output_folder``.

    Args:
        input_file: Path of the SVG to process.
        no_transforms_folder: Folder receiving the intermediate SVG.
        output_folder: Folder receiving the final preprocessed SVG.

    Returns:
        ``True`` if the output already existed or was written, ``False`` if the
        SVG was rejected by ``should_process_svg``.
    """
    svg_name = os.path.basename(input_file)
    intermediate_path = os.path.join(no_transforms_folder, svg_name)
    final_path = os.path.join(output_folder, svg_name)

    # An existing output means this file was handled in an earlier run.
    if os.path.exists(final_path):
        return True

    truncated_path = remove_paths_mse(input_file, MSE_MIN_ACCURACY)
    truncated_markup = read_file_content(truncated_path)
    result_svg = preprocess_svg(truncated_markup)

    # Too many groups or commands: do not write anything for this file.
    if not should_process_svg(result_svg):
        return False

    write_file_content(intermediate_path, truncated_markup)

    if has_transform(truncated_markup):
        # Inkscape reads and overwrites the same intermediate file; the
        # preprocessing is then redone on the rewritten markup.
        inkscape_remove_transform(intermediate_path, intermediate_path)
        result_svg = preprocess_svg(read_file_content(intermediate_path))

    result_svg.save_svg(final_path)
    return True

In [32]:
def process_directory(input_folder, no_transforms_folder, output_folder, skip_first=0):
    """Preprocess every ``.svg`` file directly inside ``input_folder`` in parallel.

    Files are sorted by size (smallest first), the first ``skip_first`` entries
    of that ordering are dropped, and the rest are passed to ``process_svg`` on
    a pool of ``NUM_WORKERS`` threads. A failing file does not stop the batch;
    failures are collected and reported once all files are done.

    Args:
        input_folder: Folder containing the input SVGs (not searched recursively).
        no_transforms_folder: Forwarded to ``process_svg``.
        output_folder: Forwarded to ``process_svg``.
        skip_first: Number of files to skip at the start of the size-sorted list.

    Returns:
        None. Progress and a final summary are printed.
    """
    # "*.svg" matches the same files as the former "**.svg": "**" is only
    # special as a whole path component together with recursive=True.
    svg_files = glob.glob(os.path.join(input_folder, "*.svg"))

    # sort by file size
    print("Sorting input files by size...")
    sorted_svg_files = sorted(svg_files, key=lambda x: os.stat(x).st_size)
    sorted_svg_files = sorted_svg_files[skip_first:]
    print("Finished sorting, start processing...")

    written_count = 0
    filtered_count = 0
    failures = []

    with tqdm(total=len(sorted_svg_files)) as pbar:

        with futures.ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
            # Remember which file belongs to which future for error reporting.
            future_to_file = {
                executor.submit(process_svg, svg_file, no_transforms_folder, output_folder): svg_file
                for svg_file in sorted_svg_files
            }

            print("added all futures...")

            for future in futures.as_completed(future_to_file):
                try:
                    # result() re-raises any exception from the worker thread;
                    # without this call failures would pass unnoticed.
                    if future.result():
                        written_count += 1
                    else:
                        filtered_count += 1
                except Exception as error:  # keep the batch going, report below
                    failures.append((future_to_file[future], error))
                pbar.update(1)

    print("Done: %d written or already present, %d filtered out, %d failed."
          % (written_count, filtered_count, len(failures)))
    # Show only a handful of failures so large runs do not flood the output.
    max_reported = 20
    for svg_file, error in failures[:max_reported]:
        print("  FAILED %s: %r" % (svg_file, error))
    if len(failures) > max_reported:
        print("  ... and %d more failures" % (len(failures) - max_reported))


# Execution

## Run single file (for testing)

In [33]:
# Smoke test on a single file. process_svg returns True when the output file
# already exists or was written, and False when the SVG was filtered out.
process_svg(os.path.join(datafolder,'raw/SVG_Logo/1-1.svg'), no_transforms_path, output_folder_path)

True

## Run directory

In [13]:
# NOTE(review): skip_first=16961 + 32 looks like a manual resume offset from an
# earlier, interrupted run — confirm. Use skip_first=0 to process the whole folder.
process_directory(input_folder_path, no_transforms_path, output_folder_path, skip_first=16961 + 32)

Sorting input files by size...


  0%|          | 0/89678 [00:00<?, ?it/s]

Finished sorting, start processing...
added all futures...


100%|██████████| 89678/89678 [31:32:33<00:00,  1.27s/it]   
