## This notebook can be used to create the DeepSVG meta file for a dataset.

It should be run using the environment created from the requirements.txt file in the SVGRepresentation folder.

The following code expects the folder 'SVG_Data' to be in the same directory as the cloned project repository.

```
parent
    ├── SVG_Data
    └── SVG_LogoGenerator
```

If this is not the case on your machine, set the correct path to the SVG_Data folder (variable `datafolder`) in the first cell of this notebook.

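Outputs will be written to the path specified in the variable `output_meta_file_folder`. By default, this is `SVG_Data/data_for_training_deepsvg_model/WorldVector_SVGLogo_preprocessed_filtered_combined_meta`. The meta file is saved there as `meta_<N>.csv`, where `<N>` is the number of SVGs for which stats were produced.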

In [1]:
# Relative path to the SVG_Data folder; every other path in this notebook is built from it.
datafolder = "../../../SVG_Data/"

In [2]:
import glob
import logging
import os
import sys
from concurrent import futures

import pandas as pd
from tqdm import tqdm

sys.path.insert(0, "../deepsvg")
from SVGRepresentation.deepsvg.deepsvg.svglib.svg import SVG

In [3]:
# Input and output folders share one relative name under `datafolder`;
# the output folder is the input folder name with a "_meta" suffix.
_dataset_relpath = "data_for_training_deepsvg_model/WorldVector_SVGLogo_preprocessed_filtered_combined"

processed_svgs_folder = os.path.join(datafolder, _dataset_relpath)  # input: *.svg files
output_meta_file_folder = os.path.join(datafolder, _dataset_relpath + "_meta")  # output: meta CSV

# Create the output folder up front; exist_ok=True makes re-running this cell safe.
os.makedirs(output_meta_file_folder, exist_ok=True)

# Size of the thread pool used by create_meta_file.
NUM_WORKERS = 32

## Helper functions

In [4]:
def get_svg_stats(svg_file):
    """Collect length statistics for the path groups of one SVG file.

    Args:
        svg_file: Path to the SVG file to load with ``SVG.load_svg``.

    Returns:
        A dict with the keys ``id`` (file name without directory and
        extension), ``total_len`` (sum of all group lengths), ``nb_groups``
        (number of path groups), ``len_groups`` (list of each group's
        ``total_len()``) and ``max_len_group`` (largest group length).
        ``None`` if anything raises while loading or measuring the SVG; in
        that case a message naming the file is printed.
    """
    stem, _extension = os.path.splitext(os.path.basename(svg_file))

    try:
        path_groups = SVG.load_svg(svg_file).svg_path_groups

        group_lengths = []
        for group in path_groups:
            group_lengths.append(group.total_len())

        # max() raises ValueError for an SVG without any path group, so such
        # files take the failure branch below and yield no stats.
        longest_group = max(group_lengths)

        stats = {
            "id": stem,
            "total_len": sum(group_lengths),
            "nb_groups": len(group_lengths),
            "len_groups": group_lengths,
            "max_len_group": longest_group
        }
    except Exception:
        # Best effort: report the file and let the caller skip it.
        print("No meta info produced for: ", svg_file)
        return None

    return stats

In [5]:
def create_meta_file(processed_svgs_folder, output_meta_file_folder):
    """Compute stats for every SVG in a folder and write them to one CSV.

    Every ``*.svg`` directly inside ``processed_svgs_folder`` is passed to
    ``get_svg_stats`` on a thread pool of ``NUM_WORKERS`` threads. Files for
    which ``get_svg_stats`` returns ``None`` are skipped. The remaining rows
    are sorted by ``id`` and written to ``meta_<N>.csv`` in
    ``output_meta_file_folder``, where ``<N>`` is the number of rows.

    Args:
        processed_svgs_folder: Folder that contains the SVG files to scan.
        output_meta_file_folder: Existing folder the CSV is written to.

    Returns:
        None. The result is the CSV file written to disk.
    """
    # glob returns matches in arbitrary order; sorting keeps the submission
    # order (and therefore the run) reproducible.
    svg_files = sorted(glob.glob(os.path.join(processed_svgs_folder, "*.svg")))

    if not svg_files:
        # A wrong `datafolder` otherwise goes unnoticed and only shows up as
        # an empty meta_0.csv. WARNING is visible with default logging config.
        logging.warning(
            "No .svg files found in %s; the meta file will be empty.",
            processed_svgs_folder,
        )

    meta_data = {}

    with futures.ThreadPoolExecutor(max_workers=NUM_WORKERS) as executor:
        preprocess_requests = [executor.submit(get_svg_stats, svg_file) for svg_file in svg_files]

        with tqdm(total=len(svg_files)) as pbar:
            # as_completed yields in completion order, which differs between
            # runs; the rows are therefore re-ordered before writing.
            for future in futures.as_completed(preprocess_requests):
                pbar.update(1)

                result = future.result()
                if result is not None:
                    meta_data[result["id"]] = result

    # Sort by id so the CSV content does not depend on thread scheduling.
    rows = [meta_data[svg_id] for svg_id in sorted(meta_data)]
    df = pd.DataFrame(rows)
    df.to_csv(os.path.join(output_meta_file_folder, f"meta_{len(df)}.csv"), index=False)

    # Only shown if logging has been configured to emit INFO records.
    logging.info("Meta file creation complete.")

## Create the meta file

In [None]:
# Scan every *.svg in processed_svgs_folder and write meta_<N>.csv to output_meta_file_folder.
create_meta_file(processed_svgs_folder, output_meta_file_folder)

  1%|          | 894/91783 [00:03<05:59, 252.81it/s]


No meta info produced for:  ../../../SVG_Data/data_for_training_deepsvg_model/WorldVector_SVGLogo_preprocessed_filtered_combined\Alejo%20y%20Valentina.svg


## Show the stats for one logo:

In [7]:
# Spot-check a single logo; the returned stats dict is shown as the cell output.
example_logo_path = os.path.join(processed_svgs_folder, "postman-icon.svg")
get_svg_stats(example_logo_path)

{'id': 'postman-icon',
 'total_len': 40,
 'nb_groups': 5,
 'len_groups': [15, 7, 5, 5, 8],
 'max_len_group': 15}