In [2]:
# IPython automagic for `%pwd`: show the kernel's current working directory before it is changed below.
pwd

'/Users/samailguliyev/SVG_LogoGenerator/animate_logos_main_adapted/src/preprocessing'

In [4]:

# NOTE(review): `pickle` is not referenced by any cell visible in this notebook — confirm before removing.
import pickle
import os
# Move the working directory two levels up from the notebook's folder.
# Presumably this is what lets the `src.` imports below and the relative './data/...' defaults resolve — TODO confirm.
# NOTE(review): these calls are relative, so re-running this cell climbs two MORE levels each time;
# restart the kernel before re-executing.
os.chdir("..")
os.chdir("..")

from os import listdir
from os.path import isfile, join
from xml.dom import minidom
from pathlib import Path
from matplotlib import image
from datetime import datetime
from shutil import copyfile
from skimage.metrics import mean_squared_error
# Project-local helpers; they are only importable once the working directory has been changed above.
from src.utils import logger
from src.data.svg_to_png import convert_svgs_in_folder

'/Users/samailguliyev/SVG_LogoGenerator'

In [21]:
# Auxiliary helper: called by Selector_singe_Logo.sort_by_relevance to decide how many paths to keep.

def _get_number_of_paths_for_truncation(ordered_relevance_scores_list, coverage_percent):
    """
    args:
    ordered_relevance_scores_list - list of mse values
    coverage_percent - is a float mse threshold
    d between 0 and 1(e.g: 0.60, 0.999) 
    
    return:
    a number of paths to keep """
    
    sum_list = sum(ordered_relevance_scores_list)
    elements_sum = 0
    
    for i,number in enumerate(ordered_relevance_scores_list):
        elements_sum = elements_sum + number
        if elements_sum > coverage_percent*sum_list:
            print(f"number of path to truncate: {i + 1} out of {len(ordered_relevance_scores_list)}")
        
            return i + 1

In [22]:
class Selector_singe_Logo:
    """Selector for ordering and truncating the elements of a single SVG logo by relevance.

    The relevance of an element is the mean squared error (MSE) between a rendering of the complete
    logo and a rendering of the logo with that element removed: the higher the MSE, the more relevant
    the element.
    """

    def __init__(self, logo_path, dir_svgs='./data/svgs', dir_path_selection='./data/path_selection',
                 dir_truncated_svgs='./data/truncated_svgs', dir_selected_paths='./data/selected_paths',
                 dir_decomposed_svgs='./data/decomposed_svgs', threshold=0.5):
        """
        Args:
            logo_path (str): Path to the SVG file this selector works on.
            dir_svgs (str): Directory containing SVGs to be sorted.
            dir_path_selection (str): Directory of logo folders containing PNGs of deleted paths.
            dir_truncated_svgs (str): Directory containing truncated SVGs to most relevant paths.
            dir_selected_paths (str): Directory containing decomposed SVGs selected by relevance ordering.
            dir_decomposed_svgs (str): Directory containing decomposed SVGs of all paths.
            threshold (float): Coverage passed to ``sort_by_relevance`` as ``coverage_percent``.
                A negative value switches to keeping a fixed number of paths instead.

        """
        self.dir_svgs = dir_svgs
        self.dir_path_selection = dir_path_selection
        self.dir_truncated_svgs = dir_truncated_svgs
        self.dir_selected_paths = dir_selected_paths
        self.dir_decomposed_svgs = dir_decomposed_svgs
        self.threshold = threshold
        self.logo_path = logo_path
        # splitext instead of a fixed [:-4] slice, so extensions of any length are stripped correctly.
        self.logo_filename = os.path.splitext(os.path.basename(logo_path))[0]

    @staticmethod
    def get_elements(doc):
        """ Retrieve all animation relevant elements from SVG.

        Args:
            doc (xml.dom.minidom.Document): XML minidom document from which to retrieve elements.

        Returns:
            list (xml.dom.minidom.Element): All matching elements, grouped by tag in the fixed order
            path, circle, ellipse, line, polygon, polyline, rect, text. The position in this list is
            the element's ID used throughout the class.

        """
        elements = []
        for tag in ('path', 'circle', 'ellipse', 'line', 'polygon', 'polyline', 'rect', 'text'):
            elements.extend(doc.getElementsByTagName(tag))
        return elements

    @staticmethod
    def _mse_with_alpha_fallback(img_origin, img_reduced):
        """ Compute the MSE of two images, retrying on the first three channels of the reduced image.

        The retry covers renderings whose channel count differs from the original
        (presumably an extra alpha channel — TODO confirm against the PNG converter).

        Raises:
            ValueError: If the images still cannot be compared after the retry.
            IndexError: If the reduced image has fewer than three dimensions.

        """
        try:
            return mean_squared_error(img_origin, img_reduced)
        except ValueError:
            return mean_squared_error(img_origin, img_reduced[:, :, :3])

    def _write_truncated_svg(self, svg_path, kept_ids, logo):
        """ Drop every element whose ID is not in kept_ids, write the result to
        Selector.dir_truncated_svgs/<logo>_truncated.svg and return the truncated document as XML string.
        """
        doc = minidom.parse(svg_path)
        try:
            keep = set(kept_ids)
            # get_elements returns a snapshot list, so removing nodes does not shift the IDs.
            for element_id, element in enumerate(self.get_elements(doc)):
                if element_id not in keep:
                    element.parentNode.removeChild(element)
            # Written exactly once, also when the SVG contains no selectable element at all.
            with open(f'{self.dir_truncated_svgs}/{logo}_truncated.svg', 'wb') as file:
                file.write(doc.toprettyxml(encoding='iso-8859-1'))
            return doc.toxml()
        finally:
            doc.unlink()

    def delete_paths(self, logo=None):
        """ Function to iteratively delete single paths in an SVG and save the remaining logo
        to Selector.dir_path_selection/<logo>. One file `without_id_<i>.svg` is written per element,
        plus `original.svg`; the folder is then handed to `convert_svgs_in_folder`
        (presumably rendering the SVGs to PNG — the helper is not defined in this notebook).

        Args:
            logo (str, optional): Name of logo (without file type ending), read from
                Selector.dir_svgs/<logo>.svg. Defaults to the logo given at construction time
                (Selector.logo_path).

        """
        if logo is None:
            logo = self.logo_filename
            source_svg = self.logo_path
        else:
            source_svg = f'{self.dir_svgs}/{logo}.svg'
        target_dir = f'{self.dir_path_selection}/{logo}'
        Path(target_dir).mkdir(parents=True, exist_ok=True)

        doc = minidom.parse(source_svg)
        try:
            nb_original_elements = len(self.get_elements(doc))
            with open(f'{target_dir}/original.svg', 'wb') as file:
                file.write(doc.toprettyxml(encoding='iso-8859-1'))
        finally:
            doc.unlink()

        for i in range(nb_original_elements):
            # Re-parse for every element: removal mutates the document, and each output
            # must lack exactly one element.
            doc = minidom.parse(f'{target_dir}/original.svg')
            try:
                element = self.get_elements(doc)[i]
                element.parentNode.removeChild(element)
                with open(f'{target_dir}/without_id_{i}.svg', 'wb') as file:
                    file.write(doc.toprettyxml(encoding='iso-8859-1'))
            finally:
                doc.unlink()
        convert_svgs_in_folder(target_dir)

    def delete_paths_in_logos(self, logos):
        """ Iterate over list of logos to apply deletion of paths.

        Args:
            logos (list (str)): List of logos (without file type ending), each expected at
                Selector.dir_svgs/<logo>.svg.

        """
        start = datetime.now()
        n_logos = len(logos)
        for i, logo in enumerate(logos):
            if i % 20 == 0:
                logger.info(f'Current logo {i+1}/{n_logos}: {logo}')
            self.delete_paths(logo)
        logger.info(f'Time: {datetime.now() - start}')

    @staticmethod
    def sort_by_relevance(path_selection_folder, excluded_paths, coverage_percent, nr_paths_trunc=20):
        """ Sort paths in an SVG by relevance. Relevance of the path is measured by the MSE between the
        original logo and the logo resulting when deleting the path.
        The higher the MSE, the more relevant the given path.

        Args:
            path_selection_folder (str): Path to folder containing `original.png` and one
                `without_id_<i>.png` per path.
            excluded_paths (list (str)): Decomposed IDs (`<logo>_<i>`) that should not be considered as
                relevant. These paths are assigned a relevance score of 0.
            coverage_percent (float): Share of the summed MSE the kept paths must exceed. If negative,
                the `nr_paths_trunc` most relevant paths are kept instead.
            nr_paths_trunc (int): Number of paths kept when `coverage_percent` is negative.

        Returns:
            tuple: (kept path IDs sorted by relevance descending, relevance scores indexed by path ID,
            MSE scores of excluded paths that would have been relevant, decomposed IDs of those paths,
            number of kept paths, total number of paths, coverage_percent).

        """
        # Count only the per-path renderings, so unrelated files in the folder cannot inflate the number.
        nr_paths = sum(
            1 for name in os.listdir(path_selection_folder)
            if name.startswith('without_id_') and name.endswith('.png')
            and os.path.isfile(os.path.join(path_selection_folder, name)))
        img_origin = image.imread(os.path.join(path_selection_folder, "original.png"))
        logo = os.path.basename(os.path.normpath(path_selection_folder))

        relevance_scores = []
        missed_scores, missed_paths = [], []
        counter = 0
        for i in range(nr_paths):
            img_reduced = image.imread(os.path.join(path_selection_folder, "without_id_{}.png".format(i)))
            decomposed_id = f'{logo}_{i}'
            try:
                actual_mse = Selector_singe_Logo._mse_with_alpha_fallback(img_origin, img_reduced)
            except (ValueError, IndexError) as e:
                logger.warning(f'Could not calculate MSE for path {logo}_{i} '
                               f'- Error message: {e}')
                counter += 1
                # Always record an explicit score so a failure never reuses the previous path's MSE.
                relevance_scores.append(0)
                continue
            if decomposed_id in excluded_paths:
                missed_scores.append(actual_mse)
                missed_paths.append(decomposed_id)
                logger.warning(f'No embedding for path {decomposed_id}, actual MSE would be: {actual_mse}')
                relevance_scores.append(0)
            else:
                relevance_scores.append(actual_mse)

        # Stable descending sort: paths with equal scores keep their original ID order.
        relevance_score_ordering = sorted(range(nr_paths), key=lambda idx: relevance_scores[idx], reverse=True)

        if coverage_percent < 0:
            relevance_score_ordering = relevance_score_ordering[:nr_paths_trunc]
            n = len(relevance_score_ordering)
        else:
            ordered_scores = [relevance_scores[idx] for idx in relevance_score_ordering]
            # n is the number of paths to keep according to the covered share of the summed MSE.
            n = _get_number_of_paths_for_truncation(ordered_scores, coverage_percent)
            if n is None:
                # The helper found no cut-off (e.g. all scores are zero): keep every path.
                n = len(relevance_score_ordering)
            relevance_score_ordering = relevance_score_ordering[:n]

        missed_relevant_scores, missed_relevant_paths = [], []
        if relevance_score_ordering:
            # An excluded path counts as "missed" if it would have scored at least as high
            # as the least relevant path that was kept.
            cutoff = relevance_scores[relevance_score_ordering[-1]]
            for score, missed_id in zip(missed_scores, missed_paths):
                if score >= cutoff:
                    missed_relevant_scores.append(score)
                    missed_relevant_paths.append(missed_id)
        if len(missed_relevant_scores) > 0:
            logger.warning(f'Number of missed relevant paths due to embedding: {len(missed_relevant_scores)}')
        if counter > 0:
            logger.warning(f'Could not calculate MSE for {counter}/{nr_paths} paths')
        return (relevance_score_ordering, relevance_scores, missed_relevant_scores, missed_relevant_paths,
                n, len(relevance_scores), coverage_percent)

    def select_paths(self, excluded_paths):
        """ Select the relevant paths of the logo. The original SVG is copied to
        Selector.dir_selected_paths/<logo>_path_full.svg and each selected decomposed SVG to
        Selector.dir_selected_paths/<logo>_path_<rank>.svg. Requires directories
        Selector.dir_path_selection and Selector.dir_decomposed_svgs.

        Args:
            excluded_paths (list (str)): Decomposed IDs that should not be considered as relevant.

        Returns:
            list (str): Decomposed IDs of excluded paths that would have been relevant.

        """
        Path(self.dir_selected_paths).mkdir(parents=True, exist_ok=True)
        logo = self.logo_filename
        start = datetime.now()

        sorted_ids, _, missed_relevant_scores, missed_relevant_paths, _, _, _ = self.sort_by_relevance(
            f'{self.dir_path_selection}/{logo}', excluded_paths, self.threshold)
        copyfile(self.logo_path, f'{self.dir_selected_paths}/{logo}_path_full.svg')
        for j, id_ in enumerate(sorted_ids):
            copyfile(f'{self.dir_decomposed_svgs}/{logo}_{id_}.svg',
                     f'{self.dir_selected_paths}/{logo}_path_{j}.svg')
        logger.info(f'Total number of missed paths: {len(missed_relevant_scores)}')
        logger.info(f'Time: {datetime.now() - start}')
        return list(missed_relevant_paths)

    def truncate_svgs(self, svgs_folder, logos=None, excluded_paths=None, nr_paths_trunc=20):
        """ Truncate SVGs to most relevant paths and save them to Selector.dir_truncated_svgs. Requires directory
        Selector.dir_path_selection.

        Args:
            svgs_folder (str): Directory containing SVG files from which to select relevant paths.
            logos (list): List of logos to be truncated. Defaults to every `.svg` file in svgs_folder.
            excluded_paths (list (str)): Decomposed IDs that should not be considered as relevant.
                Defaults to no exclusions.
            nr_paths_trunc (int): Number of paths kept when Selector.threshold is negative.

        """
        excluded_paths = [] if excluded_paths is None else excluded_paths
        number_of_total_paths = 0
        number_of_kept_paths = 0
        Path(self.dir_truncated_svgs).mkdir(parents=True, exist_ok=True)
        start = datetime.now()
        if logos is None:
            logos = [os.path.splitext(f)[0] for f in listdir(svgs_folder)
                     if isfile(join(svgs_folder, f)) and f.lower().endswith('.svg')]
        for i, logo in enumerate(logos):
            print(logo)
            if i % 20 == 0:
                logger.info(f'Current logo {i}/{len(logos)}: {logo}')
            sorted_ids, _, _, _, kept_paths, total_paths, _ = self.sort_by_relevance(
                f'{self.dir_path_selection}/{logo}', excluded_paths, self.threshold, nr_paths_trunc)
            number_of_kept_paths += kept_paths
            number_of_total_paths += total_paths
            self._write_truncated_svg(f'{svgs_folder}/{logo}.svg', sorted_ids, logo)

        logger.info(f'Time: {datetime.now() - start}')
        # self.threshold is the coverage handed to sort_by_relevance, so it is defined even for an empty run.
        print(f"Kept {number_of_kept_paths} out of total {number_of_total_paths} to cover {self.threshold*100} percent of MSE ")

    def truncate_svgs_output_string(self, logos=None, excluded_paths=None, nr_paths_trunc=20):
        """ Truncate the logo given at construction time to its most relevant paths, save it to
        Selector.dir_truncated_svgs and return it as XML string. Requires directory
        Selector.dir_path_selection.

        Args:
            logos (list): Unused; kept so existing callers keep working.
            excluded_paths (list (str)): Decomposed IDs that should not be considered as relevant.
                Defaults to no exclusions.
            nr_paths_trunc (int): Number of paths kept when Selector.threshold is negative.

        Returns:
            str: XML of the truncated SVG document.

        """
        excluded_paths = [] if excluded_paths is None else excluded_paths
        Path(self.dir_truncated_svgs).mkdir(parents=True, exist_ok=True)
        start = datetime.now()
        logo = self.logo_filename
        print(logo)

        sorted_ids, _, _, _, kept_paths, total_paths, _ = self.sort_by_relevance(
            f'{self.dir_path_selection}/{logo}', excluded_paths, self.threshold, nr_paths_trunc)
        xml_string = self._write_truncated_svg(self.logo_path, sorted_ids, logo)

        logger.info(f'Time: {datetime.now() - start}')
        logger.info(f"Kept {kept_paths} out of total {total_paths} to cover {self.threshold*100} percent of MSE ")
        return xml_string




In [23]:
def truncate_logo_from_path_to_xml_string(logo_path, threshold=0.5):
    """Run the single-logo truncation pipeline and return the truncated SVG as an XML string.

    Args:
        logo_path (str): Path to the SVG file to truncate.
        threshold (float): Coverage threshold forwarded to ``Selector_singe_Logo``.

    Returns:
        str: Whatever ``Selector_singe_Logo.truncate_svgs_output_string`` returns for this logo.
    """
    selector = Selector_singe_Logo(logo_path, threshold=threshold)
    # The per-path renderings must exist before the relevance scores can be computed.
    selector.delete_paths()
    return selector.truncate_svgs_output_string()

In [24]:
# Truncate one example logo, using a coverage threshold of 0.33.
# NOTE(review): the input is a machine-specific absolute path; this cell will not run on another machine as-is.
xml_output = truncate_logo_from_path_to_xml_string("/Users/samailguliyev/SVG_LogoGenerator/animate_logos-main-untouched/data/svgs/San_Diego_State_Aztecs_logo.svg",
                                      threshold = 0.33)

San_Diego_State_Aztecs_logo
number of path to truncate: 1 out of 3


In [25]:
# Display the XML string returned for the example logo.
xml_output

'<?xml version="1.0" ?><svg height="244.35" version="1.1" viewBox="0 0 350.8093 244.3475" width="350.81" xml:space="preserve" xmlns="http://www.w3.org/2000/svg" xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"><g transform="matrix(1.25 0 0 -1.25 -337.2 576.26)"><g><path d="m525.64 375.42-2.872-2.93-0.517 0.183-0.531 3.904-10.863 2.669-2.377-4.284-0.709 0.036-2.002 4.053-7.74-1.254-4.906-5.307-8.872-0.018c-3.982 20.601-22.206 33.589-43.917 40.256l-2.263-20.663c8.138-5.151 13.622-13.791 13.622-28.165 0-35.869-35.103-37.521-58.534-37.521h-7.402l0.006-16.8h16.837c37.781 0 76.808 13.639 81.917 46.755 0 0 0.156 0.967 0.318 2.317l7.798-0.047 5.218-4.923 8.231-1.21 1.72 4.079 0.61 0.084 2.799-3.878 10.843 2.757 0.312 4.048 0.672 0.221 3.034-2.984c0.814 0.195 16.947 8.51 16.895 9.526-0.063 1.105-17.327 9.096-17.327 9.096zm-178.96-11.52-3.659 35.342 64.486 0.032 0.005 18.958-100.71-0.039 10.426-44.178c-0