<a href="https://colab.research.google.com/github/MarkoNovi/eprel_tyre_sticker/blob/main/eprel_tyre_stickers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Notebook shell commands: install the two third-party packages used below.
# cairosvg is imported as `cairosvg`; PyMuPDF is the package imported as `fitz`.
!pip install cairosvg
!pip install PyMuPDF

In [None]:
#pip install cairosvg
from PIL import Image
from io import BytesIO
import cairosvg
import pandas as pd
import requests
import os
from shutil import make_archive, move
import fitz


def check_make_dir():
    '''
    Prepare the working-directory layout used by the rest of the notebook.

    Creates "data", "data/img" and "data/jpeg" (relative to the current
    working directory) when they are missing, and moves a freshly uploaded
    "eprel_links.xlsx" from the working directory into "data".

    Returns:
        True when the directories exist and "data/eprel_links.xlsx" is in
        place. False when the spreadsheet cannot be found; in that case a
        message is printed and the function waits for the user to press
        enter before returning.
    '''
    # All other functions address files relative to the working directory,
    # so the layout is rooted there as well.
    data_dir = "data"
    for directory in (
        data_dir,
        os.path.join(data_dir, "img"),
        os.path.join(data_dir, "jpeg"),
    ):
        os.makedirs(directory, exist_ok=True)

    uploaded_file = "eprel_links.xlsx"
    links_file = os.path.join(data_dir, "eprel_links.xlsx")

    # Only attempt the move when there is something to move; a previous run
    # may already have placed the spreadsheet in "data".
    if os.path.exists(uploaded_file):
        try:
            move(uploaded_file, links_file)
        except OSError as err:  # shutil.Error is a subclass of OSError
            print("check_make_dir Error: ", err)

    if not os.path.exists(links_file):
        print("check_make_dir Error: ")
        input('Check if you uploaded "eprel_links.xlsx" file on google colab correctly and try again.')
        return False
    return True


def get_eprel_sticker(eprel_link, timeout=30):
    '''
    Download the SVG label for an EPREL link into "data/img".

    Args:
        eprel_link: EPREL link or bare identifier; only the part after the
            last "/" is used, both in the request URL and as the file name.
        timeout: seconds to wait for the server before giving up.

    Returns:
        True when the server answered 200 and the file was written to
        "data/img/<id>.svg"; False on any other status code, on a network
        error or when the file cannot be written.
    '''
    img_name = str(eprel_link).split('/')[-1]
    url = f'https://eprel.ec.europa.eu/label/Label_{img_name}.svg'
    try:
        # Without a timeout a stalled connection would block the whole batch.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print('Error downloading image. Status code:', response.status_code)
            return False
        # Relative path, matching where convert_svg_jpg looks for the file.
        with open(f'data/img/{img_name}.svg', 'wb') as f:
            f.write(response.content)
        return True
    except (requests.RequestException, OSError) as err:
        print('get_eprel_sticker Error: ', err)
        return False


def get_eprel_sticker_pdf(eprel_link, timeout=30):
    '''
    Download the PDF label for an EPREL link into "data/img".

    Args:
        eprel_link: EPREL link or bare identifier; only the part after the
            last "/" is used, both in the request URL and as the file name.
        timeout: seconds to wait for the server before giving up.

    Returns:
        True when the server answered 200 and the file was written to
        "data/img/<id>.pdf"; False on any other status code, on a network
        error or when the file cannot be written.
    '''
    img_name = str(eprel_link).split('/')[-1]
    url = f'https://eprel.ec.europa.eu/label/Label_{img_name}.pdf'
    try:
        # Without a timeout a stalled connection would block the whole batch.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print('Error downloading image. Status code:', response.status_code)
            return False
        with open(f'data/img/{img_name}.pdf', 'wb') as f:
            f.write(response.content)
        return True
    except (requests.RequestException, OSError) as err:
        print('get_eprel_sticker_pdf Error: ', err)
        return False


def convert_svg_jpg(img_name, jpg_name):
    '''
    Convert "data/img/<img_name>.svg" to "data/jpeg/<jpg_name>EPREL.jpg".

    The SVG is rendered to PNG in memory with cairosvg and then flattened
    onto a white background before being saved as JPEG.

    Args:
        img_name: file name (without extension) of the SVG in "data/img".
        jpg_name: prefix of the JPEG file name written to "data/jpeg".

    Returns:
        True on success, False when rendering or saving fails (the error is
        printed so the caller's fallback is not silent).
    '''
    try:
        input_svg = f"data/img/{img_name}.svg"
        png_data = cairosvg.svg2png(url=input_svg)
        rendered = Image.open(BytesIO(png_data)).convert('RGBA')
        # JPEG has no alpha channel; dropping alpha with convert('RGB') would
        # turn transparent areas black, so composite onto white instead.
        flattened = Image.new('RGB', rendered.size, 'white')
        flattened.paste(rendered, mask=rendered.split()[-1])
        output_jpg = f"data/jpeg/{jpg_name}EPREL.jpg"
        flattened.save(output_jpg)
        return True
    except Exception as err:
        # Broad on purpose: any rendering/saving failure must only trigger
        # the caller's PDF fallback, never abort the batch.
        print('convert_svg_jpg Error: ', err)
        return False


def pdf_to_jpg(pdf_file, jpg_name):
    '''
    Render "data/img/<pdf_file>.pdf" to "data/jpeg/<jpg_name>EPREL.jpg".

    Args:
        pdf_file: file name (without extension) of the PDF in "data/img".
        jpg_name: prefix of the JPEG file name written to "data/jpeg".

    Returns:
        True when a JPEG was written; False when the PDF has no pages or
        when opening, rendering or saving fails (the error is printed).
    '''
    # Relative paths, consistent with convert_svg_jpg and zip_jpeg_files.
    input_pdf = f"data/img/{pdf_file}.pdf"
    output_jpg = f"data/jpeg/{jpg_name}EPREL.jpg"
    try:
        pdf_document = fitz.open(input_pdf)
        try:
            if pdf_document.page_count == 0:
                # Nothing would be written, so do not report success.
                print('pdf_to_jpeg as Error: ', f'{input_pdf} has no pages')
                return False
            # There is a single output file, so only one page can be kept.
            # Previously every page was rendered to that file and the last
            # one won; render just that page to get the same result.
            # NOTE(review): confirm the last page is the one wanted if
            # multi-page labels ever occur.
            page = pdf_document.load_page(pdf_document.page_count - 1)
            image = page.get_pixmap()
            image.save(output_jpg)
        finally:
            # Release the document even when rendering or saving raises.
            pdf_document.close()
        return True
    except Exception as e:
        print('pdf_to_jpeg as Error: ', e)
        return False


def zip_jpeg_files():
    '''
    Pack the contents of "data/jpeg" into a zip archive.

    The archive base name is "data/jpeg"; make_archive adds the ".zip"
    extension itself.

    Returns True on success. On any exception the error is printed and
    False is returned.
    '''
    source_dir = archive_base = 'data/jpeg'
    try:
        make_archive(archive_base, 'zip', source_dir)
    except Exception as err:
        print("zip_jpeg_files Error:", err)
        return False
    else:
        return True


def _as_id_strings(column):
    '''Return *column* as whole-number strings when it is numeric, else unchanged.'''
    try:
        # Excel often yields floats (123.0); go through int to get "123".
        return column.fillna(0).astype(int).astype(str)
    except (ValueError, TypeError, OverflowError):
        return column


def main():
    '''
    Download the EPREL label for every row of "data/eprel_links.xlsx" and
    save it as JPEG.

    The spreadsheet must contain the columns "TyreQRCode", "Tokić šifra"
    and "Kataloški broj". For each row the SVG label is downloaded and
    converted; when the conversion fails the PDF label is downloaded and
    rendered instead. The JPEG is named after the "Tokić šifra" value.
    The outcome of every row is recorded in the "Done?" column of
    "data/done.xlsx", and the JPEG folder is zipped at the end.

    Returns:
        False when the setup fails or an unexpected error occurs, the
        string returned by the final input() prompt when zipping succeeds,
        and None when zipping fails.
    '''
    try:
        input('If you already did not, please place "eprel_links.xlsx" with column "TyreQRCode" in data dir and press enter to continue. Or just press enter to continue.')
        ## check if any, if not create dir /data, data/img, data/jpeg
        if check_make_dir() is False:
            return False

        ## read and format xlsx file with eprel links
        df = pd.read_excel(r'data/eprel_links.xlsx')
        # Build a real DataFrame: assigning extra "columns" onto a Series
        # only adds elements, so the per-row lookups below could not work.
        eprel_links = pd.DataFrame({
            'TyreQRCode': _as_id_strings(df['TyreQRCode']),
            'tokic_sifra': _as_id_strings(df['Tokić šifra']),
            'kataloski_broj': _as_id_strings(df['Kataloški broj']),
        })
        eprel_links = eprel_links.drop_duplicates()
        # Only the link and the output name are required to process a row.
        eprel_links = eprel_links.dropna(subset=['TyreQRCode', 'tokic_sifra'])
        # reset_index keeps the original row position in an "index" column.
        eprel_links = eprel_links.reset_index()
        eprel_links['Done?'] = ''
        total = len(eprel_links)

        try:
            ## loop the eprel links
            for index in eprel_links.index:
                try:
                    print(index, "/", total)
                    ## the identifier is the last path segment of the link
                    eprel_link = str(eprel_links.at[index, 'TyreQRCode']).split('/')[-1]
                    img_name = eprel_link
                    jpg_name = eprel_links.at[index, 'tokic_sifra']

                    ## get svg image from eprel link
                    if get_eprel_sticker(eprel_link) is False:
                        eprel_links.at[index, 'Done?'] = 'svg download error'
                        print('svg download error', eprel_link)
                        continue

                    ## convert svg image to jpeg format, fall back to the pdf label
                    if convert_svg_jpg(img_name, jpg_name) is False:
                        if get_eprel_sticker_pdf(eprel_link) is False:
                            eprel_links.at[index, 'Done?'] = 'download error'
                            print('download error', eprel_link)
                            continue
                        if pdf_to_jpg(img_name, jpg_name) is False:
                            eprel_links.at[index, 'Done?'] = 'convert error'
                            print('convert error', jpg_name)
                            continue

                    ## mark eprel link as done
                    eprel_links.at[index, 'Done?'] = 'Done!'
                except Exception as err:
                    # One bad row must not stop the batch.
                    eprel_links.at[index, 'Done?'] = 'loop error'
                    print('loop error', err)
        finally:
            # Always persist the report, including rows that failed and
            # runs that were interrupted part-way through.
            eprel_links.to_excel('data/done.xlsx')

        if zip_jpeg_files() is True:
            return input('Please check "done.xlsx" file for any errors and download your "jpeg.zip" file')

    except Exception as err:
        print("main ERROR: ", err)
        input('')
        return False


# Run the whole download/convert/zip workflow when this cell or script is
# executed directly (not when the code is imported as a module).
if __name__ == "__main__":
    main()


In [66]:
## Deletes the whole "data/" directory in the current working directory,
## including everything inside it ("eprel_links.xlsx", "img", "jpeg", "done.xlsx").
!rm -r data/

In [48]:
# !rm -r data/img/*
# !rm -r data/jpeg/*