# Tracking Scraper

In [1]:
from selenium import webdriver
from selenium.common.exceptions import *
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

from geopy.exc import GeopyError
from geopy.geocoders import Nominatim
from io import BytesIO
from PIL import Image
from pymongo import MongoClient

import datetime
import json
import logging
import pytesseract
import re
import time
import sys

## Exception class and constants

In [None]:
class TrackingScraperError(Exception):
    """Custom general exception for the Tracking Web Scraper."""
    pass

class TrackingScraperAssertionError(TrackingScraperError):
    """Exception for assertion commands."""
    def __init__(self, process_type, assertion_type):
        # Write message
        message = "not found" if assertion_type else "found"
        super().__init__("Assertion failed, " + process_type + " element(s) unexpectedly " + message)
        # Save attributes
        self.process_type   = process_type
        self.assertion_type = assertion_type

class TrackingScraperSwitcherError(TrackingScraperError):
    """Exception for errors in the switcher command file."""
    def __init__(self, message, command):
        # Write message
        super().__init__(message)
        # Save attributes
        self.command = command

In [4]:
class TrackingScraperConfig:
    """Constants and basic configuration for the Tracking Web Scraper."""
    
    # Default executable path for the Google Chrome webdriver
    DEFAULT_PATH_CHROME     = "../driver/chromedriver"
    # Default executable path for the Firefox webdriver
    DEFAULT_PATH_FIREFOX    = "../driver/geckodriver"
    
    # Default database name
    DEFAULT_DATABASE_NAME   = "scraper2"
    # Default table name for containers
    DEFAULT_CONTAINER_TABLE = "containers"
    # Default table name for container movements
    DEFAULT_MOVEMENT_TABLE  = "container_movements"
    # Default query parameters for containers
    DEFAULT_CONTAINER_QUERY = ["container"]
    # Default query parameters for container movements
    DEFAULT_MOVEMENT_QUERY  = ["container", "date"]
    
    # Default user agent for the Nominatim geocode API service
    DEFAULT_GEOCODE_AGENT   = "Tracking Scraper for Containers"
    # Default logging level configuration
    DEFAULT_LOGGING_LEVEL   = logging.INFO
    # Default logging format configuration
    DEFAULT_LOGGING_FORMAT  = "[%(levelname)s %(asctime)s] %(message)s"
    
    # Default timeout for short processing, in seconds
    DEFAULT_TIMEOUT         = 30
    # Default timeout for long processing, in seconds
    DEFAULT_TIMEOUT_LONG    = 90
    # Default wait for long actions, in seconds
    DEFAULT_WAIT_LONG       = 5
    # Default wait for short actions, in seconds
    DEFAULT_WAIT_SHORT      = 1.5
    
    # Default value for the key "required" in all types
    DEFAULT_KEY_REQUIRED    = True
    # Default value for the key "action" in type "alert"
    DEFAULT_KEY_ACTION      = True
    # Default value for the key "wait" in type "click"
    DEFAULT_KEY_WAIT        = True
    # Default value for the key "clean" in type "write"
    DEFAULT_KEY_CLEAN       = False
    # Default value for the key "enter" in type "write"
    DEFAULT_KEY_ENTER       = False
    # Default value for the key "overwrite" in multiple configuration
    DEFAULT_KEY_OVERWRITE   = False
    # Default value for the key "frame" in selector types
    DEFAULT_KEY_FRAME       = False
    
    # Default value for the key "processed" in upserting container info
    DEFAULT_KEY_PROCESSED   = True
    # Default value for the key "estimated" in container movements
    DEFAULT_KEY_ESTIMATED   = True
    
    # Default thousand separator symbol
    DEFAULT_THOUSAND_SYMBOL = ","
    # Default datetime locale information
    DEFAULT_DATETIME_LOCALE = {
        "hours": -5
    }
    
    # Default image to black-and-white pixel pivot
    DEFAULT_BNW_PIVOT       = 32
    # Default value for the key "alphabet" in image processing (include alphabet symbols?)
    DEFAULT_KEY_ALPHABET    = True
    # Default value for the key "numbers" in image processing (include numeric symbols?)
    DEFAULT_KEY_NUMBERS     = False
    # Default value for the key "length" in image processing (desired text length)
    DEFAULT_KEY_LENGTH      = 4

## Selector class

In [5]:
class TrackingScraperSwitcher:
    """
    Switcher for selecting and saving Web elements and subelements in a tracking-related document.
    """
    
    def __init__(self, driver, document, configuration, parent_command, parent_element = None):
        self.__driver         = driver
        self.__document       = document
        self.__configuration  = configuration
        self.__parent_command = parent_command
        self.__parent_element = driver if parent_element is None else parent_element
    
    @property
    def document(self):
        """Returns the stored tracking-related dictionary."""
        return self.__document
    
    ###############################################################################################
    
    def process(self):
        """
        Get Web elements based on the current configuration command, then process or return them
        accordingly. Returns True if all commands and subcommands were executed successfully,
        False if one command failed, or the list of Web elements if no subcommands were found.
        """
        # Get process type
        process_type = self.__parent_command.get("type")
        if process_type is None:
            raise TrackingScraperSwitcherError("Process type not found", self.__parent_command)
        # logging.info("Process type: %s", process_type)
        
        # Execute process based on process type
        try:
            method = getattr(self, "_process_" + process_type)
            return method()
        except AttributeError:
            raise TrackingScraperSwitcherError("Process type " + process_type + " is not valid",
                                               self.__parent_command)
    
    ###############################################################################################
    
    def _process_id(self):
        return self.__process_dom_elements(By.ID)
    def _process_class(self):
        return self.__process_dom_elements(By.CLASS_NAME)
    def _process_css(self):
        return self.__process_dom_elements(By.CSS_SELECTOR)
    def _process_name(self):
        return self.__process_dom_elements(By.NAME)
    def _process_tag(self):
        return self.__process_dom_elements(By.TAG_NAME)
    def _process_xpath(self):
        return self.__process_dom_elements(By.XPATH)
    
    def __process_dom_elements(self, selector_type):
        # Get selector
        selector = self.__parent_command.get("selector")
        if selector is None:
            raise TrackingScraperSwitcherError("Selector not found in process by " + selector_type,
                                               self.__parent_command)
        
        # Check assertions
        assertions = self.__check_assertions(selector_type, selector)
        if assertions is True:
            # logging.info("Assertions are correct")
            return True
        
        # Get DOM elements
        dom_elements = self.__parent_element.find_elements(selector_type, selector)
        
        # Check requirements
        required = self.__parent_command.get("required", TrackingScraperConfig.DEFAULT_KEY_REQUIRED)
        if len(dom_elements) == 0:
            logging.info("Command: %s", self.print_command(self.__parent_command))
            logging.info("No elements found, using required")
            return not required
        
        # Get child command list and process them, if possible
        commands = self.__parent_command.get("commands")
        if isinstance(commands, list):
            return self.__process_child_commands(commands, dom_elements)
        
        # Get a child command for all elements and process them, if possible
        child_command = self.__parent_command.get("command")
        if isinstance(child_command, dict):
            for child_element in dom_elements:
                result = self.__generate_child_process(child_command, child_element)
                if result is not True:
                    return result
            return True
        
        # If no single child command was found, return all DOM elements
        logging.info("Command: %s", self.print_command(self.__parent_command))
        logging.info("No child commands found, return all elements")
        return dom_elements
    
    def __check_assertions(self, selector_type, selector):
        assertion = self.__parent_command.get("assert")
        
        if isinstance(assertion, bool):
            # Set expected conditions depending if we want to switch to a frame or not
            frame = self.__parent_command.get("frame", TrackingScraperConfig.DEFAULT_KEY_FRAME)
            if frame:
                conditions = EC.frame_to_be_available_and_switch_to_it((selector_type, selector))
            else:
                conditions = EC.presence_of_all_elements_located((selector_type, selector))
            
            # Prepare waiter
            waiter = WebDriverWait(self.__driver, TrackingScraperConfig.DEFAULT_TIMEOUT)
            
            if assertion:
                # Assert at least one element found
                try:
                    waiter.until(conditions)
                except TimeoutException:
                    raise TrackingScraperAssertionError(selector_type, True)
            else:
                # Assert no elements found
                try:
                    waiter.until_not(conditions)
                except TimeoutException:
                    raise TrackingScraperAssertionError(selector_type, False)
            
            # Wait a little bit and return
            time.sleep(TrackingScraperConfig.DEFAULT_WAIT_SHORT)
            return True
        
        # logging.info("Command: %s", self.print_command())
        # logging.info("Assertions not found")
        return False
    
    def __process_child_commands(self, commands, elements):
        for child_command in commands:
            # Get index
            index = child_command.get("index")
            if index is None:
                raise TrackingScraperSwitcherError("Child index command not found", child_command)
            
            # Check requirements
            if index >= len(elements):
                logging.info("Command: %s", self.print_command(child_command))
                logging.info("Child element at index %d not found, using required", index)
                return not child_command.get("required", TrackingScraperConfig.DEFAULT_KEY_REQUIRED)
            
            # Process child element at specified index
            # logging.info("Child index: %d", index)
            result = self.__generate_child_process(child_command, elements[index])
            
            # If no subelements were found, return that element or element list
            # If a minor error occured (e.g. element not found), return False
            if result is not True:
                return result
        
        # If everything was fine, return True
        return True
    
    def __generate_child_process(self, child_command, child_element):
        return TrackingScraperSwitcher(self.__driver, self.__document, self.__configuration,
                                       child_command, child_element).process()
    
    ###############################################################################################
    
    def _process_split(self):
        # Get text to split
        parent_text = self.__get_parent_text()
        
        # Get text separator
        delimiter = self.__parent_command.get("delimiter")
        if delimiter is None:
            raise TrackingScraperSwitcherError("No separator found", self.__parent_command)
        
        # Split text
        elements = parent_text.split(delimiter)
        
        # Get child command list and process them, if possible
        commands = self.__parent_command.get("commands")
        if isinstance(commands, list):
            return self.__process_child_commands(commands, elements)
        
        # If no single child command was found, return split list
        return elements
    
    def __get_parent_text(self):
        parent_text = self.__parent_element
        try:
            return parent_text.text.strip() # value is a DOM element, we need its inner text
        except AttributeError:
            return parent_text.strip() # value is already a string
    
    ###############################################################################################
    
    def _process_regex(self):
        """Split text from a DOM element based on a delimiter."""
        
        # Get text
        text = self.__get_parent_text()
        
        # Get regular expression pattern
        pattern = self.__parent_command.get("pattern")
        if pattern is None:
            raise TrackingScraperSwitcherError("No regular expression found", self.__parent_command)
        
        # Match expression with text
        regex    = re.search(pattern, text)
        required = self.__parent_command.get("required", TrackingScraperConfig.DEFAULT_KEY_REQUIRED)
        if regex is None:
            logging.info("Command: %s", self.print_command(self.__parent_command))
            logging.info("Regular expression does not match text, using required")
            return not required
        
        # Get list of matched elements
        elements = list(regex.groups())
        
        # Get child command list and process them, if possible
        commands = self.__parent_command.get("commands")
        if isinstance(commands, list):
            return self.__process_child_commands(commands, elements)
        
        # If no single child command was found, return list of matched elements
        return elements
    
    ###############################################################################################
    
    def _process_save(self):
        """Saves text or subtext from a DOM element, or an specified value."""
        
        attribute = self.__parent_command.get("key")
        if attribute is None:
            raise TrackingScraperSwitcherError("Save key not found", self.__parent_command)
        
        # If a value was already defined, save it and exit
        value = self.__parent_command.get("value")
        if value is not None:
            self.__document[attribute] = value
            return True
        
        # Get text to be saved, and verify if it's not empty
        value    = self.__get_parent_text()
        required = self.__parent_command.get("required", TrackingScraperConfig.DEFAULT_KEY_REQUIRED)
        if len(value) == 0:
            logging.info("Command: %s", self.print_command(self.__parent_command))
            logging.info("Text to save is empty, using required")
            return not required
        
        # Format type if necessary
        format_type = self.__parent_command.get("format")
        if format_type is not None:
            value = TrackingScraperConverter(self.__document, value, format_type,
                                             self.__configuration).convert()
            # If a value already exists in the attribute and it's a datetime object, join them
            if self.__join_datetimes_if_possible(attribute, value):
                return True
            
        # Save according to parent key and formatting value
        self.__document[attribute] = value
        return True
    
    def __join_datetimes_if_possible(self, attribute, new_value):
        # Check if attribute exists
        if attribute not in self.__document:
            return False
        
        # Get value from attribute
        old_value = self.__document[attribute]
        
        # Check if old value is a date and new value is a time
        if isinstance(old_value, datetime.datetime) and isinstance(new_value, datetime.time):
            self.__document[attribute] = datetime.datetime.combine(old_value.date(), new_value)
            return True
        
        # Return False if nothing was found
        return False
    
    ###############################################################################################
    
    def _process_attr(self):
        # Get attribute name
        attribute_name = self.__parent_command.get("name")
        if attribute_name is None:
            raise TrackingScraperSwitcherError("Attribute name not found", self.__parent_command)
        
        # Get attribute value from parent element
        attribute = self.__parent_element.get_attribute(attribute_name)
        
        # Get child command, if none found, return attribute
        child_command = self.__parent_command.get("command")
        if child_command is not None:
            # logging.info("ATTRIBUTE - Child command found")
            return TrackingScraperSwitcher(self.__driver, self.__document, self.__configuration,
                                           child_command, attribute).process()
        # print(attribute)
        # logging.info("ATTRIBUTE - No child command found")
        return attribute
    
    ###############################################################################################
    
    def _process_compare(self):
        # Get text to compare
        text = self.__get_parent_text()
        
        # Get values to compare
        values = self.__parent_command.get("values")
        if values is None:
            raise TrackingScraperSwitcherError("Values to compare not found", self.__parent_command)
        
        # Check if text equals to value, or if it is in value list, then act accordingly
        if text in values:
            commands = self.__parent_command.get("success")
            return self.__process_compare_commands(commands, "Success")
        else:
            commands = self.__parent_command.get("failure")
            return self.__process_compare_commands(commands, "Failure")
    
    def __process_compare_commands(self, commands, compare_result):
        # Check requirements
        required = self.__parent_command.get("required", TrackingScraperConfig.DEFAULT_KEY_REQUIRED)
        if commands is None:
            logging.info("Command: %s", self.print_command(self.__parent_command))
            logging.info(compare_result + " commands not found, resorting to required")
            return not required
        
        # Process child commands
        for child_command in commands:
            result = self.__generate_child_process(child_command, self.__parent_element)
            if result is not True:
                return result
        return True
    
    ###############################################################################################
    
    def _process_write(self):
        # Get value
        value = self.__parent_command.get("value")
        if value is None:
            # Get value from attribute
            attribute = self.__parent_command.get("attribute")
            if attribute is None:
                raise TrackingScraperSwitcherError("No value or attribute to use as input",
                                                   self.__parent_command)
            value = self.__document.get(attribute)
        
        try:
            # Clear element if specified
            if self.__parent_command.get("clean", TrackingScraperConfig.DEFAULT_KEY_CLEAN):
                self.__parent_element.clear()
            # Write value
            self.__parent_element.send_keys(value)
            # Send enter if specified
            if self.__parent_command.get("enter", TrackingScraperConfig.DEFAULT_KEY_ENTER):
                self.__parent_element.send_keys(Keys.ENTER)
        except ElementNotInteractableException:
            raise TrackingScraperSwitcherError("Element is not interactable (because of Selenium)",
                                               self.__parent_command)
        except AttributeError:
            raise TrackingScraperSwitcherError("Element is not interactable (because of attribute)",
                                               self.__parent_command)
        
        # Return True to indicate everything is OK
        time.sleep(TrackingScraperConfig.DEFAULT_WAIT_SHORT)
        return True
    
    ###############################################################################################
    
    def _process_alert(self):
        assertion = self.__parent_command.get("assertion")
        # TODO: Usar waits
        try:
            # Try to switch to alert
            alert = self.__driver.switch_to.alert
            if assertion is False:
                raise TrackingScraperAssertionError("alert", False)
            # Accept or dismiss action depending on command
            if self.__parent_command.get("action", TrackingScraperConfig.DEFAULT_KEY_ACTION):
                alert.accept()
            else:
                alert.dismiss()
        except NoAlertPresentException:
            if assertion is True:
                raise TrackingScraperAssertionError("alert", True)
        
        # Return True to indicate everything is OK
        return True
    
    ###############################################################################################
    
    def _process_click(self):
        # Check requirements
        required = self.__parent_command.get("required", TrackingScraperConfig.DEFAULT_KEY_REQUIRED)
        if not self.__parent_element.is_displayed():
            return not required
        if not self.__parent_element.is_enabled():
            return not required
        
        try:
            # Try to click the element
            self.__parent_element.click()
            
            # Wait 2 or 5 seconds depending on "wait" attribute
            wait_time = self.__parent_command.get("wait", TrackingScraperConfig.DEFAULT_KEY_WAIT)
            if wait_time:
                time.sleep(TrackingScraperConfig.DEFAULT_WAIT_LONG)
            else:
                time.sleep(TrackingScraperConfig.DEFAULT_WAIT_SHORT)
                
            # Return True to indicate everything is OK
            return True
        except ElementNotInteractableException:
            return not required
    
    ###############################################################################################
    
    def _process_ocr(self):
        # Reset image to default width and height
        self.__set_element_attribute("width")
        self.__set_element_attribute("height")
        
        # Take screenshot of element and process it
        image_bytes = self.__parent_element.screenshot_as_png
        result = TrackingScraperImageProcessor(self.__parent_command, image_bytes).execute()
        
        # If image processing failed, execute failure commands if they exist
        if result is None:
            failure_command = self.__parent_command.get("failure")
            if failure_command is not None:
                self.__generate_child_process(failure_command, self.__driver)
            return False
        
        # Find element to write image text to
        element_command = self.__parent_command.get("write")
        if not isinstance(element_command, dict):
            raise TrackingScraperSwitcherError("Process OCR write command not found",
                                               self.__parent_command)
        element = self.__generate_child_process(element_command, self.__driver)
        
        # Write to element
        write_command = {"type": "write", "value": result}
        return self.__generate_child_process(write_command, element)
    
    def __set_element_attribute(self, attribute_name):
        # Get attribute value to set, if none found, value will be set to null
        attribute_value = self.__parent_command.get(attribute_name)
        # Set element attribute with JavaScript
        self.__driver.execute_script("arguments[0].setAttribute(arguments[1], arguments[2]);",
                                     self.__parent_element, attribute_name, attribute_value)
    
    ###############################################################################################
    
    @staticmethod
    def print_command(command):
        command_texts = []
        for key, value in command.items():
            if isinstance(value, list):
                command_texts.append('"{0}": list({1})'.format(key, len(value)))
            elif isinstance(value, dict):
                command_texts.append('"{0}": dict({1})'.format(key, len(value)))
            elif isinstance(value, str):
                command_texts.append('"{0}": "{1}"'.format(key, value))
            else:
                command_texts.append('"{0}": {1}'.format(key, value))
        return "{" + ", ".join(command_texts) + "}"

## Converter class

In [1]:
class TrackingScraperConverter:
    """Utility class to convert text to other Python types."""
    
    def __init__(self, document, raw_text, format_type, configuration):
        self.__document      = document
        self.__raw_text      = raw_text
        self.__format_type   = format_type
        self.__configuration = configuration
    
    def convert(self):
        """Try to convert to the desired type, if none found, return text as-is."""
        try:
            method = getattr(self, "_convert_to_" + self.__format_type)
            return method()
        except AttributeError:
            logging.info("Convertion to " + self.__format_type + " not supported, resorting to text")
            return self.__raw_text
        except TypeError:
            raise TrackingScraperError("Convertion to " + self.__format_type + " cannot be invoked")
    
    def _convert_to_int(self):
        """Convert text to an integer."""
        try:
            return int(self.__raw_text.replace(TrackingScraperConfig.DEFAULT_THOUSAND_SYMBOL, ""))
        except ValueError:
            logging.info("Convertion to integer failed, resorting to text")
            return self.__raw_text
    
    def _convert_to_float(self):
        """Convert text to a double-precision floating-point number."""
        try:
            return float(self.__raw_text.replace(TrackingScraperConfig.DEFAULT_THOUSAND_SYMBOL, ""))
        except ValueError:
            logging.info("Convertion to float failed, resorting to text")
            return self.__raw_text
    
    def _convert_to_double(self):
        # Alias for self._convert_to_float().
        return self._convert_to_float()
    
    def _convert_to_date(self):
        """Convert text to a Python datetime object."""
        # Get datetime patterns
        try:
            patterns = self.__configuration["general"]["date_formats"]
        except KeyError:
            logging.info("Datetime patterns not found, resorting to text")
            return self.__raw_text
        
        # Try each pattern until it matches one
        for pattern in patterns:
            try:
                return datetime.datetime.strptime(self.__raw_text, pattern)
            except ValueError:
                continue
        
        # If none of the patterns matched, return text as-is
        logging.info("None of the patterns matched, resorting to text")
        return self.__raw_text
    
    def _convert_to_datetime(self):
        return self._convert_to_date()
    
    def _convert_to_time(self):
        """Convert text to a Python time object."""
        value = self._convert_to_date()
        if isinstance(value, datetime.datetime):
            return value.time()
        return value
    
    def _convert_to_datelocal(self):
        """Convert text to a Python datetime object taking the defined locale into account."""
        value = self._convert_to_date()
        if isinstance(value, datetime.datetime):
            return value - datetime.timedelta(**TrackingScraperConfig.DEFAULT_DATETIME_LOCALE)
        return value
    
    def _convert_to_timelocal(self):
        """Convert text to a Python time object taking the defined locale into account."""
        value = self._convert_to_datelocal()
        if isinstance(value, datetime.datetime):
            return value.time()
        return value
    
    def _convert_to_location(self):
        """Convert text to a location with latitude and longitude geographical points."""
        # Get location (raw text) as address
        location = self.__raw_text
        # Use last line as parent location and query it to Nominatim
        try:
            geolocator  = Nominatim(user_agent = TrackingScraperConfig.DEFAULT_GEOCODE_AGENT)
            coordinates = geolocator.geocode(location.split("\n")[-1])
            # Save the coordinates given by the service, if they exist
            if coordinates is not None:
                self.__document["latitude"]  = coordinates.latitude
                self.__document["longitude"] = coordinates.longitude
        except GeopyError:
            logging.exception("Error while trying to query geocode")
        # Finally, go to the scraper switcher to save the location as text
        return location
    
    def _convert_to_status(self):
        """Convert text to a tracking status based on the configuration for translation."""
        # TO-DO
        return self.__raw_text

## Image processor class

In [None]:
class TrackingScraperImageProcessor:
    """Image processor for the Tracking Scraper."""
    
    def __init__(self, parent_command, image_data):
        self.__parent_command = parent_command
        try:
            self.__image = Image.open(BytesIO(image_data))
        except Exception as ex:
            raise TrackingScraperSwitcherError("Image could not be opened: " + str(ex),
                                               self.__parent_command)
    
    def execute(self):
        """Process image with an OCR, depending on command configuration."""
        
        # Execute image child commands
        commands = self.__parent_command.get("commands")
        if isinstance(commands, list):
            for child_command in commands:
                self._find_command(child_command)
        
        # Process text with an OCR
        text = self._image_to_text()
        
        # Check if it has the desired length
        length = self.__parent_command.get("length", TrackingScraperConfig.DEFAULT_KEY_LENGTH)
        if len(text) != length:
            logging.info("Text does not have desired length, retrying...")
            return None
        
        # Check for possible problems in text
        filter_chars = self.__parent_command.get("filter")
        if filter_chars is not None:
            for char in filter_chars:
                if char in text:
                    logging.info("Text has dangerous characters, retrying...")
                    return None
        
        return text
    
    def _find_command(self, command):
        try:
            # Get command type
            command_type = command["type"]
            # Get command based on command type
            method = getattr(self, "_command_" + command_type)
            # Execute command
            return method()
        except KeyError:
            raise TrackingScraperSwitcherError("Image command type not found", self.__parent_command)
        except AttributeError:
            raise TrackingScraperSwitcherError("Image command type " + command_type + " is not valid",
                                               self.__parent_command)
    
    def _command_bnw(self, command):
        """Converts image to black and white."""
        
        # Get pixel pivot value, if it doesn't exist, assume default
        pivot = command.get("pivot", TrackingScraperConfig.DEFAULT_BNW_PIVOT)
        if not isinstance(pivot, int):
            raise TrackingScraperError("Image to black and white: pivot must be an integer")
        if pivot < 0 or pivot > 255:
            raise TrackingScraperError("Image to black and white: pivot must be between [0, 255]")
        
        # Convert image to black and white
        self.__image = Image.eval(self.__image, lambda pixel: 0 if pixel <= pivot else 255)
    
    def _image_to_text(self):
        whitelist = ""
        
        # Check if we should include alphabetical letters
        alphabet = self.__parent_command.get("alphabet", TrackingScraperConfig.DEFAULT_KEY_ALPHABET)
        if alphabet:
            whitelist += "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        
        # Check if we should include numeric letters
        numbers = self.__parent_command.get("numbers", TrackingScraperConfig.DEFAULT_KEY_NUMBERS)
        if numbers:
            whitelist += "0123456789"
        
        # Process image according to whitelist
        if whitelist:
            text = pytesseract.image_to_string(self.__image,
                                               config = "-c tessedit_char_whitelist=" + whitelist)
        else:
            text = pytesseract.image_to_string(self.__image)
        
        # Clean whitespace
        return text.replace(" ", "")

## Main scraper class

In [9]:
class TrackingScraper:
    """Main class for the Tracking Web Scraper."""
    
    def __init__(self, driver, container_table, movement_table, document):
        self.__driver          = driver
        self.__container_table = container_table
        self.__movement_table  = movement_table
        self.__document        = document
        
        # Get configuration file
        try:
            with open("../config/" + self.__document["carrier"] + ".json") as file:
                self.__configuration = json.load(file)
        except KeyError:
            raise TrackingScraperError("Carrier not found")
        except FileNotFoundError:
            raise TrackingScraperError("Configuration file not found")
        
        # Check if general configuration exists
        if "general" not in self.__configuration:
            raise TrackingScraperError("General configuration information not found")
        
    @property
    def document(self):
        """Returns the container information."""
        return self.__document
    
    ###############################################################################################
    
    def execute(self):
        """Execute commands."""
        
        parent_result   = False
        input_result    = False
        single_result   = False
        multiple_result = False
        
        try:
            start = self._go_to_url()
            while True:
                # Check if we're still on time
                end = time.time()
                if (end - start) > TrackingScraperConfig.DEFAULT_TIMEOUT:
                    raise TrackingScraperError("Timeout exceeded, scraping was unsuccessful")
                
                # Execute input
                input_result = self._execute_commands(input_result, "input")
                if input_result is not True:
                    logging.info("Input execution was unsuccessful, retrying...")
                    continue
                
                # Execute single output
                single_result = self._execute_commands(single_result, "single")
                if single_result is not True:
                    logging.info("Single output execution was unsuccessful, retrying...")
                    continue
                
                # Execute multiple output
                multiple_result = self._execute_multiple_output(multiple_result)
                if multiple_result is not True:
                    logging.info("Multiple output execution was unsuccessful, retrying...")
                    continue
                
                # Finish execution and save elements
                parent_result = self._finish_execution()
                time.sleep(TrackingScraperConfig.DEFAULT_WAIT_SHORT)
                break
        except TrackingScraperError:
            logging.exception("Exception ocurred")
        except Exception:
            logging.exception("Unknown exception ocurred")
        finally:
            return parent_result
    
    ###############################################################################################
    
    def _go_to_url(self):
        # Check if general configuration is declared
        general_config = self.__configuration.get("general")
        if general_config is None:
            raise TrackingScraperError("Configuration information not found")
        
        # Get configuration URL
        link = self.__configuration["general"].get("url")
        if link is None:
            raise TrackingScraperError("Configuration URL could not be found")
        
        # Go to desired URL
        try:
            self.__driver.get(link.format(**self.__document))
            time.sleep(TrackingScraperConfig.DEFAULT_WAIT_LONG)
        except TimeoutException:
            raise TrackingScraperError("Error loading Web page, timeout exceeded")
        
        # Start time counting
        return time.time()
    
    ###############################################################################################
    
    def _execute_commands(self, parent_result, key):
        # Check if commands were already executed
        if parent_result is True:
            return True
        
        # Get commands, if none found, return True
        commands = self.__configuration.get(key)
        if commands is None:
            return True
        
        # Process commands
        for command in commands:
            result = TrackingScraperSwitcher(self.__driver, self.__document, self.__configuration,
                                             command).process()
            if result is not True:
                return False
        
        # Return True if everything was OK
        return True
    
    ###############################################################################################
    
    def _execute_multiple_output(self, multiple_result):
        if multiple_result is True:
            return True
        
        # Get multiple command, if none found, return True
        multiple_command = self.__configuration.get("multiple")
        if multiple_command is None:
            return True
        
        # Get configuration key
        multiple_configuration = self.__configuration["general"].get("multiple")
        if multiple_configuration is None:
            return True
        
        # Create multiple document based on configuration file
        multiple_document = dict(self.__configuration.get("multiple"))
        multiple_document["container"] = self.__document["container"]
        
        # Generate and process multiple documents
        estimated = multiple_configuration.get("estimated", TrackingScraperConfig.DEFAULT_KEY_ESTIMATED)
        multiple_document["estimated"] = estimated
        return self.__process_multiple_elements(multiple_command, multiple_document, self.__driver)
    
    def __process_multiple_elements(self, multiple_command, multiple_document, previous_element):
        # Get single subcommands
        multiple_single_commands = multiple_command.get("single")
        if not isinstance(multiple_single_commands, list):
            raise TrackingScraperError("Multiple command must have single commands key")
        
        # Get command to find parents, if none found, use driver to extract single commands
        multiple_parents = multiple_command.get("parents")
        if multiple_parents is None:
            multiple_elements = [previous_element]
        else:
            multiple_elements = TrackingScraperSwitcher(self.__driver, {}, self.__configuration,
                                                        multiple_parents, previous_element).process()
            if not isinstance(multiple_elements, list):
                raise TrackingScraperError("Parent elements must be a list of web elements")
        
        # Get multiple subcomamnd
        multiple_multiple_command = multiple_command.get("multiple")
        
        # Process every single command for every multiple element
        for multiple_subelement in multiple_elements:
            subdocument = dict(multiple_document)
            for single_command in multiple_single_commands:
                single_result = TrackingScraperSwitcher(self.__driver, subdocument,
                                                        self.__configuration, single_command,
                                                        multiple_subelement).process()
                if single_result is not True:
                    logging.info("Multiple: single subcommand failed")
                    return False
            
            # Check if multiple subcommand exists, if it doesn't, save and continue.
            if multiple_multiple_command is None:
                self._insert_or_update(subdocument, self.__movement_table,
                                       TrackingScraperConfig.DEFAULT_CONTAINER_QUERY)
                continue
            
            # If it exists, copy result document and iterate these new multiple elements with it
            multiple_result = self.__process_multiple_elements(multiple_multiple_command,
                                                               subdocument, multiple_subelement)
            if multiple_result is not True:
                logging.info("Multiple: multiple subcommand failed")
                return False
        
        # Return True to notify everything is OK
        return True
    
    ###############################################################################################
    
    def _finish_execution(self):
        # Get configuration for single element
        single_config = self.__configuration["general"].get("single")
        if isinstance(single_config, dict):
            # Get processed value to save
            processed_value = single_config.get("processed", TrackingScraperConfig.DEFAULT_KEY_PROCESSED)
            self.__document["processed"] = processed_value
        
        # Get collection and upsert container
        return self._insert_or_update(self.__document, self.__container_table,
                                      TrackingScraperConfig.DEFAULT_MOVEMENT_QUERY)
    
    def _insert_or_update(self, document, collection, query_keys):
        # Create shallow copy of document, with specified keys, for query
        query_document = self._create_query_document(document, query_keys)
        logging.info("Query document: %s", query_document)
        
        # Try to update
        document["updated_at"] = datetime.datetime.utcnow()
        result = collection.update_one(query_document, {"$set": document})
        
        if result.matched_count > 0:
            logging.info("Updated: %s", query_document)
            return True
        
        # If update was unsuccessful, insert document
        document["created_at"] = datetime.datetime.utcnow()
        document["updated_at"] = None
        
        result = collection.insert_one(document)
        logging.info("Inserted: %s", query_document)
        return True
    
    def _create_query_document(self, document, query_keys):
        query_document = {}
        for key in query_keys:
            query_document[key] = document.get(key)
        return query_document

## Pruebas

In [10]:
import unittest

class TrackingScraperTestCase(unittest.TestCase):
    
    def setUp(self):
        self.driver = webdriver.Chrome(executable_path = TrackingScraperConfig.DEFAULT_PATH_CHROME)
        self.database = MongoClient()["tracking_scraper"]
        self.containers = self.database["containers"]
        self.container_movements = self.database["container_movements"]
    
    def tearDown(self):
        self.driver.close()
    
    def testHapagLloyd(self):
        container = {
            "year": "2019", "manifest": "TEST", "detail": "1", "container": "FSCU5670046", "carrier": "Hapag-Lloyd"
        }
        self.executeTest(container)
        assert container["type"]          == "45RT"
        assert container["description"]   == "REEFER CONTAINER"
        assert container["length"]        == "40'"
        assert container["width"]         == "8'"
        assert container["height"]        == "9'6\""
        assert container["tare"]          == 4640
        assert container["max_payload"]   == 29360
        assert container["last_status"]   == "vessel departed"
        assert container["last_location"] == "ANTWERP"
        assert container["last_date"]     == datetime.datetime(2019, 3, 26)
    
    def testMaersk(self):
        container = {
            "year": "2019", "manifest": "TEST", "detail": "3", "container": "MAEU6835658", "carrier": "Maersk"
        }
        self.executeTest(container)
        assert container["origin_point"]  == "Izmit Korfezi"
        assert container["dest_point"]    == "Melbourne"
        assert container["description"]   == "20ft Dry Container"
        assert container["arrival_date"]  == datetime.datetime(2019, 3, 3, 9, 49)
        assert container["last_status"]   == "Gate out"
        assert container["last_location"] == "Melbourne, Victoria, Australia"
        assert container["last_date"]     == datetime.datetime(2019, 3, 5)
    
    def testEvergreen(self):
        container = {
            "year": "2019", "manifest": "TEST", "detail": "4", "container": "EGSU9089973", "carrier": "Evergreen"
        }
        self.executeTest(container)
        assert container["type"]          == "40'(SH)"
        assert container["arrival_date"]  == datetime.datetime(2019, 4, 11)
        assert container["vessel_voyage"] == "EVER LAMBENT 0403-037W"
    
    def testTextainer(self):
        container = {
            "year": "2019", "manifest": "TEST", "detail": "5", "container": "TEMU3806660", "carrier": "Textainer"
        }
        self.executeTest(container)
        assert container["last_status"] == "ON HIRE"
        assert container["last_date"]   == datetime.datetime(2017, 8, 1)
        assert container["carrier"]     == "Maersk"
    
    def executeTest(self, container):
        scraper = TrackingScraper(self.driver, self.database, container)
        assert scraper.execute() is True

## Configuración general

In [3]:
today = datetime.datetime.now().strftime("%Y%m%d")
logging.basicConfig(filename = "../logs/scraper-" + today + ".log",
                    level = TrackingScraperConfig.DEFAULT_LOGGING_LEVEL,
                    format = TrackingScraperConfig.DEFAULT_LOGGING_FORMAT)

database        = MongoClient()[TrackingScraperConfig.DEFAULT_DATABASE_NAME]
container_table = database[TrackingScraperConfig.DEFAULT_CONTAINER_TABLE]
movements_table = database[TrackingScraperConfig.DEFAULT_MOVEMENT_TABLE]

In [9]:
fail_counter = 0
start = time.time()
driver = webdriver.Chrome(executable_path = TrackingScraperConfig.DEFAULT_PATH_CHROME)
for container in containers:
    if fail_counter >= 10:
        logging.error("Too much failures, aborting")
        break
    container_start = time.time()
    try:
        scraper = TrackingScraper(driver, database, container)
        if not scraper.execute():
            fail_counter = fail_counter + 1
            logging.error("Scraper for container %s unsuccessful", container["container"])
    # Exception handling
    # except 
    except TrackingScraperAssertionError as ex:
        logging.error(str(ex))
        if ex.assertion_type:
            fail_counter = fail_counter + 1
            logging.error("Assertion for crucial %s elements failed! %d retrys left.", ex.process_type,
                          10 - fail_counter)
        else:
            logging.warning("Assertion for failure %s elements failed.", ex.process_type)
        continue
    except TrackingScraperError as ex:
        fail_counter = fail_counter + 1
        logging.error("Scraper error extracting container information: %s", str(ex))
        continue
    except Exception:
        fail_counter = fail_counter + 1
        logging.exception("Unknown exception ocurred in scraper, aborting")
        break
    # Print container time
    finally:
        container_end = time.time()
        logging.warning("Container time:", container_end - container_start, "seconds")
# input("Press Enter to quit")
driver.close()
end = time.time()
print("Total time:", end - start, "seconds")

Container time: 23.87007999420166 seconds
Container time: 20.351141214370728 seconds
Container time: 20.03908109664917 seconds
Container time: 20.14828085899353 seconds
Container time: 20.126195669174194 seconds
Container time: 20.146178722381592 seconds
Container time: 20.0802800655365 seconds
Container time: 20.131067276000977 seconds
Container time: 20.283770322799683 seconds
Container time: 20.013020992279053 seconds
Container time: 20.03262710571289 seconds
Container time: 24.531580924987793 seconds
Container time: 22.299418926239014 seconds
Container time: 22.364529609680176 seconds
Container time: 20.070008754730225 seconds
Container time: 20.154537439346313 seconds
Container time: 21.252553701400757 seconds
Container time: 20.17525029182434 seconds
Container time: 20.276474475860596 seconds
Container time: 20.009374141693115 seconds
Container time: 20.082116842269897 seconds
Container time: 20.10901165008545 seconds
Container time: 20.024295806884766 seconds
Container time: 19.

Container time: 19.918033361434937 seconds
Container time: 19.93840003013611 seconds
Container time: 19.984562873840332 seconds
Container time: 19.87780499458313 seconds
Container time: 19.933131217956543 seconds
Container time: 19.867501258850098 seconds
Container time: 21.51281976699829 seconds
Container time: 19.971491813659668 seconds
Container time: 20.086261510849 seconds
Container time: 19.99444031715393 seconds
Container time: 20.344223260879517 seconds
Container time: 20.044652938842773 seconds
Container time: 21.177571058273315 seconds
Container time: 19.885255336761475 seconds
Container time: 20.042527437210083 seconds
Container time: 22.372709035873413 seconds
Container time: 22.532155752182007 seconds
Container time: 20.059926748275757 seconds
Container time: 29.675188064575195 seconds
Container time: 20.719446897506714 seconds
Container time: 20.17211413383484 seconds
Container time: 19.922942638397217 seconds
Container time: 20.319498300552368 seconds
Container time: 19.

Container time: 20.141023635864258 seconds
Container time: 20.053938150405884 seconds
Container time: 32.074044704437256 seconds
Container time: 19.9927179813385 seconds
Container time: 20.046939611434937 seconds
Container time: 20.086098432540894 seconds
Container time: 20.01071286201477 seconds
Container time: 20.08312439918518 seconds
Container time: 20.12555980682373 seconds
Container time: 20.121289491653442 seconds
Container time: 19.833428144454956 seconds
Container time: 19.95577597618103 seconds
Container time: 20.295432567596436 seconds
Container time: 19.888549327850342 seconds
Container time: 19.93936014175415 seconds
Container time: 19.849762201309204 seconds
Container time: 19.90953254699707 seconds
Container time: 19.882676362991333 seconds
Container time: 19.96501111984253 seconds
Container time: 20.043500185012817 seconds
Container time: 19.923995971679688 seconds
Container time: 19.807429790496826 seconds
Container time: 19.833006381988525 seconds
Container time: 20.4

Container time: 19.991856575012207 seconds
Container time: 19.996662378311157 seconds
Container time: 20.238466501235962 seconds
Container time: 20.002007722854614 seconds
Container time: 19.99569797515869 seconds
Container time: 19.979087114334106 seconds
Container time: 20.02040123939514 seconds
Container time: 22.37650418281555 seconds
Container time: 19.924381494522095 seconds
Container time: 20.51944088935852 seconds
Container time: 20.543298959732056 seconds
Container time: 20.03937554359436 seconds
Container time: 20.0947527885437 seconds
Container time: 19.92266583442688 seconds
Container time: 19.841124296188354 seconds
Container time: 22.307941675186157 seconds
Container time: 20.068699836730957 seconds
Container time: 20.01420569419861 seconds
Container time: 20.96773362159729 seconds
Container time: 20.12485361099243 seconds
Container time: 20.060376167297363 seconds
Container time: 20.122977256774902 seconds
Container time: 20.151631832122803 seconds
Container time: 20.263

Container time: 20.099167823791504 seconds
Container time: 20.095072984695435 seconds
Container time: 20.340416431427002 seconds
Container time: 20.057825803756714 seconds
Container time: 20.17573595046997 seconds
Container time: 22.632333278656006 seconds
Container time: 22.387749910354614 seconds
Container time: 20.133719444274902 seconds
Container time: 19.950185298919678 seconds
Container time: 22.257399320602417 seconds
Container time: 19.91589331626892 seconds
Container time: 19.97129797935486 seconds
Container time: 22.390921354293823 seconds
Container time: 20.288517951965332 seconds
Container time: 20.095056772232056 seconds
Container time: 22.20467257499695 seconds
Container time: 20.423067331314087 seconds
Container time: 20.10653853416443 seconds
Container time: 20.218565702438354 seconds
Container time: 20.02221131324768 seconds
Container time: 19.944095849990845 seconds
Container time: 20.104249954223633 seconds
Container time: 22.456014156341553 seconds
Container time: 2

Container time: 19.953622817993164 seconds
Container time: 19.88492178916931 seconds
Container time: 20.583512783050537 seconds
Container time: 19.913910388946533 seconds
Container time: 19.936574459075928 seconds
Container time: 20.118430376052856 seconds
Container time: 19.919710874557495 seconds
Container time: 20.004305124282837 seconds
Container time: 22.29371166229248 seconds
Container time: 19.870741367340088 seconds
Container time: 19.952069759368896 seconds
Container time: 19.922947645187378 seconds
Container time: 19.958789348602295 seconds
Container time: 19.871967315673828 seconds
Container time: 22.266302347183228 seconds
Container time: 19.930577993392944 seconds
Container time: 20.222564935684204 seconds
Container time: 20.1995587348938 seconds
Container time: 20.0429208278656 seconds
Container time: 20.01306176185608 seconds
Container time: 20.067058324813843 seconds
Container time: 22.25542640686035 seconds
Container time: 20.017438173294067 seconds
Container time: 20.

Container time: 20.01083469390869 seconds
Container time: 22.587748765945435 seconds
Container time: 19.96297526359558 seconds
Container time: 20.126288414001465 seconds
Container time: 19.839258432388306 seconds
Container time: 20.108223915100098 seconds
Container time: 20.200891733169556 seconds
Container time: 20.289782524108887 seconds
Container time: 20.077735900878906 seconds
Container time: 22.408180475234985 seconds
Container time: 19.96503710746765 seconds
Container time: 19.90006971359253 seconds
Container time: 19.969701290130615 seconds
Container time: 19.932801485061646 seconds
Container time: 20.12317132949829 seconds
Container time: 19.903630256652832 seconds
Container time: 19.91747260093689 seconds
Container time: 19.925140380859375 seconds
Container time: 22.28291964530945 seconds
Container time: 19.890753746032715 seconds
Container time: 20.007426023483276 seconds
Container time: 22.946548223495483 seconds
Container time: 20.28301453590393 seconds
Container time: 20.

Container time: 20.075754642486572 seconds
Container time: 20.373663663864136 seconds
Container time: 20.254137992858887 seconds
Container time: 19.989657163619995 seconds
Container time: 19.876973628997803 seconds
Container time: 19.917372226715088 seconds
Container time: 19.86820697784424 seconds
Container time: 19.883366584777832 seconds
Container time: 19.953017234802246 seconds
Container time: 20.00936722755432 seconds
Container time: 20.267279386520386 seconds
Container time: 20.09222674369812 seconds
Container time: 20.205814599990845 seconds
Container time: 20.110759496688843 seconds
Container time: 20.031705141067505 seconds
Container time: 20.510828256607056 seconds
Container time: 19.870941877365112 seconds
Container time: 19.93294405937195 seconds
Container time: 19.873865842819214 seconds
Container time: 20.01705241203308 seconds
Container time: 19.886876583099365 seconds
Container time: 19.862514972686768 seconds
Container time: 20.057339191436768 seconds
Container time: 

Container time: 19.970983266830444 seconds
Container time: 19.929973363876343 seconds
Container time: 19.905843257904053 seconds
Container time: 19.93939971923828 seconds
Container time: 19.94221329689026 seconds
Container time: 19.979024648666382 seconds
Container time: 19.955881357192993 seconds
Container time: 20.008452892303467 seconds
Container time: 20.047037601470947 seconds
Container time: 20.84042501449585 seconds
Container time: 24.306602001190186 seconds
Container time: 20.035983324050903 seconds
Container time: 19.94368886947632 seconds
Container time: 19.947669506072998 seconds
Container time: 19.885969877243042 seconds
Container time: 33.15148878097534 seconds
Container time: 21.715843677520752 seconds
Container time: 20.044737815856934 seconds
Container time: 22.324032306671143 seconds
Container time: 20.10856795310974 seconds
Container time: 20.100613832473755 seconds
Container time: 20.05628204345703 seconds
Container time: 19.917032957077026 seconds
Container time: 19

Container time: 20.278342962265015 seconds
Container time: 20.24764919281006 seconds
Container time: 20.058138370513916 seconds
Container time: 20.211215496063232 seconds
Container time: 23.355879306793213 seconds
Container time: 20.11028742790222 seconds
Container time: 20.381585836410522 seconds
Container time: 20.33740782737732 seconds
Container time: 20.09116816520691 seconds
Container time: 20.02243685722351 seconds
Container time: 22.598668098449707 seconds
Container time: 20.35546875 seconds
Container time: 20.161590576171875 seconds
Container time: 20.727095127105713 seconds
Container time: 20.1142578125 seconds
Container time: 20.12335705757141 seconds
Container time: 22.63473153114319 seconds
Container time: 20.116812467575073 seconds
Container time: 20.135446786880493 seconds
Container time: 20.064019203186035 seconds
Container time: 36.3018274307251 seconds
Container time: 20.134236097335815 seconds
Container time: 20.116114616394043 seconds
Container time: 19.9587142467498

Container time: 21.438785076141357 seconds
Container time: 19.853647708892822 seconds
Container time: 19.844833850860596 seconds
Container time: 25.959805488586426 seconds
Container time: 19.748159885406494 seconds
Container time: 19.745105266571045 seconds
Container time: 20.15527629852295 seconds
Container time: 19.714668035507202 seconds
Container time: 19.75281572341919 seconds
Container time: 19.78264594078064 seconds
Container time: 19.74946641921997 seconds
Container time: 19.674508571624756 seconds
Container time: 19.718634843826294 seconds
Container time: 22.058085203170776 seconds
Container time: 19.86701989173889 seconds
Container time: 19.79590129852295 seconds
Container time: 19.750653982162476 seconds
Container time: 19.799119234085083 seconds
Container time: 19.82831048965454 seconds
Container time: 19.77556037902832 seconds
Container time: 19.68572759628296 seconds
Container time: 19.69176435470581 seconds
Container time: 19.803383111953735 seconds
Container time: 22.12

Container time: 22.183558464050293 seconds
Container time: 19.783287048339844 seconds
Container time: 20.208114624023438 seconds
Container time: 22.24499797821045 seconds
Container time: 20.063989877700806 seconds
Container time: 19.86505103111267 seconds
Container time: 19.775261640548706 seconds
Container time: 28.555973052978516 seconds
Container time: 22.010790586471558 seconds
Container time: 19.82457947731018 seconds
Container time: 19.75794219970703 seconds
Container time: 22.90132474899292 seconds
Container time: 19.6640145778656 seconds
Container time: 19.837788105010986 seconds
Container time: 22.087789058685303 seconds
Container time: 19.836371898651123 seconds
Container time: 19.745556354522705 seconds
Container time: 22.112910985946655 seconds
Container time: 19.726351261138916 seconds
Container time: 19.918843269348145 seconds
Container time: 19.76167917251587 seconds
Container time: 20.290717124938965 seconds
Container time: 19.689446926116943 seconds
Container time: 19.

Container time: 20.26778483390808 seconds
Container time: 19.72232174873352 seconds
Container time: 19.645321130752563 seconds
Container time: 19.70186996459961 seconds
Container time: 19.713188886642456 seconds
Container time: 19.757663011550903 seconds
Container time: 19.739132404327393 seconds
Container time: 19.977242946624756 seconds
Container time: 19.728421449661255 seconds
Container time: 19.748675107955933 seconds
Container time: 19.7475688457489 seconds
Container time: 19.775364875793457 seconds
Container time: 22.280648231506348 seconds
Container time: 19.7506000995636 seconds
Container time: 19.749462842941284 seconds
Container time: 19.675464630126953 seconds
Container time: 20.54079580307007 seconds
Container time: 19.955533266067505 seconds
Container time: 19.738753080368042 seconds
Container time: 19.868488550186157 seconds
Container time: 22.0738263130188 seconds
Container time: 19.740211009979248 seconds
Container time: 19.7572500705719 seconds
Container time: 21.9123

Container time: 19.729127407073975 seconds
Container time: 19.705508947372437 seconds
Container time: 22.023315906524658 seconds
Container time: 19.74551558494568 seconds
Container time: 19.72956418991089 seconds
Container time: 22.009764671325684 seconds
Container time: 19.73634648323059 seconds
Container time: 22.404008626937866 seconds
Container time: 20.20933699607849 seconds
Container time: 19.877597093582153 seconds
Container time: 22.079291343688965 seconds
Container time: 19.752835750579834 seconds
Container time: 22.137930870056152 seconds
Container time: 19.738155603408813 seconds
Container time: 19.724112272262573 seconds
Container time: 22.061781406402588 seconds
Container time: 22.157549381256104 seconds
Container time: 19.718501567840576 seconds
Container time: 20.876755475997925 seconds
Container time: 22.095373392105103 seconds
Container time: 1.9042906761169434 seconds
Container time: 0.21680784225463867 seconds
Container time: 0.0023953914642333984 seconds
Container t

ConnectionRefusedError: [Errno 111] Connection refused