In [36]:
import json
import os
import re

from selenium import webdriver
from selenium.common.exceptions import InvalidSessionIdException, WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

In [43]:
class TrackingScraper:
    """Scrape container tracking data from a carrier website with Selenium.

    Creating an instance opens a Firefox WebDriver, stores the container
    number, prepares an empty tracking structure and loads the carrier's JSON
    configuration file from ``CONFIG_DIR``.

    Attributes set by ``__init__``:
        driver: Firefox WebDriver instance.
        container: Container number passed by the caller.
        tracking: Dict with a "general" section (holding "container_number")
            and an empty "movements" list.
        config: Parsed carrier configuration; only set when the configuration
            file was read successfully.
    """

    # Directory that holds one "<carrier>.json" configuration file per carrier.
    CONFIG_DIR = "../config"

    def __init__(self, container, carrier):
        """Open the browser and load the configuration for ``carrier``.

        Args:
            container: Container number to track.
            carrier: Carrier name; selects "<CONFIG_DIR>/<carrier>.json".

        A WebDriverException raised while setting up is printed, not re-raised.
        When the configuration cannot be loaded the browser window is closed
        again, but the instance is still created (without ``config``).
        """
        try:
            # Open the Firefox WebDriver
            self.driver = webdriver.Firefox()
            # Save container number in a variable
            self.container = container
            # Define tracking information
            self.tracking = {
                "general": {
                    "container_number": container
                },
                "movements": []
            }
            # Without a configuration there is nothing to scrape, so the
            # browser window is closed right away.
            if not self.__get_config_info(carrier):
                self.driver.close()

        except WebDriverException as ex:
            print("Error opening webdriver: ", ex)

    def __get_config_info(self, carrier):
        """Load the carrier's JSON configuration into ``self.config``.

        Args:
            carrier: Carrier name; the file read is "<CONFIG_DIR>/<carrier>.json".

        Returns:
            True when the file was found and parsed, False otherwise (a
            message describing the problem is printed).
        """
        config_path = "{}/{}.json".format(self.CONFIG_DIR, carrier)
        try:
            with open(config_path) as config_file:
                self.config = json.load(config_file)
        except FileNotFoundError:
            print("Configuration file was not found, aborting...")
            return False
        except json.JSONDecodeError as ex:
            print("Error while reading configuration file: ", ex)
            return False
        return True

    def go_to_url(self):
        """Navigate the browser to the carrier's tracking URL.

        The URL template is read from ``config["general"]["url"]`` and
        formatted with the container number. Any error is printed, not raised.
        """
        try:
            self.driver.get(self.config["general"]["url"].format(container = self.container))
        except Exception as ex:
            print("Error ocurred while going to URL: ", ex)

    def __del__(self):
        """Close the browser window when the scraper is garbage collected."""
        try:
            self.driver.close()
        except AttributeError:
            # The driver was never created, so there is nothing to close.
            print("del: driver not declared")
        except InvalidSessionIdException:
            # The driver was already closed.
            print("del: driver already closed")

In [41]:
# Container and carrier to track. `container` is also read by the cell that
# writes the JSON file at the end of the notebook.
container = "FSCU5670046"
carrier = "hapag-lloyd"

scraper = TrackingScraper(container, carrier)
scraper.go_to_url()

# The scraping cells below read the module-level names `driver` and
# `tracking`; bind them to the scraper's own objects so the notebook runs
# top to bottom on a fresh kernel instead of relying on stale kernel state.
driver = scraper.driver
tracking = scraper.tracking

In [11]:
# Detail fields of the container page, read by position.
# NOTE(review): assumes the page lists type, description, dimensions, tare and
# max payload in this order — confirm against the carrier page.
elements = driver.find_elements(By.CLASS_NAME, "inputNonEdit")

# Container type
tracking["general"]["container_type"] = elements[0].text

# Container description
tracking["general"]["container_description"] = elements[1].text

# Container dimensions: the text is split on " X " and the parts are read as
# length (first), width (second) and height (third).
text_group = elements[2].text.split(" X ")
tracking["general"]["container_length"] = text_group[0]
tracking["general"]["container_height"] = text_group[2]
tracking["general"]["container_width"]  = text_group[1]

# The tracking dict created by TrackingScraper has no "container" section, so
# create it on demand instead of failing with KeyError.
# NOTE(review): the fields above live under "general" with a "container_"
# prefix while these two use a separate "container" section — confirm which
# layout is intended.

# Container tare
tracking.setdefault("container", {})["tare"] = elements[3].text

# Container maximum payload
tracking["container"]["max_payload"] = elements[4].text

In [15]:
# Each body row of the ".hal-table" table becomes one movement record.
elements = driver.find_elements(By.CSS_SELECTOR, '.hal-table tbody tr')
tracking["movements"] = []

for row in elements:
    cells = row.find_elements(By.TAG_NAME, 'td')

    # The first five cells are read by position: status, location, date,
    # time and transport ship.
    movement = {
        "transport": {},
        "status": cells[0].text,
        "location": cells[1].text,
        "date": cells[2].text,
        "time": cells[3].text,
        "transport_ship": cells[4].text,
    }

    # The sixth cell (voyage number) is optional: it is skipped when the row
    # has no such cell or the cell is empty.
    try:
        voyage = cells[5].text
    except IndexError:
        voyage = ""
    if len(voyage) > 0:
        movement["transport_voyage"] = voyage

    tracking["movements"].append(movement)


In [16]:
# Display the tracking dict filled in by the cells above.
tracking

{'container': {'description': 'REEFER CONTAINER',
  'dimensions': {'height': '9\'6"', 'length': "40'", 'width': "8'"},
  'max_payload': '29360',
  'tare': '4640',
  'type': '45RT'},
 'general': {},
 'last_route': {},
 'movements': [{'date': '2019-01-26',
   'location': 'CALLAO',
   'status': 'Gate out empty',
   'time': '09:59',
   'transport_ship': 'Truck'},
  {'date': '2019-01-27',
   'location': 'CALLAO',
   'status': 'Arrival in',
   'time': '06:01',
   'transport_ship': 'Truck'},
  {'date': '2019-01-29',
   'location': 'CALLAO',
   'status': 'Loaded',
   'time': '07:51',
   'transport_ship': 'CAP SAN TAINARO',
   'transport_voyage': '8151N'},
  {'date': '2019-01-29',
   'location': 'CALLAO',
   'status': 'Vessel departed',
   'time': '23:42',
   'transport_ship': 'CAP SAN TAINARO',
   'transport_voyage': '8151N'},
  {'date': '2019-02-19',
   'location': 'ROTTERDAM',
   'status': 'Vessel arrived',
   'time': '16:42',
   'transport_ship': 'CAP SAN TAINARO',
   'transport_voyage': '8

In [17]:
# Close the WebDriver's browser window.
driver.close()

In [18]:
# Write the tracking dict to "hapag-lloyd/<container>.json".
# `json` is already imported in the notebook's import cell.
output_dir = 'hapag-lloyd'

# Create the folder on first use; open(..., 'w') does not create missing
# directories and would raise FileNotFoundError.
os.makedirs(output_dir, exist_ok=True)

with open(output_dir + '/' + container + '.json', 'w') as file:
    json.dump(tracking, file)