In [4]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

import re
import json

In [5]:
container = "MAEU6835658"
tracking = {
    "general": {},
    "container": {
        "code": container,
        "dimensions": {}
    },
    "last_route": {},
    "movements": []
}
url = "https://www.maersk.com/tracking/#tracking/{container}"

In [6]:
driver = webdriver.Firefox()
driver.get(url.format(container = container))

In [8]:
# Click to display
button = driver.find_elements(By.CSS_SELECTOR, ".expandable-table__call-to-action .button")
button[0].click()

In [9]:
elements = driver.find_elements(By.CLASS_NAME, "font--display-1--heavy")

# Origin point
tracking["general"]["origin"]      = elements[0].text

# Destination point
tracking["general"]["destination"] = elements[1].text

In [10]:
elements = driver.find_elements(By.CSS_SELECTOR, ".expandable-table__wrapper td")

# Container description
subelements = elements[1].find_elements(By.TAG_NAME, "span")
tracking["container"]["description"] = subelements[3].text

# Estimated arrival time
subelements = elements[2].find_elements(By.TAG_NAME, "span")
tracking["general"]["estimated_arrival"] = subelements[3].text

In [13]:
# Last route information
subelements = elements[3].find_elements(By.TAG_NAME, "span")
text_group = subelements[3].text.split('\n')
text_subgroup = text_group[0].split(' • ')

# Last route status
tracking["last_route"]["status"] = text_subgroup[0]
# Last route location
tracking["last_route"]["location"] = text_subgroup[1]
# Last route date
tracking["last_route"]["date"] = text_group[1]

In [20]:
# Route information
elements = driver.find_elements(By.CLASS_NAME, "timeline__event-table")
tracking["movements"] = []

for element in elements:
    movement_common = {}
    
    # Get common elements
    common_subelements = element.find_elements(By.CLASS_NAME, "")
    
    subelements = element.find_elements(By.CLASS_NAME, "timeline__event-table__row")
    
    for i, subelement in enumerate(subelements):
        movement_common = {
            "transport": {}
        }
        
        if i == 0:
            text_group = subelement.text.split('\n')
            
            # Get movement terminal name for all movements in subelement
            movement_common["terminal"] = text_group[0]
            
            # Get movement location for all movements in subelement
            movement_common["location"] = text_group[1]
        
        else:
            movement_specific = movement_common.copy()
            items = subelement.find_elements(By.CLASS_NAME, "timeline__event-table__cell")
            
            text_group = items[0].text.split('\n')
            
            # Get movement date and time
            movement_specific["date"] = text_group[0]
            try:
                movement_specific["time"] = text_group[1]
            except IndexError:
                pass
            
            text_group = items[1].text.split('\n')
            text_subgroup = text_group[0].split(' on ')
            
            # Get movement status and voyage
            movement_specific["status"] = text_subgroup[0]
            try:
                movement_specific["transport"]["ship"] = text_subgroup[1]
            except IndexError:
                pass
            try:
                movement_specific["transport"]["voyage"] = text_group[1]
            except IndexError:
                pass
            
            # Determine if movement is estimated or real
            status = subelement.get_attribute("data-status")
            movement_specific["is_estimated"] = (status == "planned")
            
            # Add movement to list
            tracking["movements"].append(movement_specific)

In [21]:
tracking

{'container': {'code': 'MAEU6835658',
  'description': '20ft Dry Container',
  'dimensions': {}},
 'general': {'destination': 'Melbourne',
  'estimated_arrival': '02 Mar 2019',
  'origin': 'Izmit Korfezi'},
 'last_route': {'date': '13 Feb 2019',
  'location': 'Singapore, Singapore',
  'status': 'Load'},
 'movements': [{'date': '08 Jan 2019',
   'is_estimated': False,
   'status': 'Stuffing',
   'time': '18:26',
   'transport': {}},
  {'date': '13 Jan 2019',
   'is_estimated': False,
   'status': 'Load',
   'time': '11:24',
   'transport': {'ship': 'MSC RAPALLO', 'voyage': 'Voyage No.903E'}},
  {'date': '13 Feb 2019',
   'is_estimated': False,
   'status': 'Discharge',
   'time': '10:46',
   'transport': {}},
  {'date': '13 Feb 2019',
   'is_estimated': False,
   'status': 'Load',
   'time': '17:14',
   'transport': {'ship': 'MOL GROWTH', 'voyage': 'Voyage No.847S'}},
  {'date': '02 Mar 2019',
   'is_estimated': True,
   'status': 'Discharge',
   'transport': {}}]}

In [22]:
driver.close()

In [23]:
import json
with open('maersk/' + container + '.json', 'w') as file:
    json.dump(tracking, file)