In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys #currently not used
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.action_chains import ActionChains
from bs4 import BeautifulSoup
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import time
import pandas as pd

In [2]:
#Sets up the options of the chromedriver
opts = Options()
opts.add_argument("window-size=1280,720") #locks the window size
opts.add_argument("user-agent=Chrome/106.0.5249.119") #Prevents sites from blocking traffic
headless = True #set to False to watch the browser window while the scraper runs

if headless: #if True, run Chrome in the background without a window
    #The flag used to assign opts.headless = False here, which opened a visible window
    #even though headless was True. The command-line switch is used instead of the
    #Options.headless property so the cell does not depend on that property existing.
    opts.add_argument("--headless")

driver = webdriver.Chrome(service=Service(ChromeDriverManager().install()), options=opts)

[WDM] - Downloading: 100%|██████████| 6.46M/6.46M [00:00<00:00, 25.3MB/s]


In [3]:
#Runs a point-to-point schedule search on the Maersk site and stores the resulting
#page in `soup`. The driver is shut down in the finally clause, so a failing step
#(missing element, timeout, ...) no longer leaves a Chrome process running.
try:
    #Open Maersk point to point site
    driver.get("https://www.maersk.com/schedules/pointToPoint")
    time.sleep(3)
    #Click to allow cookies
    driver.find_element(By.XPATH,"//*[@id='coiPage-1']/div[2]/button[3]").click()

    #Fill in the origin location
    originloc = driver.find_element(By.ID,'originLocation')
    originloc.send_keys("Santos (Sao Paulo), Brazil")

    #A dropdown entry has to be clicked in order to confirm the origin location.
    #The click is placed at an offset of (0, 50) relative to the input field.
    time.sleep(3) #Gives the dropdown time to become clickable
    action = ActionChains(driver)
    action.move_to_element_with_offset(originloc, 0, 50)
    action.click()
    action.perform()

    #Fill in the destination location
    destinationloc = driver.find_element(By.ID,'destinationLocation')
    destinationloc.send_keys("Rotterdam")

    #A dropdown entry has to be clicked in order to confirm the destination location.
    time.sleep(3)
    action = ActionChains(driver)
    action.move_to_element_with_offset(destinationloc, 0, 50)
    action.click()
    action.perform()

    #Click the search button
    search_button = driver.find_element(By.XPATH,'//*[@id="app"]/div[2]/span/form/div[6]/button')
    search_button.click()

    time.sleep(4)
    #This list contains all the XPaths to the buttons that expand the route details.
    #The result rows are addressed as div[3] up to and including div[7].
    list_xpath_routedetails = [
        f"//*[@id='app']/div[2]/div[1]/div[{row}]/div/div[4]/button/span"
        for row in range(3, 8)
    ]

    time.sleep(3)
    #Clicks the buttons to expand the route details. find_elements returns an empty
    #list when a row does not exist, so a search with fewer than five results is
    #still scraped instead of failing on the first missing button.
    for xpath in list_xpath_routedetails:
        for expand_button in driver.find_elements(By.XPATH, xpath):
            expand_button.click()

    #Copies the page to use in BeautifulSoup
    page_source = driver.page_source
finally:
    #Closes the webdriver, whether or not the steps above succeeded
    driver.quit()

#The parser is named explicitly so the result does not depend on which optional
#parsers happen to be installed.
soup = BeautifulSoup(page_source, "html.parser")

In [4]:
#Collects one element per route from the scraped page; each element already groups
#all the data that belongs to that route.
routes = soup.find_all("div", attrs={"class": "ptp-results__transport-plan"})

In [5]:
def process_data_route(route,list_routes):
    """Print the main details of one scraped route.

    Printed, one value per line and in this order: origin port, destination port,
    arrival date at the destination, departure date, vessel name, IMO number,
    service, flag, call sign and build year of the first vessel found in ``route``.
    When more than two ports are given, ``process_data_transfer`` is called
    afterwards with the same arguments.

    Parameters
    ----------
    route
        Parsed element of a single route; only ``find``, ``find_all`` and ``text``
        are used on it and on the elements it returns.
    list_routes
        Ordered port names of the route (first is the origin, last the
        destination). The name is kept for backward compatibility.

    Raises
    ------
    IndexError
        If ``list_routes`` is empty.
    ValueError
        If the vessel text lacks one of the tokens 'Departing', 'on' or '/'.
    """
    #The body used to read a notebook-level variable called list_ports and ignored
    #its own argument, so it only worked after that global had been created.
    list_ports = list_routes

    #The origin port is the first port in the list, destination the last
    origin = list_ports[0]
    destination = list_ports[-1]
    print(origin)
    print(destination)

    #Arrival date: second "font--small" element inside the label of the final item
    info_destination = route.find(class_="ptp-results__transport-plan--item-final")
    arrival_label = info_destination.find(class_="transport-label font--small")
    arrival_parts = arrival_label.find_all(class_="font--small")
    print(arrival_parts[1].text)

    #Departure date: first "font--small" element inside the first label of the route
    info_departure = route.find(class_="transport-label font--small")
    departure_date = info_departure.find(class_="font--small").text
    print(departure_date)

    #Vessel name: the words that remain after dropping the tokens 'Departing', 'on'
    #and '/' plus the last word (presumably the voyage number - TODO confirm).
    name_words = info_departure.find(class_="rich-text").text.split()
    for token in ('Departing', 'on', "/"):
        name_words.remove(token)
    del name_words[-1]
    vessel_name = ' '.join(name_words)
    print(vessel_name)

    #Every vessel field is rendered as "<label><value>"; the label is stripped off
    vessel_info = route.find(class_="vessel")
    vessel_fields = (
        ("imo", 'IMO Number'),
        ("service", 'Service'),
        ("flag", 'Flag'),
        ("callsign", 'Call Sign'),
        ("built", 'Built'),
    )
    for css_class, label in vessel_fields:
        print(vessel_info.find(class_=css_class).text.removeprefix(label))

    #A route with only an origin and a destination has no transshipment legs to print
    if len(list_ports) > 2:
        process_data_transfer(route, list_ports)

In [6]:
def process_data_transfer(route,list_ports):
    """Print the transshipment details of a route.

    For every odd index ``i`` in ``range(1, len(list_ports) - 1)`` the function
    prints the arrival date found in plan item ``i`` followed by the departure
    date, vessel name, IMO number, service, flag, call sign and build year found
    in plan item ``i + 1``. Nothing is returned.

    Parameters
    ----------
    route
        Parsed element of a single route; only ``find``, ``find_all`` and ``text``
        are used on it and on the elements it returns.
    list_ports
        Port names of the route; only its length is used here.
    """
    # NOTE(review): the index runs over the number of ports while it addresses the
    # alternating port/ship plan items, so with two or more transshipment ports only
    # part of the legs seem to be visited - confirm against the page layout.
    plan_items = route.find_all(class_="ptp-results__transport-plan--item")

    #Item 1 is a port, 2 a ship, 3 a port and so on; stepping by two lands on ports only
    for port_index in range(1, len(list_ports) - 1, 2):
        port_item = plan_items[port_index]
        arrival_label = port_item.find(class_="transport-label font--small")
        print(arrival_label.find_all(class_="font--small")[1].text)

        ship_item = plan_items[port_index + 1]
        departure_label = ship_item.find(class_="transport-label font--small")
        print(departure_label.find(class_="font--small").text)

        #Keep only the vessel name: drop the fixed tokens and the trailing word
        name_words = departure_label.find(class_="rich-text").text.split()
        for token in ('Departing', 'on', "/"):
            name_words.remove(token)
        del name_words[-1]
        print(' '.join(name_words))

        #Each vessel field is printed without its leading label
        ship_details = ship_item.find(class_="vessel")
        for css_class, label in (
            ("imo", 'IMO Number'),
            ("service", 'Service'),
            ("flag", 'Flag'),
            ("callsign", 'Call Sign'),
            ("built", 'Built'),
        ):
            print(ship_details.find(class_=css_class).text.removeprefix(label))

In [7]:
#Work on the first route of the search result
route = routes[0]

#The Maersk site does not show every port on the route. It only lists the origin,
#the destination and the transshipment ports, i.e. the ports where the container
#is moved to a different ship. Each of them is rendered as a "location" element,
#and its entry in list_ports is "<city> <terminal>".
ports = route.find_all("div", class_="location")
list_ports = [
    ' '.join((
        location.find("div", class_="font--default--bold").text,
        location.find("div", class_="font--small").text,
    ))
    for location in ports
]

In [8]:
#Print the details of the selected route; when the route has more than two ports the
#transshipment details are printed as well
process_data_route(route,list_ports)

Santos Brasil Terminal Portuario
Rotterdam Hutchison Ports Delta II
10 Dec 2022 07:00
17 Nov 2022 07:00
SEBASTIAO CABOTO
-
BOSSANOVA XL
-
-
-
29 Nov 2022 20:00
03 Dec 2022 18:00
CAP SAN LORENZO
9622227
NEOSAMBA
DK
OXOF2
2013
