In [1]:
import itertools
import pandas as pd
import requests
import pickle

### Scaper
https://www.msc.com/en/search-a-schedule

In [2]:
# Define origin and destination countries
origin = ["BR", "CO", "VE", "SR", "CW", "GY", "GF", "UY", "AR", "CL", "PE", "EC", "VN"]
destination = ["NL", "BE"]

In [3]:
# Create DataFrames from UN-LOCODE CSVs
country_df = pd.read_csv("../utils/country-codes.csv")
country_df.set_index("CountryCode", inplace=True)
country_dict = country_df.to_dict()["CountryName"]

In [4]:
o_dict = {code: name for code, name in country_dict.items() if code in origin}
d_dict = {code: name for code, name in country_dict.items() if code in destination}

In [5]:
with open('../pickles/msc_country_port_codes.pickle', 'rb') as handle:
    country_port_codes = pickle.load(handle)

In [6]:
o_ports = [port for port in country_port_codes["Ports"] if port["CountryIsoCode"] in origin]
d_ports = [port for port in country_port_codes["Ports"] if port["CountryIsoCode"] in destination]

o_ids = [port['PortId'] for port in o_ports]
d_ids = [port['PortId'] for port in d_ports]

port_codes = {port['PortId']: port['LocationCode'] for port in o_ports + d_ports}

In [7]:
# Make list with all combinations
od_ids = list(itertools.product(o_ids, d_ids))
print(f"{(n_combs := len(od_ids))} combinations of ports ({len(o_ids)} origins * {len(d_ids)} destinations)")

288 combinations of ports (48 origins * 6 destinations)


In [8]:
# Use today's date, by default
from datetime import date
today = date.today()
print(today)

2022-10-31


In [9]:
url = "https://www.msc.com/api/feature/tools/SearchSailingRoutes"

headers = {
    "authority": "www.msc.com",
    "accept": "application/json, text/plain, */*",
    "content-type": "application/json",
    "x-requested-with": "XMLHttpRequest"
}

data = []
for n, (o, d) in enumerate(od_ids[:10]):
    o_code = port_codes[int(o)]
    d_code = port_codes[int(d)]

    payload = {
        "FromDate": str(today),
        "fromPortId": o,
        "toPortId": d,
        "isDirectRouteOnly": False,
        "language": "en"
    }

    response = requests.request("POST", url, json=payload, headers=headers)

    # Check if request was succesfull
    rdict = response.json()
    succes = rdict["IsSuccess"]
    if not succes:
        print(f"Warning: No succes on request {n} {o_code, d_code}")
        continue

    # Save data
    new_data = rdict["Data"][0]
    for i in range(len(new_data["Routes"])):
        new_data["Routes"][i]["Origin"] = o_code
        new_data["Routes"][i]["Destination"] = d_code
    data.append(new_data)

    if n % 5:
        print(f"Scraped {n}/{n_combs}")

Scraped 1/288
Scraped 3/288


In [10]:
# Flatten trip route data
route_data = []
for d in data:
    for route in d["Routes"]:
        route_data.append(route)

In [11]:
df = pd.DataFrame(route_data)
print(f"Done. DataFrame has {df.index.size} entries")

cols = df.columns.tolist()
cols = cols[-2:] + cols[:-2]
df = df[cols]
df.head(10)

Done. DataFrame has 4 entries


Unnamed: 0,Origin,Destination,CO2FootPrint,CutOffs,EstimatedDepartureDateFormatted,EstimatedDepartureDate,EstimatedArrivalDateFormatted,EstimatedArrivalDate,VesselName,DepartureVoyageNo,TotalTransitTime,TotalTransitTimeHours,RouteScheduleLegDetails
0,CLARI,BEANR,1.6035 Tons,{'ContainerYardCutOffDate': 'Mon 31st Oct 2022...,Thu 3rd Nov 2022,2022-11-03T23:30:00,Tue 6th Dec 2022,2022-12-06T16:15:00,MSC BALTIC III,PM243R,33 days,792,"[{'LegSequence': 1, 'SingleLegOnly': False, 'L..."
1,CLARI,BEANR,1.6035 Tons,"{'ContainerYardCutOffDate': '', 'ReeferCutOffD...",Fri 11th Nov 2022,2022-11-11T08:00:00,Wed 7th Dec 2022,2022-12-07T11:00:00,MSC BALTIC III,PM244R,26 days,624,"[{'LegSequence': 1, 'SingleLegOnly': False, 'L..."
2,CLARI,NLRTM,1.5981 Tons,{'ContainerYardCutOffDate': 'Mon 31st Oct 2022...,Thu 3rd Nov 2022,2022-11-03T23:30:00,Mon 5th Dec 2022,2022-12-05T05:15:00,MSC BALTIC III,PM243R,32 days,768,"[{'LegSequence': 1, 'SingleLegOnly': False, 'L..."
3,CLARI,NLRTM,1.5981 Tons,"{'ContainerYardCutOffDate': '', 'ReeferCutOffD...",Fri 11th Nov 2022,2022-11-11T08:00:00,Fri 9th Dec 2022,2022-12-09T01:15:00,MSC BALTIC III,PM244R,28 days,672,"[{'LegSequence': 1, 'SingleLegOnly': False, 'L..."
