In [1]:
from pymongo import MongoClient

# MongoClient() is called without arguments, so pymongo's default connection
# settings are used. Every helper in this notebook reads from the
# "tracking_scraper" database obtained here.
client = MongoClient()
database = client.get_database("tracking_scraper")

In [2]:
# Handles to the collections of the scraper database. The helpers visible in
# this notebook only read from `containers_table`; the remaining handles are
# kept available for ad-hoc queries.
manifests_table = database.get_collection("manifests")
knowledges_table = database.get_collection("knowledges")
containers_table = database.get_collection("containers")
prefixes_table = database.get_collection("container_prefixes")
carriers_table = database.get_collection("carriers")

In [3]:
def query_containers_all(carrier_name):
    """Walk interactively through every unprocessed container of a carrier.

    The matching documents are requested in descending ``_id`` order and
    handed to ``iterate_containers_all``, which shows them one at a time.

    Args:
        carrier_name: Value matched against the ``carrier`` field.
    """
    container_filter = make_container_query(carrier_name)
    descending_cursor = containers_table.find(container_filter).sort("_id", -1)
    iterate_containers_all(descending_cursor)

def iterate_containers_all(items):
    """Show container records one at a time, pausing between them.

    After each record the user is asked whether to continue; answering
    ``quit`` (any letter case, surrounding whitespace ignored) stops the walk.
    No prompt is issued after the final record, because there is nothing left
    to continue to.

    Args:
        items: Any iterable of container documents accepted by
            ``show_container_info`` (for example a pymongo cursor).
    """
    remaining = iter(items)
    exhausted = object()  # unique sentinel, so falsy records are still shown

    current = next(remaining, exhausted)
    while current is not exhausted:
        show_container_info(current)
        # Look one record ahead so the prompt only appears when more follow.
        upcoming = next(remaining, exhausted)
        if upcoming is exhausted:
            break
        answer = input("Press Enter to continue, or type 'quit' to exit: ")
        if answer.strip().lower() == "quit":
            break
        current = upcoming

def make_container_query(carrier_name):
    """Build the MongoDB filter for a carrier's unprocessed containers.

    Args:
        carrier_name: Value the ``carrier`` field must equal.

    Returns:
        A new dict matching documents whose ``carrier`` equals
        ``carrier_name`` and whose ``processed`` flag is ``False``.
    """
    container_filter = dict(carrier=carrier_name, processed=False)
    return container_filter

In [4]:
def query_containers(carrier_name, limit_count=5):
    """Print a sample of a carrier's unprocessed containers.

    Shows the ``limit_count`` records with the lowest ``_id`` (ascending
    order), followed by the ``limit_count`` records with the highest ``_id``
    (descending order). When fewer than ``2 * limit_count`` records match,
    the two windows overlap; records already shown in the first window are
    skipped in the second so that nothing is printed twice.

    Args:
        carrier_name: Value matched against the ``carrier`` field.
        limit_count: Size of each window; defaults to 5. Pass a positive
            integer (MongoDB treats a limit of 0 as "no limit").
    """
    query = make_container_query(carrier_name)

    # First window: materialised so its ids can be remembered for de-duplication.
    head = list(containers_table.find(query).sort("_id", 1).limit(limit_count))
    for item in head:
        show_container_info(item)
    already_shown = {item["_id"] for item in head}

    # Second window: skip anything the first window already printed.
    tail = containers_table.find(query).sort("_id", -1).limit(limit_count)
    for item in tail:
        if item["_id"] not in already_shown:
            show_container_info(item)

def iterate_containers(query, order_by, limit_count):
    """Print up to ``limit_count`` containers matching ``query``.

    Args:
        query: MongoDB filter passed to ``containers_table.find``.
        order_by: Sort direction applied to ``_id`` (the callers in this
            notebook pass ``1`` for ascending and ``-1`` for descending).
        limit_count: Maximum number of documents to request.
    """
    cursor = containers_table.find(query)
    cursor = cursor.sort("_id", order_by)
    cursor = cursor.limit(limit_count)
    for record in cursor:
        show_container_info(record)

def show_container_info(item):
    """Print a three-line summary of one container document.

    Args:
        item: Mapping with the keys ``container``, ``year``, ``manifest``,
            ``detail`` and ``_id``. ``year`` and ``manifest`` are joined with
            ``+``, so both must be strings. ``_id`` must expose a
            ``generation_time`` datetime (assumes a BSON ObjectId; confirm
            against the collection).
    """
    print("Container:", item["container"])

    manifest_ref = item["year"] + "-" + item["manifest"]
    print("- Manifest + detail:", manifest_ref, "@", item["detail"])

    # The creation time is read from the document id rather than a stored field.
    created_at = item["_id"].generation_time
    print("- Created at:", created_at.strftime("%d/%m/%Y %H:%M:%S"))

In [5]:
def count_containers(carrier_name):
    """Return how many unprocessed container documents a carrier has.

    Every matching document is counted, so a container number that appears
    in several documents contributes once per document.
    """
    container_filter = make_container_query(carrier_name)
    total = containers_table.count_documents(container_filter)
    return total

def distinct_containers(carrier_name):
    """Return the number of distinct ``container`` values for a carrier.

    Only unprocessed documents (as selected by ``make_container_query``) are
    considered.
    """
    container_filter = make_container_query(carrier_name)
    unique_values = containers_table.distinct("container", container_filter)
    return len(unique_values)

def distinct_containers_all(carrier_name):
    """Print the distinct ``container`` values of a carrier's unprocessed records.

    The whole list is printed in a single ``print`` call; nothing is returned.
    """
    container_filter = make_container_query(carrier_name)
    print(containers_table.distinct("container", container_filter))
    # NOTE: the values obtained here are bare field values, not documents, so
    # they cannot be passed to iterate_containers_all(), which needs records
    # that show_container_info() can index by key.

## Textainer

In [15]:
# Print a sample of unprocessed Textainer records: the first five and the
# last five when ordered by _id.
query_containers("Textainer")
# Alternative: total number of matching records.
# count_containers("Textainer")

Container: TGBU5402076
- Manifest + detail: 2019-450 @ 23
- Created at: 23/03/2019 05:01:14
Container: TGBU6638019
- Manifest + detail: 2019-450 @ 68
- Created at: 23/03/2019 05:08:57
Container: TGHU3270291
- Manifest + detail: 2019-450 @ 75
- Created at: 23/03/2019 05:10:10
Container: TEMU3806660
- Manifest + detail: 2019-450 @ 80
- Created at: 23/03/2019 05:11:01
Container: TGHU4876787
- Manifest + detail: 2019-450 @ 94
- Created at: 23/03/2019 05:13:26
Container: TGHU9921153
- Manifest + detail: 2019-613 @ C51
- Created at: 31/03/2019 01:27:49
Container: TEMU9032730
- Manifest + detail: 2019-613 @ C61
- Created at: 31/03/2019 01:26:05
Container: TGHU9571883
- Manifest + detail: 2019-613 @ C62
- Created at: 31/03/2019 01:25:54
Container: TGHU9759206
- Manifest + detail: 2019-613 @ C63
- Created at: 31/03/2019 01:25:44
Container: TEMU9519440
- Manifest + detail: 2019-613 @ C69
- Created at: 31/03/2019 01:24:42


In [7]:
# Number of distinct container values among unprocessed Textainer records.
distinct_containers("Textainer")

7255

## Hapag-Lloyd

In [8]:
# Uncomment to also print a sample of the matching records:
# query_containers("Hapag-Lloyd")
# Total number of unprocessed Hapag-Lloyd records (one per document).
count_containers("Hapag-Lloyd")

9987

In [9]:
# Number of distinct container values among unprocessed Hapag-Lloyd records.
distinct_containers("Hapag-Lloyd")

9453

## Evergreen

In [10]:
# Uncomment to also print a sample of the matching records:
# query_containers("Evergreen")
# Total number of unprocessed Evergreen records (one per document).
count_containers("Evergreen")

1974

In [14]:
# Number of distinct container values among unprocessed Evergreen records.
distinct_containers("Evergreen")

1936

## Maersk

In [12]:
# Uncomment to also print a sample of the matching records:
# query_containers("Maersk")
# Total number of unprocessed Maersk records (one per document).
count_containers("Maersk")

11031

In [13]:
# Number of distinct container values among unprocessed Maersk records.
distinct_containers("Maersk")

10571