# Find the knowledges listed in a manifest

In [6]:
from datetime import datetime, timedelta
from pymongo import MongoClient
from selenium import webdriver

import time

In [25]:
# MongoClient() is called without arguments, so the driver's default
# connection settings are used.
client = MongoClient()
database = client.get_database("tracking_scraper")

# Collections this notebook works with
single_collection = database.get_collection("manifests")
multiple_collection = database.get_collection("knowledges")

In [8]:
# Temporary: hard-coded manifest (number and year) to look up
manifest = dict(manifest="450", year="2019")

## Input

In [11]:
def _fill_input(field_name, value):
    """Clear the first element named ``field_name`` and type ``value`` into it.

    Pauses for one second after typing and returns the element.
    Raises IndexError when no element with that name is on the page.
    """
    field = driver.find_elements_by_name(field_name)[0]
    field.clear()
    field.send_keys(value)
    time.sleep(1)
    return field


# Open the manifest query form and give the page time to load
driver = webdriver.Chrome(executable_path="../../driver/chromedriver")
driver.get("http://www.aduanet.gob.pe/cl-ad-itconsmanifiesto/manifiestoITS01Alias?accion=cargarFrmConsultaManifiesto&tipo=M")
time.sleep(5)

# Fill in the year, then the manifest number
year_input = _fill_input("CMc1_Anno", manifest["year"])
manifest_input = _fill_input("CMc1_Numero", manifest["manifest"])

# Submit through the first input of type "button" and wait for the results
submit_candidates = driver.find_elements_by_css_selector("input[type=button]")
button = submit_candidates[0]
button.click()
time.sleep(5)

## Output

In [20]:
def _second_cell_text(row):
    """Return the stripped text of the second ``td`` cell of ``row``.

    Raises IndexError when the row has fewer than two ``td`` cells.
    """
    return row.find_elements_by_tag_name("td")[1].text.strip()


def _parse_datetime(text, fmt, report_errors=False):
    """Parse ``text`` with ``datetime.strptime`` using the format ``fmt``.

    Returns the parsed ``datetime``, or ``None`` when ``text`` does not match
    ``fmt``. When ``report_errors`` is true the parsing error is also printed.
    """
    try:
        return datetime.strptime(text, fmt)
    except ValueError as ex:
        if report_errors:
            print("error parsing date", ex)
        return None


# The third table on the results page is the one the manifest fields are
# read from; `tables` is kept because the knowledge listing is looked up in it
# further down.
tables = driver.find_elements_by_tag_name("table")
table = tables[2]
rows = table.find_elements_by_tag_name("tr")

# Each value is taken from the second cell of its row
# (presumably the first cell holds the label -- TODO confirm on the page).
manifest["arrival"] = _parse_datetime(_second_cell_text(rows[1]), "%d/%m/%Y %H:%M")
manifest["discharge"] = _parse_datetime(_second_cell_text(rows[2]), "%d/%m/%Y %H:%M")
manifest["ship"] = _second_cell_text(rows[3])
manifest["enterprise"] = _second_cell_text(rows[4])

# Transmission date (?): unlike the rows above, a missing row or cell is
# tolerated here and reported instead of raising.
try:
    transmission_text = _second_cell_text(rows[7])
except IndexError:
    manifest["transmission"] = None
    print("no transmission date")
else:
    # This field carries seconds, and parsing failures are printed
    manifest["transmission"] = _parse_datetime(
        transmission_text, "%d/%m/%Y %H:%M:%S", report_errors=True
    )

In [22]:
# Display the manifest dict, now including the fields read from the page
manifest

{'arrival': datetime.datetime(2019, 2, 21, 19, 0),
 'discharge': datetime.datetime(2019, 2, 22, 19, 24),
 'enterprise': '7770- TRABAJOS MARITIMOS S.A.',
 'manifest': '450',
 'ship': 'CARTAGENA EXPRESS',
 'transmission': datetime.datetime(2019, 2, 15, 14, 43, 17),
 'year': '2019'}

In [27]:
# Fetch the stored document for the manifest under inspection. The filter is
# built from `manifest` rather than repeated literals, so it always matches
# the manifest defined earlier in the notebook.
single_collection.find_one({
    "year": manifest["year"],
    "manifest": manifest["manifest"]
})

{'_id': ObjectId('5c92f4109303fc27c8474435'),
 'arrival': datetime.datetime(2019, 3, 22, 0, 0),
 'created_at': datetime.datetime(2019, 3, 21, 2, 16, 48, 668000),
 'discharge': datetime.datetime(2019, 3, 23, 0, 17),
 'manifest': '450',
 'processed': False,
 'ship': 'CARTAGENA EXPRESS',
 'updated_at': None,
 'year': '2019'}

In [31]:
# Knowledge field -> index of the cell it is read from, in reading order
KNOWLEDGE_COLUMNS = (
    ("origin_port", 0),
    ("knowledge", 2),
    ("detail", 4),
    ("consignee", 14),
    ("shipper", 15),
    ("destiny_port", 17),
)
# Index of the cell holding the transmission date (the highest index read)
TRANSMISSION_COLUMN = 20

# Only the table lookup is guarded: an IndexError raised while reading a row
# must not be reported as "no knowledges found".
try:
    table = tables[3]
except IndexError:
    # The page has fewer than four tables
    print("no knowledges found")
else:
    # Every row except first children (the header row is skipped this way)
    rows = table.find_elements_by_css_selector("tr:not(:first-child)")
    print(len(rows), "knowledges found")

    for row in rows:
        cells = row.find_elements_by_tag_name("td")

        # Rows without enough cells cannot hold a complete knowledge
        if len(cells) <= TRANSMISSION_COLUMN:
            print("skipping row with", len(cells), "cells")
            continue

        # Every knowledge carries the identifiers of its manifest
        knowledge = {
            "year": manifest["year"],
            "manifest": manifest["manifest"]
        }
        for field, column in KNOWLEDGE_COLUMNS:
            knowledge[field] = cells[column].text.strip()

        # Transmission date uses a 12-hour clock with an AM/PM marker;
        # unparseable text is stored as None
        text = cells[TRANSMISSION_COLUMN].text.strip()
        try:
            knowledge["transmission"] = datetime.strptime(text, "%d/%m/%Y %I:%M:%S %p")
        except ValueError:
            knowledge["transmission"] = None

        # NOTE: saving is not implemented yet -- the knowledge is only printed
        # and the loop stops after the first complete row.
        print(knowledge)
        break

1892 knowledges found
{'transmission': datetime.datetime(2019, 2, 16, 16, 34, 10), 'year': '2019', 'origin_port': 'DELUH', 'knowledge': 'SUDUC9FRA000231X', 'shipper': 'BASF SE Carl Bosch Strasse 38', 'destiny_port': 'CLVAP', 'consignee': 'BASF CHILE SA', 'manifest': '450', 'detail': '1'}


In [32]:
# quit() ends the whole WebDriver session (all windows plus the chromedriver
# process); close() would only close the current window.
driver.quit()

## Check database