In [119]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys

from datetime import datetime, timedelta
import time

In [175]:
# Start a Chrome session through the chromedriver binary at the given relative path,
# then load the page whose form is filled in by the following cells.
driver = webdriver.Chrome(executable_path = "../driver/chromedriver")
driver.get("http://www.aduanet.gob.pe/aduanas/informao/HRMCFLlega.htm")

# Find manifests

## Input

In [3]:
# Take the first element named "CMc2_Fecha1"; the following cells clear it and
# type a dd/mm/YYYY date string into it.
date_input = driver.find_elements_by_name("CMc2_Fecha1")[0]

In [4]:
# Despite the variable name, this is the date 25 days before today, rendered as dd/mm/YYYY.
last_month = (datetime.today() - timedelta(days=25)).strftime("%d/%m/%Y")

In [5]:
# Replace whatever the field currently holds with the computed date string.
date_input.clear()
date_input.send_keys(last_month)

In [6]:
# First <input type="button"> on the page; it is clicked in the next cell
# (presumably the form's search button — confirm against the page).
button = driver.find_elements_by_css_selector("input[type=button]")[0]

In [7]:
# Click the button located in the previous cell.
button.click()

## Output

In [8]:
# The fourth <table> on the current page is used as the manifest listing.
# Index 3 is hard-coded; assumes the page layout stays the same — TODO confirm.
table = driver.find_elements_by_tag_name("table")[3]

In [9]:
# Every <a> inside a <tr> of the table, skipping any <tr> that is the first child
# of its parent (presumably the header row — confirm).
table_manifests = table.find_elements_by_css_selector("tr:not(:first-child) a")

In [10]:
# Each link text is split on " - ": the first piece is prefixed with "20" to
# form the year, the second piece is kept as the manifest number.
manifests = []
for anchor in table_manifests:
    pieces = anchor.text.split(" - ")
    manifests.append(dict(year="20" + pieces[0], manifest=pieces[1]))

In [121]:
# Display the collected manifest records.
manifests

[{'manifest': '46', 'year': '2019'},
 {'manifest': '383', 'year': '2019'},
 {'manifest': '436', 'year': '2019'},
 {'manifest': '465', 'year': '2019'},
 {'manifest': '494', 'year': '2019'},
 {'manifest': '439', 'year': '2019'},
 {'manifest': '450', 'year': '2019'},
 {'manifest': '453', 'year': '2019'},
 {'manifest': '467', 'year': '2019'},
 {'manifest': '456', 'year': '2019'},
 {'manifest': '462', 'year': '2019'},
 {'manifest': '400', 'year': '2019'},
 {'manifest': '449', 'year': '2019'},
 {'manifest': '458', 'year': '2019'},
 {'manifest': '445', 'year': '2019'},
 {'manifest': '305', 'year': '2019'},
 {'manifest': '505', 'year': '2019'},
 {'manifest': '473', 'year': '2019'},
 {'manifest': '420', 'year': '2019'},
 {'manifest': '396', 'year': '2019'},
 {'manifest': '492', 'year': '2019'},
 {'manifest': '463', 'year': '2019'},
 {'manifest': '464', 'year': '2019'},
 {'manifest': '306', 'year': '2019'},
 {'manifest': '498', 'year': '2019'},
 {'manifest': '440', 'year': '2019'},
 {'manifest':

In [12]:
import json

# Persist the manifest records as JSON; the target file name is fixed.
with open("../db/manifests-20190319.json", "w") as manifests_file:
    manifests_file.write(json.dumps(manifests))

# Find knowledges (bills of lading, "conocimientos de embarque")

## Input

In [169]:
# Select one manifest to query in the cells below (index 3 is hard-coded).
manifest = manifests[3]
print(manifest)

{'manifest': '465', 'year': '2019'}


In [176]:
# Open the manifest query form and wait for it to load.
driver.get("http://www.aduanet.gob.pe/cl-ad-itconsmanifiesto/manifiestoITS01Alias?accion=cargarFrmConsultaManifiesto&tipo=M")
time.sleep(5)

# Fill in the year of the selected manifest.
year_input = driver.find_elements_by_name("CMc1_Anno")[0]
year_input.clear()
year_input.send_keys(manifest["year"])

# Fill in the number of the selected manifest.
manifest_input = driver.find_elements_by_name("CMc1_Numero")[0]
manifest_input.clear()
manifest_input.send_keys(manifest["manifest"])
time.sleep(2)

# Click the first button on the page and wait for the result page.
button = driver.find_elements_by_css_selector("input[type=button]")[0]
button.click()
time.sleep(5)

# The knowledge listing is taken from the fourth <table> of the result page.
tables = driver.find_elements_by_tag_name("table")
try:
    table = tables[3]
    print("knowledges found")
except IndexError:
    # Reset `table` so it cannot keep pointing at an element from a previously
    # visited page; a later cell then fails on None instead of using stale data.
    table = None
    print("no knowledges found")

knowledges found


## Output

In [152]:
# Every <tr> of the table except one that is the first child of its parent
# (presumably the header row — confirm).
rows = table.find_elements_by_css_selector("tr:not(:first-child)")

In [155]:
# Build one record per table row. Each record carries the year and number of
# the selected manifest plus three values read from fixed cell positions.
# A per-row link lookup in cells[4] was disabled in the original and is not
# performed here; later cells rebuild the detail URL from the identifiers.
knowledges = []
for table_row in rows:
    row_cells = table_row.find_elements_by_tag_name("td")
    knowledges.append({
        "year": manifest["year"],
        "manifest": manifest["manifest"],
        # Knowledge identifier
        "knowledge": row_cells[2].text,
        # Consignee and shipper, stripped of surrounding whitespace
        "consignee": row_cells[14].text.strip(),
        "shipper": row_cells[15].text.strip(),
    })

In [168]:
# Display the collected knowledge records.
knowledges

[{'consignee': 'MAXAM FANEXA S.A.M.',
  'knowledge': 'SUDUN8484AE0Q001',
  'manifest': '465',
  'shipper': 'WILHELM G. CLASEN GMBH & CO. KG',
  'year': '2019'},
 {'consignee': 'LOURDES MOLINA M.',
  'knowledge': 'SUDUN8BLW001160X',
  'manifest': '465',
  'shipper': 'CV. SEJAHTERA',
  'year': '2019'},
 {'consignee': 'SPARX LOGISTICS CHILE',
  'knowledge': 'SUDUN8998AE3BXRD',
  'manifest': '465',
  'shipper': 'SPARX LOGISTICS LIMITED',
  'year': '2019'},
 {'consignee': 'SPARX LOGISTICS CHILE',
  'knowledge': 'SUDUN8998AE3BJPR',
  'manifest': '465',
  'shipper': 'SPARX LOGISTICS LIMITED',
  'year': '2019'},
 {'consignee': 'SPARX LOGISTICS CHILE',
  'knowledge': 'SUDUN8998AE3BZ3M',
  'manifest': '465',
  'shipper': 'SPARX LOGISTICS LIMITED',
  'year': '2019'},
 {'consignee': 'XIMENA ACUNA BAZAN DE BARRIONUEVO',
  'knowledge': 'SUDUN8998AE3B195',
  'manifest': '465',
  'shipper': 'SUNWAY LOGISTICS CO. LTD.',
  'year': '2019'},
 {'consignee': 'POWER SOLUTIONS & LOGISTICS',
  'knowledge': 'SU

In [163]:
# Build the detail-page URL for the first knowledge record and open it.
# The year, manifest number and knowledge identifier come from the record;
# the remaining query parameters are fixed.
link = (
    "http://www.aduanet.gob.pe/cl-ad-itconsmanifiesto/manifiestoITS01Alias"
    "?accion=consultarDetalleConocimientoEmbarque"
    "&CMc2_Anno={year}&CMc2_Numero={manifest}&CMc2_NumDet=1"
    "&CG_cadu=118&CMc2_TipM=mc&CMc2_numcon={knowledge}"
).format(**knowledges[0])
driver.get(link)

In [173]:
# Number of knowledge records collected for the selected manifest.
len(knowledges)

376

# Find containers

## Input

In [177]:
# Visit the detail page of every knowledge record and, when the page shows a
# "Contenedores" title, store the first cell of each table row under the
# record's "containers" key. Interrupting the kernel stops the loop cleanly;
# records processed so far keep their containers.
try:
    for knowledge in knowledges:
        print(knowledge)

        # Build and open the detail page for this record, then wait for it.
        # NOTE(review): CMc2_NumDet is fixed at 1 for every record — confirm
        # that this is intended.
        link = (
            "http://www.aduanet.gob.pe/cl-ad-itconsmanifiesto/manifiestoITS01Alias"
            "?accion=consultarDetalleConocimientoEmbarque"
            "&CMc2_Anno={year}&CMc2_Numero={manifest}&CMc2_NumDet=1"
            "&CG_cadu=118&CMc2_TipM=mc&CMc2_numcon={knowledge}"
        ).format(**knowledge)
        print(link)
        driver.get(link)
        time.sleep(10)

        # The first bold title directly under <body><font> tells whether the
        # page has a containers section.
        titles = driver.find_elements_by_css_selector("body > font > b")

        if titles and titles[0].text == "Contenedores":
            # The containers table is the fourth <table> on the page.
            page_tables = driver.find_elements_by_tag_name("table")
            if len(page_tables) <= 3:
                print("knowledge", knowledge["knowledge"], "- table not found")
                # Moves straight on to the next record (no back / pause).
                continue
            table = page_tables[3]

            rows = table.find_elements_by_css_selector("tr:not(:first-child)")

            # The container number is the text of the first cell of each row.
            containers = []
            for row in rows:
                cells = row.find_elements_by_tag_name("td")
                if not cells:
                    print("knowledge", knowledge["knowledge"], "- container not found")
                    continue
                containers.append(cells[0].text.strip())

            knowledge["containers"] = containers
            print("knowledge", knowledge["knowledge"], "- containers saved")

        else:
            print("knowledge", knowledge["knowledge"], "- no containers")

        # Go back and pause before the next request.
        driver.back()
        time.sleep(5)
except KeyboardInterrupt:
    print("cancelled")

{'knowledge': 'SUDUN8484AE0Q001', 'shipper': 'WILHELM G. CLASEN GMBH & CO. KG', 'containers': ['PONU0146264'], 'manifest': '465', 'consignee': 'MAXAM FANEXA S.A.M.', 'year': '2019'}
http://www.aduanet.gob.pe/cl-ad-itconsmanifiesto/manifiestoITS01Alias?accion=consultarDetalleConocimientoEmbarque&CMc2_Anno=2019&CMc2_Numero=465&CMc2_NumDet=1&CG_cadu=118&CMc2_TipM=mc&CMc2_numcon=SUDUN8484AE0Q001
knowledge SUDUN8484AE0Q001 - containers saved
{'knowledge': 'SUDUN8BLW001160X', 'shipper': 'CV. SEJAHTERA', 'containers': ['PONU0146264'], 'manifest': '465', 'consignee': 'LOURDES MOLINA M.', 'year': '2019'}
http://www.aduanet.gob.pe/cl-ad-itconsmanifiesto/manifiestoITS01Alias?accion=consultarDetalleConocimientoEmbarque&CMc2_Anno=2019&CMc2_Numero=465&CMc2_NumDet=1&CG_cadu=118&CMc2_TipM=mc&CMc2_numcon=SUDUN8BLW001160X
cancelled


In [144]:
# Write the knowledge records of the selected manifest to a JSON file named
# after the manifest's year and number.
output_path = "../db/manifests/{year}-{manifest}.json".format(**manifest)
with open(output_path, "w") as file:
    file.write(json.dumps(knowledges))

'\nwww.aduanet.gob.pe/cl-ad-itconsmanifiesto/manifiestoITS01Alias?accion=consultarDetalleConocimientoEmbarque\n&CMc2_Anno={year}&CMc2_Numero={manifest}&CMc2_NumDet=1&CG_cadu=118&CMc2_TipM=mc&CMc2_numcon={knowledge}\n'

# End

In [60]:
# End the WebDriver session. quit() closes every window and shuts the driver
# down, whereas close() only closes the current window and can leave the
# chromedriver process running.
driver.quit()

Dentro de un contenedor
- cada cliente llena cierto volumen que se coloca en un contenedor
- un agente de carga origen envía a un destino => para aduanas se maneja con los manifiestos

cuando la nave llega al puerto

ABCDE
AB: dos primeras iniciales del país
CDE: tres primeras iniciales del puerto

DFSU2255679

table[3]

tr:not(:first-child)

td[0]
