In [26]:
from pymongo import MongoClient

In [27]:
database   = MongoClient()["scraper2"]
containers = database["containers"]
movements  = database["container_movements"]
locations  = database["locations"]

# Datos de la naviera

## Maersk

In [28]:
query = {
    "container": {
        "$regex": "(APMU|COZU|FAAU|FRLU|KNLU|LOTU|MAEU|MALU|MCAU|MCHU|MCRU|MHHU|MIEU|MMAU|MNBU|MRKU|MRSU|MSAU|MSFU|MSKU|MSWU|MVIU|MWCU|MWMU|OCLU|POCU|PONU|SCMU|TORU)"
    }
}
update = {
    "$set": {
        "carrier": "Maersk"
    }
}
result = movements.update_many(query, update)
print(result.matched_count, "matched,", result.modified_count, "modified")

3896 matched, 0 modified


## Hapag-Lloyd

In [29]:
query = {
    "container": {
        "$regex": "(AZLU|CASU|CMUU|CPSU|CSQU|CSVU|FANU|FSCU|HAMU|HLBU|HLCU|HLXU|ITAU|IVLU|LBIU|LNXU|LYKU|MOMU|QIBU|QNNU|TLEU|TMMU|UACU|UAEU|UASU)"
    }
}
update = {
    "$set": {
        "carrier": "Hapag-Lloyd"
    }
}
result = movements.update_many(query, update)
print(result.matched_count, "matched,", result.modified_count, "modified")

22600 matched, 0 modified


## Evergreen

In [30]:
query = {
    "container": {
        "$regex": "(EGHU|EGSU|EISU|EMCU|HMCU|IMTU|LTIU|UGMU)"
    }
}
update = {
    "$set": {
        "carrier": "Evergreen"
    }
}
result = movements.update_many(query, update)
print(result.matched_count, "matched,", result.modified_count, "modified")

1671 matched, 0 modified


## Textainer

In [31]:
query = {
    "container": {
        "$regex": "(AMFU|AMZU|AXIU|CEOU|CHIU|CLHU|GAEU|GATU|GAZU|HCIU|KWCU|LLTU|MAGU|MAXU|MGLU|MLCU|PRSU|TEMU|TENU|TEXU|TGBU|TGHU|TXGU|WCIU|XINU)"
    },
    "processed": True
}
select = {
    "_id": 0,
    "container": 1,
    "carrier": 1
}
cursor = containers.find(query, select)

In [32]:
matched_count  = 0
modified_count = 0
index = 1
for textainer in cursor:
    query2 = {
        "container": textainer["container"]
    }
    update = {
        "$set": {
            "carrier": textainer["carrier"]
        }
    }
    result = movements.update_many(query2, update)
    matched_count += result.matched_count
    modified_count += result.modified_count
    index += 1
print(index, "indexes,", matched_count, "matched,", modified_count, "modified")

1035 indexes, 4432 matched, 0 modified


## Ubicaciones erróneas

In [35]:
bad_locations = list(locations.find({
    "location": {
        "$regex": "^ "
    }
}))
bad_locations

[{'_id': ObjectId('5cb8385dac8d26eb3e104f62'),
  'latitude': 46.138917,
  'location': ' Salarno, Italy',
  'longitude': 10.5204375},
 {'_id': ObjectId('5cb8681cac8d26eb3e106938'),
  'latitude': 35.000074,
  'location': ' SIN',
  'longitude': 104.999927},
 {'_id': ObjectId('5cb87aefac8d26eb3e106ff1'),
  'latitude': 31.1728205,
  'location': ' Morocco',
  'longitude': -7.3362482},
 {'_id': ObjectId('5cb8ed73ac8d26eb3e10a72d'),
  'latitude': 14.9,
  'location': ' QL',
  'longitude': 43.016667},
 {'_id': ObjectId('5cba016cac8d26eb3e116618'),
  'latitude': 64.6863136,
  'location': ' Russia',
  'longitude': 97.7453061}]

In [36]:
for bad_location in bad_locations:
    result = movements.update_many({
        "latitude"  : bad_location["latitude"],
        "longitude" : bad_location["longitude"]
    }, {
        "$unset": {
            "latitude"  : True,
            "longitude" : True
        }
    })
    print(result.matched_count, "matched,", result.modified_count, "modified")

0 matched, 0 modified
1 matched, 1 modified
4 matched, 4 modified
3 matched, 3 modified
2 matched, 2 modified
