In [1]:
import project_config
from pymongo import MongoClient
from ted_sws import config
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository

In [3]:
# Open a MongoDB connection using the auth URL from the project configuration
# and wrap it in the NoticeRepository whose `.collection` the cells below
# query and update.
mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL)
notice_repository = NoticeRepository(mongodb_client=mongodb_client)

In [56]:
# Pipeline 1: for every document, project `created_at` converted with $toDate.
# `_id` is not suppressed, so each result still identifies the document it
# came from.
# NOTE(review): per the MongoDB docs $toDate yields null for a missing/null
# input, so documents without `created_at` would come back with an explicit
# null — confirm that writing it back is acceptable.
created_at_pipeline = [
    {"$project": {"created_at": {"$toDate": "$created_at"}}},
]
created_at_field_projections = notice_repository.collection.aggregate(created_at_pipeline)

# Pipeline 2: only documents whose `normalised_metadata` is set; project the
# two nested date fields converted with $toDate.
has_normalised_metadata_stage = {
    "$match": {"normalised_metadata": {"$ne": None}}
}
normalised_metadata_dates_stage = {
    "$project": {
        "normalised_metadata.publication_date": {"$toDate": "$normalised_metadata.publication_date"},
        "normalised_metadata.document_sent_date": {"$toDate": "$normalised_metadata.document_sent_date"},
    }
}
normalised_metadata_date_fields_projections = notice_repository.collection.aggregate(
    [has_normalised_metadata_stage, normalised_metadata_dates_stage]
)


In [53]:
def flatten_dict(tmp_dict, dict_key=''):
    """Yield ``(dotted_key, value)`` pairs for every leaf of a nested dict.

    Nested dictionaries are walked recursively and their keys are joined with
    ``'.'``, producing the dot-notation paths MongoDB update operators expect,
    e.g. ``{"a": {"b": 1}}`` -> ``("a.b", 1)``.

    :param tmp_dict: the (possibly nested) dict to flatten; any non-dict value
        is treated as a leaf and yielded as-is under ``dict_key``.
    :param dict_key: dotted path accumulated so far; callers normally leave
        the default ``''``.
    :return: generator of ``(str, value)`` tuples in dict iteration order.

    Keys are converted with ``str()``. An empty dict (at any depth) yields
    nothing, so empty sub-documents do not appear in the output.
    """
    # isinstance (rather than an exact type comparison) so dict subclasses such
    # as OrderedDict or bson.son.SON are flattened too instead of being
    # emitted as opaque leaf values.
    if isinstance(tmp_dict, dict):
        prefix = dict_key + '.' if dict_key else dict_key
        for k, v in tmp_dict.items():
            yield from flatten_dict(v, prefix + str(k))
    else:
        yield dict_key, tmp_dict


def update_mongodb_documents(aggregate_results):
    """Write each aggregation result back onto the document it came from.

    For every result, all fields except ``_id`` are flattened to dot-notation
    paths and applied with ``$set`` to the document with the same ``_id`` in
    ``notice_repository.collection``. One ``update_one`` call is issued per
    result.

    :param aggregate_results: iterable of result documents (dicts), each
        carrying an ``_id`` key.
    :raises KeyError: if a result has no ``_id``.
    """
    for aggregate_result in aggregate_results:
        # Read the identifier before flattening so an embedded-document _id
        # (which would flatten to "_id.x" keys) is still usable as the filter.
        document_id = aggregate_result['_id']
        # _id is immutable in MongoDB, so it only belongs in the filter and is
        # kept out of the $set payload.
        fields_to_set = {k: v for k, v in aggregate_result.items() if k != '_id'}
        document_patch = dict(flatten_dict(fields_to_set))
        if not document_patch:
            # Nothing besides _id was projected; skip rather than send an
            # empty $set.
            continue
        notice_repository.collection.update_one({'_id': document_id}, {"$set": document_patch})

In [55]:
# Apply both migrations in order, announcing each one before it runs.
# NOTE(review): the projections are presumably single-use cursors, so this
# cell only does work once per run of the aggregation cell — confirm.
for progress_message, projections in (
    ("Update field: created_at", created_at_field_projections),
    ("Update fields: [normalised_metadata.publication_date, normalised_metadata.document_sent_date]",
     normalised_metadata_date_fields_projections),
):
    print(progress_message)
    update_mongodb_documents(projections)
