In [11]:
%load_ext autoreload
%autoreload 2
from pymongo import MongoClient
import sys
from pathlib import Path
from tqdm import tqdm
import json
import random
import bson
from faker import Faker

sys.path.append(str(Path("..").resolve()))
from src import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Final touches: restore the consistency lost during seeding
ℹ️ This notebook requires the prior execution of [`1.2-Seeding_Database`](1.2-Seeding_Database.ipynb). Alternatively, you can just load the snapshot, as done in the following cell.

## Load post-preprocessing data
The following cell resets the `omero_museum` database by loading the `3_seeded` snapshot.

In [12]:
%%capture
# Start from a clean slate: drop the whole `omero_museum` database first, so
# that collections which are not part of the snapshot cannot survive the
# restore (mongorestore's `--drop` only drops collections present in the dump).
MongoClient("mongodb://localhost:27017/").drop_database("omero_museum")
# Reload the `3_seeded` snapshot into `omero_museum`; `%%capture` hides the
# verbose mongorestore log from the notebook output.
!mongorestore --host localhost:27017 --drop --db omero_museum  "../backup/3_seeded/omero_museum"

In [13]:
# Seed Faker's shared random generator so the fake values written below (and
# therefore the dumped snapshot) are the same on every Restart & Run All,
# provided the documents are visited in the same order.
Faker.seed(42)
faker = Faker()

# NOTE(review): building the connector appears to print the collection/field
# overview shown in this cell's output -- confirm in `src`.
connector = MongoDBConnector("omero_museum")
# Database handle used by every update cell below.
db = connector.db

The collections of the [1m[33momero_museum[0m db are:
----------------------------------------
[activities]: [1m[31m_id[0m [1m[33mcapacity[0m [1m[32mduration[0m [1m[36menrolled[0m [1m[34mroom[0m [1m[35mstart_date[0m [1m[37mticketIds[0m [1m[90mworkshop_title[0m
[artworks]: [1m[31m_id[0m [1m[33mauthorIds[0m [1m[32mcomments_star_1[0m [1m[36mcomments_star_2[0m [1m[34mcomments_star_3[0m [1m[35mcomments_star_4[0m [1m[37mcomments_star_5[0m [1m[90mdate[0m [1m[91mdescription[0m [1m[93mdonation_state[0m [1m[92mdonator_id[0m [1m[96mis_original[0m [1m[94mlocation_name[0m [1m[95mmaterials[0m [1m[31mperiod[0m [1m[33mseller_id[0m [1m[32msize[0m [1m[36mtecniques[0m [1m[34mtrade[0m [1m[35mtype[0m
[authors]: [1m[31m_id[0m [1m[33mbirth_date[0m [1m[32mgender[0m [1m[36mhome_town[0m [1m[34mname[0m [1m[35msurname[0m
[departments]: [1m[31m_id[0m [1m[33mfloor[0m [1m[32mfree_spots[0m [1m[36mroom[

## Artworks' comments' rating

In [14]:
# Stamp every embedded comment with the rating of the bucket it is stored in:
# comments inside `comments_star_N` get `rating = N`.
for star in range(1, 6):
    bucket = f"comments_star_{star}"
    db.artworks.update_many(
        # `$[]` raises a write error (aborting the whole update_many) on any
        # document where the path is missing or is not an array, so only
        # artworks that actually hold this bucket as an array are targeted.
        {bucket: {"$type": "array"}},
        {"$set": {f"{bucket}.$[].rating": star}},
    )

## Visitors' `is_customer`

In [15]:
# Fields that are removed from every visitor flagged `is_customer: False`.
customer_fields = [
    "surname", "name",
    "donations_ids", "sales_ids",
    "phone_num", "email",
    "trade_ids",
    "chat", "comments",
]

# Aggregation-pipeline update with two stages:
#   1. drop the customer-only fields listed above;
#   2. truncate `tickets` and `surveys` to their first element
#      (`$slice` with a count of 1).
truncated_arrays = {
    array_name: {"$slice": [f"${array_name}", 1]}
    for array_name in ("tickets", "surveys")
}
non_customer_pipeline = [{"$unset": customer_fields}, {"$set": truncated_arrays}]

# Kept as the cell's last expression so the UpdateResult is displayed.
db.visitors.update_many({"is_customer": False}, non_customer_pipeline)

UpdateResult({'n': 51, 'nModified': 51, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

## Messages' `type`

In [16]:
# Make every message carry only the fields that match its `type`.

# Text messages need no per-document value, so a single bulk update strips
# the media-only fields from all of them at once.
db.messages.update_many({"type": "text"}, [{"$unset": ["uri", "size"]}])

# Audio and picture messages each receive a freshly generated fake value, so
# they are updated one by one. Only `_id` and `type` are fetched, since no
# other field of the stored document is read here.
media_messages = db.messages.find(
    {"type": {"$in": ["audio", "picture"]}},
    {"type": 1},
)
for msg in media_messages:
    kind = msg["type"]
    if kind == "audio":
        # Audio: drop the picture fields and store 100 random hex characters
        # as `content`.
        pipeline = [
            {"$unset": ["uri", "size"]},
            # `$literal` keeps the generated string from being parsed as a
            # field path/expression should it ever start with `$`.
            {"$set": {"content": {"$literal": faker.hexify("^" * 100)}}},
        ]
    elif kind == "picture":
        # Picture: drop the audio fields and store a fake image URL as `uri`.
        pipeline = [
            {"$unset": ["content", "length"]},
            {"$set": {"uri": {"$literal": faker.image_url()}}},
        ]
    else:
        # e.g. an array-valued `type` matched by `$in`; left untouched, as in
        # the original per-type equality checks.
        continue
    db.messages.update_one({"_id": msg["_id"]}, pipeline)

## Suppliers' `is_museum`

In [19]:
# Set `is_museum` on every supplier to whether it has a `survey_ids` field:
# True where the field exists, False where it is absent.
# The loop variable is named explicitly rather than `_`: its value is used,
# and `_` is the name IPython uses for the last cell output.
for has_survey_ids in (True, False):
    db.suppliers.update_many(
        {"survey_ids": {"$exists": has_survey_ids}},
        [{"$set": {"is_museum": has_survey_ids}}],
    )

---
## Dump Final Database

In [20]:
%%capture
# Persist the now-consistent `omero_museum` database as the `4_consistent`
# snapshot; `%%capture` hides the mongodump log from the notebook output.
!mongodump --host localhost:27017 --db omero_museum --out "../backup/4_consistent"