In [1]:
%load_ext autoreload
%autoreload 2
from pymongo import MongoClient
import sys
from pathlib import Path
from tqdm import tqdm
import json

sys.path.append(str(Path("..").resolve()))
from src import *

# Seeding Missing Collections and Entries
ℹ️ This notebook requires [`1.1-Migrating_MySQL_To_MongoDB`](1.1-Migrating_MySQL_To_MongoDB.ipynb) to have been executed first. Alternatively, you can just load the snapshot, as done in the following cell.

## Load post-preprocessing data
The following cell resets the `omero_museum` database by loading the `2_migration_preprocessed` snapshot.

In [3]:
%%capture
# Reset `omero_museum` to the `2_migration_preprocessed` snapshot.
# `%%capture` (which must stay on the first line of the cell) silences the
# output of everything below, including the mongorestore log.

# Drop the whole database first so nothing from a previous run survives.
MongoClient("mongodb://localhost:27017/").drop_database("omero_museum")
# Re-import the dump; `--drop` additionally drops each target collection
# right before it is restored.
!mongorestore --host localhost:27017 --drop --db omero_museum  "../backup/2_migration_preprocessed/omero_museum"

In [None]:
# Load the JSON schema. The context manager closes the file handle
# deterministically (the bare `open(...)` left it open until garbage
# collection), and the explicit encoding avoids depending on the locale.
with open("schema.json", encoding="utf-8") as schema_file:
    schema = json.load(schema_file)

# Open the project connector on `omero_museum` and keep a short alias to the
# database handle used by the following cells.
connector = MongoDBConnector("omero_museum")
db = connector.db

The collections of the [1m[33momero_museum[0m db are:
----------------------------------------
[activities]:
[artworks]: [1m[31m_id[0m [1m[33mdate[0m [1m[32mdescription[0m [1m[36mroom[0m [1m[34mtype[0m
[authors]: [1m[31m_id[0m [1m[33mbirth_date[0m [1m[32mgender[0m [1m[36mhome_town[0m [1m[34mname[0m [1m[35msurname[0m
[departments]: [1m[31m_id[0m [1m[33mfloor[0m [1m[32mfree_spots[0m [1m[36mroom[0m
[employees]: [1m[31m_id[0m [1m[33mbirth_date[0m [1m[32mcurriculum[0m [1m[36mdate_start[0m [1m[34memail[0m [1m[35mgender[0m [1m[37mhometown[0m [1m[90mname[0m [1m[91mphone_number[0m [1m[93msurname[0m
[limited_events]:
[messages]:
[rooms]:
[suppliers]:
[surveys]: [1m[31m_id[0m [1m[33maccompanying_persons_visit[0m [1m[32mdate_of_compilation[0m [1m[36mevaluation_of_experience[0m [1m[34mevaluation_of_facility[0m [1m[35mevaluation_of_visit[0m [1m[37mnumber_of_visits[0m [1m[90mreason_for_visit[0m [1m

## Generating missing entries


In [32]:
# Pools of identifiers, keyed by collection name. They are used both as the
# `_id` source of the seeded collections and as the values that
# cross-collection reference fields are drawn from.
ids = dict(
    # Plain integer identifiers.
    surveys=range(100),
    visitors=range(50),
    tickets=range(50),
    # Workshops already exist in the database: reuse their actual `_id`s.
    workshops=[workshop["_id"] for workshop in db.workshops.find({})],
    activities=range(25),
    # Rooms are identified by their name.
    rooms=[
        "Greco e Romano",
        "Medievale e 400",
        "Ancona",
        "Rinascimentale",
        "Contemporaneo",
        "Impressionismo",
    ],
    messages=range(100),
    suppliers=range(25),
    # Artworks are identified by their title.
    artworks=[
        "Mona Lisa",
        "The Starry Night",
        "The Persistence of Memory",
        "The Birth of Venus",
        "The Night Watch",
        "Girl with a Pearl Earring",
        "Guernica",
        "American Gothic",
        "The Scream",
        "Les Demoiselles d'Avignon",
        "The Kiss",
        "The Last Supper",
        "Liberty Leading the People",
        "The Great Wave off Kanagawa",
        "Nighthawks",
        "Campbell's Soup Cans",
        "Impression, Sunrise",
        "No. 5, 1948",
        "A Sunday Afternoon on the Island of La Grande Jatte",
        "The School of Athens",
    ],
    # Limited events are identified by their title.
    limited_events=[
        "Digital Preservation of Cultural Heritage",
        "Museum Education in the 21st Century",
        "Community Engagement and Inclusion",
        "Sustainability in Museum Practices",
        "Virtual and Augmented Reality in Exhibitions",
        "Ethics of Artifact Repatriation",
        "Artificial Intelligence in Collections Management",
        "Storytelling Through Curatorial Design",
        "Accessibility and Universal Design in Museums",
        "Cross-Cultural Dialogue Through Exhibits",
        "The Future of Archaeological Conservation",
        "Museums as Spaces for Social Justice",
        "Data-Driven Visitor Experience",
        "Collaborative Curation with Indigenous Communities",
        "The Role of Museums in Climate Change Awareness",
        "Gamification of Museum Learning",
        "Blockchain for Provenance Tracking",
        "Digital Twins of Artifacts",
        "Museums and Mental Health",
        "Hybrid Exhibitions: Physical Meets Digital",
    ],
)

In [40]:
# Candidate values for the `theme` field of limited events.
themes = [
    "Preservation", "Education", "Engagement", "Sustainability",
    "Virtuality", "Repatriation", "AI", "Storytelling",
    "Accessibility", "Dialogue", "Conservation", "Justice",
    "Data", "Collaboration", "Climate", "Gamification",
    "Blockchain", "DigitalTwins", "Wellbeing", "Hybridization",
]

# Every survey document currently stored, with the `_id` field projected out.
surveys = db.surveys.find({}, {"_id": 0}).to_list()

In [35]:
def _unique_sample(elements, max_len, min_len=0):
    """Build an `EntrySeeder` callable that draws a random subset of `elements`.

    Replaces the lambda that was previously copy-pasted for every
    "list of references" field.

    Args:
        elements: Collection the values are drawn from.
        max_len: Largest subset size (inclusive).
        min_len: Smallest subset size (inclusive); defaults to 0.

    Returns:
        A one-argument function. Its argument must expose `random_int` and
        `random_elements` (presumably the Faker instance that `EntrySeeder`
        hands to callables -- confirm in `src`). The function returns
        between `min_len` and `max_len` elements, drawn with `unique=True`.
    """

    def draw(fake):
        # Same draw order as the original lambdas: size first, then elements.
        size = fake.random_int(min=min_len, max=max_len)
        return fake.random_elements(elements, length=size, unique=True)

    return draw


# One `DocSeeder` per collection to generate, keyed by collection name.
# Reference fields draw straight from the matching `ids[...]` pool rather than
# from `range(len(ids[...]))`, so they stay valid even if a pool is no longer
# a 0-based range.
seeders = {
    "rooms": DocSeeder(
        {
            "_id": EntrySeeder(ids["rooms"], unique=True),
            "floor": EntrySeeder(range(5), p=0.6),
        }
    ),
    "activities": DocSeeder(
        {
            "_id": EntrySeeder(ids["activities"], unique=True),
            "room": EntrySeeder(ids["rooms"]),
            "duration": EntrySeeder(range(15, 600, 15)),
            "enrolled": EntrySeeder(range(20)),
            "startDate": EntrySeeder("date_time"),
            "capacity": EntrySeeder(range(20, 100)),
            "ticketIds": EntrySeeder(_unique_sample(ids["tickets"], max_len=15)),
            "workshopTitle": EntrySeeder(ids["workshops"], p=0.4),
        }
    ),
    "limited_events": DocSeeder(
        {
            "_id": EntrySeeder(ids["limited_events"], unique=True),
            "capacity": EntrySeeder(range(20, 50)),
            "type": EntrySeeder(["exhibition", "conference"]),
            # NOTE(review): startDate and endDate use two separate "date_time"
            # seeders, so endDate looks like it can precede startDate --
            # confirm whether that matters for the consumers of this data.
            "startDate": EntrySeeder("date_time"),
            "endDate": EntrySeeder("date_time"),
            "artist": EntrySeeder(range(10)),
            "theme": EntrySeeder(themes, p=0.5),
            "description": EntrySeeder("text"),
            "roomName": EntrySeeder(ids["rooms"]),
            "authorIds": EntrySeeder(_unique_sample(range(15), max_len=4)),
            "artworkTitles": EntrySeeder(
                _unique_sample(ids["artworks"], max_len=10, min_len=3),
                p=0.8,
            ),
            "ticketIds": EntrySeeder(_unique_sample(ids["tickets"], max_len=10)),
        }
    ),
    "messages": DocSeeder(
        {
            "_id": EntrySeeder(ids["messages"], unique=True),
            "customerId": EntrySeeder(ids["visitors"]),
            "sentDate": EntrySeeder("date_time"),
            "type": EntrySeeder(["text", "audio", "picture"]),
            "content": EntrySeeder("text"),
            "deliveryStatus": EntrySeeder(["sending", "sent", "received", "seen"]),
            "length": EntrySeeder(range(10, 300, 5), p=0.25),
            "uri": EntrySeeder("url", p=0.35),
        }
    ),
    "suppliers": DocSeeder(
        {
            "_id": EntrySeeder(ids["suppliers"], unique=True),
            "name": EntrySeeder("name"),
            "iban": EntrySeeder("iban"),
            "isState": EntrySeeder("boolean", p=0.5),
            "isMuseum": EntrySeeder("boolean"),
            "email": EntrySeeder("email"),
            "phoneNum": EntrySeeder("phone_number"),
            "surveys": EntrySeeder(_unique_sample(ids["surveys"], max_len=15)),
        }
    ),
}

In [36]:
# Rebuild each seeded collection from scratch: empty it, then insert as many
# generated documents as there are identifiers in the matching `ids` pool.
for coll, seeder in seeders.items():
    cprint("Seeding", f"green:{coll}", "...")
    target = db[coll]
    target.delete_many({})
    doc_count = len(ids[coll])
    target.insert_many(seeder.seed(doc_count))

Seeding [1m[32mrooms[0m ...
Seeding [1m[32mactivities[0m ...
Seeding [1m[32mlimited_events[0m ...
Seeding [1m[32mmessages[0m ...
Seeding [1m[32msuppliers[0m ...


Collections that are already fine (no seeding needed):
- departments
- employees
- authors
- workshops
- surveys

Collections still missing (to be seeded):
- artworks
- visitors
- tickets