In [105]:
%load_ext autoreload
%autoreload 2
from pymongo import MongoClient
import sys
from datetime import datetime
from pathlib import Path
from tqdm import tqdm 
import json
from pprint import pprint
from bson import ObjectId

sys.path.append(str(Path("..").resolve()))
from src import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Read Queries
ℹ️ This notebook require the execution of [`1.3-Consistency_Recover`](../1-migration/1.3-Consistency_Recover.ipynb). Or you can just load the snapshop as in the following cell.

## Load post-preprocessing data
The following cell reset the `omero_museum` database loading the `4_consistent` snapshot.

In [50]:
%%capture
MongoClient("mongodb://localhost:27017/").drop_database("omero_museum")
!mongorestore --host localhost:27017 --drop --db omero_museum  "../backup/5_optimized/omero_museum"

In [2]:
connector = MongoDBConnector("omero_museum", silent=True)
db = connector.db

## Artworks

### RD1: _Read all the information of a given artwork, including its authors, materials, crafting techniques and room placement_

In [None]:
artwork_id = "Mona Lisa"
out = db.artworks.aggregate(
    [
        {"$match": {"_id": artwork_id}},
        {
            "$lookup": {
                "from": "authors",
                "localField": "author_ids",
                "foreignField": "_id",
                "as": "authors",
            }
        },
        {
            "$project": {
                "title": 1,
                "date": 1,
                "type": 1,
                "description": 1,
                "is_original": 1,
                "size": 1,
                "period": 1,
                "location_name": 1,
                "tecniques": 1,
                "materials": 1,
                "authors": {
                    "$map": {
                        "input": "$authors",
                        "as": "a",
                        "in": {"$concat": ["$$a.name", " ", "$$a.surname"]},
                    }
                },
            }
        },
    ]
).next()
jprint(out)

 {
     [1m[31m_id[0m : [1m[32mMona Lisa[0m
     [1m[31mdate[0m : [1m[37m2025-08-26 15:53:12[0m
     [1m[31mtype[0m : [1m[32mrelief[0m
     [1m[31mdescription[0m : [1m[32m“Il Partenone racchiude un’armonica sintesi di utilità, solidità e piacevolezza”, Vitruvio da “De Architectura”.[0m
     [1m[31mis_original[0m : [1m[36mFalse[0m
     [1m[31msize[0m : [1m[34m230[0m
     [1m[31mperiod[0m : [1m[32mBaroque[0m
     [1m[31mlocation_name[0m : [1m[32mGaiana[0m
     [1m[31mtecniques[0m : [1m[33m[DigitalArt, OilPainting][0m
     [1m[31mmaterials[0m : [1m[33m[Clay, Canvas, Textile, Marble][0m
     [1m[31mauthors[0m : [1m[33m[Giacomo Balla][0m
 }


### RD2: _Read the last 10 comments of a given artwork_

In [None]:
artwork_id = "The Starry Night"

pipeline = [
    {"$match": {"_id": artwork_id}},
    {
        "$project": {
            "all_comments": {
                "$concatArrays": [f"$comments_star_{s}" for s in range(1, 6)]
            }
        }
    },
    {"$sort": {"all_comments.date": -1}},
    {"$limit": 10},
]
out = db.artworks.aggregate(pipeline).next()
jprint(out)

 {
     [1m[31m_id[0m : [1m[32mThe Starry Night[0m
     [1m[31mall_comments[0m :
     [
        
         {
             [1m[31m_id[0m : [1m[37m68bc20d4dd41b3717d3d7a1c[0m
             [1m[31mrating[0m : [1m[34m1[0m
             [1m[31mdate[0m : [1m[37m2025-08-29 20:35:46[0m
             [1m[31mmessage[0m : [1m[32mStesso corso meglio spingere. Passare termine costringere riva trattare bene pane.
Scorrere occhio fresco idea qua. Valere monte prato signorina notevole.[0m
         }
        
         {
             [1m[31m_id[0m : [1m[37m68bc20d4dd41b3717d3d7a2f[0m
             [1m[31mrating[0m : [1m[34m1[0m
             [1m[31mdate[0m : [1m[37m2025-08-09 05:30:19[0m
             [1m[31mmessage[0m : [1m[32mSecondo nuovo arte difesa salvare sorella. Non caso dipendere fino freddo armare avanzare.
Credere fiducia cattolico possibilità condurre esperienza. Rispondere dopo orecchio offrire ricco.[0m
         }
        
         {
      

### RD3: _Calculate the average rating of a given artwork_

In [None]:
artwork_id = "The Starry Night"

pipeline = [
    {"$match": {"_id": artwork_id}},
    {
        "$project": {
            "all_comments": {
                "$concatArrays": [f"$comments_star_{s}" for s in range(1, 6)]
            }
        }
    },
    {"$unwind": "$all_comments"},
    {"$group": {"_id": None, "avg_rating": {"$avg": "$all_comments.rating"}}},
]
out = list(db.artworks.aggregate(pipeline))
jprint(out)

 [
    
     {
         [1m[31m_id[0m : [1m[37mNone[0m
         [1m[31mavg_rating[0m : [1m[34m1.8333333333333333[0m
     }
 ]


### RD4: _Calculate the average rating of the comments posted by a given customer_

In [235]:
customer_id = db.visitors.find_one({"comments": {"$exists": 1}})["_id"]  # type: ignore

pipeline = [
    {"$match": {"_id": customer_id}},
    {"$unwind": "$comments"},
    {"$group": {"_id": None, "avg_rating": {"$avg": "$comments.rating"}}},
]
out = list(db.visitors.aggregate(pipeline))
jprint(out)

 [
    
     {
         [1m[31m_id[0m : [1m[37mNone[0m
         [1m[31mavg_rating[0m : [1m[34m2.6666666666666665[0m
     }
 ]


### RD5: _Read all the comments of a given artwork with a given rating_

In [None]:
artwork_id = "Mona Lisa"
rating = 1

pipeline = [
    {"$match": {"_id": artwork_id}},
    {"$unwind": f"$comments_star_{rating}"},
    {"$replaceRoot": {"newRoot": f"$comments_star_{rating}"}},
]
out = list(db.artworks.aggregate(pipeline))
jprint(out)

 [
    
     {
         [1m[31m_id[0m : [1m[37m68bc20d4dd41b3717d3d7919[0m
         [1m[31mrating[0m : [1m[34m1[0m
         [1m[31mdate[0m : [1m[37m2025-08-29 04:03:37[0m
         [1m[31mmessage[0m : [1m[32mCrisi qua superiore triste oramai. Qualsiasi civile ecco voi apparire lira. Rimanere diventare convincere gridare pane.
Confessare lira incontrare idea dio contadino. Ufficiale insegnare però sotto.[0m
     }
    
     {
         [1m[31m_id[0m : [1m[37m68bc20d4dd41b3717d3d7986[0m
         [1m[31mrating[0m : [1m[34m1[0m
         [1m[31mdate[0m : [1m[37m2025-08-09 16:14:25[0m
         [1m[31mmessage[0m : [1m[32mPopolazione provincia difficoltà fame comprendere girare terzo di.[0m
     }
 ]


## Surveys

### RD6: _Read all the surveys, relative to this museum, filled out in a specific year_

In [271]:
date_start = datetime(2021, month=1, day=1)
date_end = datetime(2022, month=1, day=1)
out = db.visitors.find(
        {"surveys.date_of_compilation": {"$gte": date_start, "$lte": date_end}},
        {"surveys": 1},
    
)
jprint([_ for visitor in out for _ in visitor["surveys"]][:2])

 [
    
     {
         [1m[31maccompanying_persons_visit[0m : [1m[32mfamiglia[0m
         [1m[31mdate_of_compilation[0m : [1m[37m2021-04-22 00:00:00[0m
         [1m[31mreason_for_visit[0m : [1m[32mtrascorrere tempo libero con amici/parenti[0m
         [1m[31mnumber_of_visits[0m : [1m[34m0[0m
         [1m[31mreturn[0m : [1m[32mProbabilmente No[0m
         [1m[31mtype_of_visit[0m : [1m[32mvisita libera[0m
         [1m[31mtitle_of_studies[0m : [1m[32mDiploma[0m
         [1m[31mevaluation_of_experience[0m : [1m[34m10[0m
         [1m[31mevaluation_of_facility[0m : [1m[34m10[0m
         [1m[31mevaluation_of_visit[0m : [1m[34m10[0m
     }
    
     {
         [1m[31maccompanying_persons_visit[0m : [1m[32mscolaresca[0m
         [1m[31mdate_of_compilation[0m : [1m[37m2021-04-28 00:00:00[0m
         [1m[31mreason_for_visit[0m : [1m[32mpassare un momento personale piacevole[0m
         [1m[31mnumber_of_visits[0m : 

### RD7: _Read all the surveys filled out by a specific customer_

In [282]:
customer_id = db.visitors.find_one({"surveys": {"$exists": 1}})["_id"]  # type: ignore

out = db.visitors.find_one({"_id": customer_id}, {"surveys": 1})
jprint(out["surveys"][:2]) # type: ignore

 [
    
     {
         [1m[31maccompanying_persons_visit[0m : [1m[32mfamiglia[0m
         [1m[31mdate_of_compilation[0m : [1m[37m2021-04-22 00:00:00[0m
         [1m[31mreason_for_visit[0m : [1m[32mtrascorrere tempo libero con amici/parenti[0m
         [1m[31mnumber_of_visits[0m : [1m[34m0[0m
         [1m[31mreturn[0m : [1m[32mProbabilmente No[0m
         [1m[31mtype_of_visit[0m : [1m[32mvisita libera[0m
         [1m[31mtitle_of_studies[0m : [1m[32mDiploma[0m
         [1m[31mevaluation_of_experience[0m : [1m[34m10[0m
         [1m[31mevaluation_of_facility[0m : [1m[34m10[0m
         [1m[31mevaluation_of_visit[0m : [1m[34m10[0m
     }
    
     {
         [1m[31maccompanying_persons_visit[0m : [1m[32mscolaresca[0m
         [1m[31mdate_of_compilation[0m : [1m[37m2021-04-28 00:00:00[0m
         [1m[31mreason_for_visit[0m : [1m[32mpassare un momento personale piacevole[0m
         [1m[31mnumber_of_visits[0m : 

### RD8: _Read all the surveys from a specific affiliated museum_

In [299]:
# The constraint ("is_musum" = true) is implied by the existance of the "surveys" array
supplier_id = db.suppliers.find_one({"survey_ids": {"$exists": 1}})["_id"]  # type: ignore

pipeline = [
    {"$match": {"_id": supplier_id}},
    {
        "$lookup": {
            "from": "visitors",
            "localField": "survey_ids",
            "foreignField": "_id",
            "as": "surveys",
        }
    },
]
out = db.suppliers.aggregate(pipeline).next()
jprint([v["surveys"] for v in out["surveys"]][3])

 [
    
     {
         [1m[31maccompanying_persons_visit[0m : [1m[32mscolaresca[0m
         [1m[31mdate_of_compilation[0m : [1m[37m2021-04-29 00:00:00[0m
         [1m[31mreason_for_visit[0m : [1m[32mimparare cose nuove[0m
         [1m[31mnumber_of_visits[0m : [1m[34m0[0m
         [1m[31mreturn[0m : [1m[32mAssolutamente no[0m
         [1m[31mtype_of_visit[0m : [1m[32mpartecipazione ad una mostra/laboratorio[0m
         [1m[31mtitle_of_studies[0m : [1m[32mElem.[0m
         [1m[31mevaluation_of_experience[0m : [1m[34m10[0m
         [1m[31mevaluation_of_facility[0m : [1m[34m10[0m
         [1m[31mevaluation_of_visit[0m : [1m[34m10[0m
     }
    
     {
         [1m[31maccompanying_persons_visit[0m : [1m[32mgruppo organizzato[0m
         [1m[31mdate_of_compilation[0m : [1m[37m2021-04-22 00:00:00[0m
         [1m[31mreason_for_visit[0m : [1m[32mvedere oggetti importanti[0m
         [1m[31mnumber_of_visits[0m : [

## Tickets

### RD9: _Read all tickets of a visitor_

In [None]:
visitor_id = db.visitors.find_one()["_id"]  # type: ignore

out = db.visitors.find_one(visitor_id)
jprint(out["tickets"]) # type: ignore

 [
    
     {
         [1m[31m_id[0m : [1m[37m68bc20d4dd41b3717d3d718c[0m
         [1m[31mprice[0m : [1m[34m6[0m
         [1m[31mdate[0m : [1m[37m2025-08-25 10:40:58[0m
         [1m[31mcheck_in[0m : [1m[36mTrue[0m
     }
    
     {
         [1m[31m_id[0m : [1m[37m68bc20d4dd41b3717d3d7123[0m
         [1m[31mprice[0m : [1m[34m6[0m
         [1m[31mdate[0m : [1m[37m2025-08-11 08:54:43[0m
         [1m[31mcheck_in[0m : [1m[36mFalse[0m
     }
 ]


### RD10: _Read all tickets for an event_

In [None]:
event_id = ObjectId("68bc20d4dd41b3717d3d7261")

pipeline = [
    {"$match": {"_id": event_id}},
    {
        "$lookup": {
            "from": "visitors",
            "localField": "ticket_ids",
            "foreignField": "tickets._id",
            "as": "visitors",
        }
    },
]
out = db.activities.aggregate(pipeline).next()
jprint([t for v in out["visitors"] for t in v["tickets"]][:2])

 [
    
     {
         [1m[31m_id[0m : [1m[37m68bc20d4dd41b3717d3d71c2[0m
         [1m[31mprice[0m : [1m[34m70[0m
         [1m[31mdate[0m : [1m[37m2025-08-16 06:54:00[0m
         [1m[31mcheck_in[0m : [1m[36mTrue[0m
     }
    
     {
         [1m[31m_id[0m : [1m[37m68bc20d4dd41b3717d3d70fc[0m
         [1m[31mprice[0m : [1m[34m0[0m
         [1m[31mdate[0m : [1m[37m2025-08-24 08:32:48[0m
         [1m[31mcheck_in[0m : [1m[36mFalse[0m
     }
 ]


### RD11: _Count tickets issued in a year_

In [None]:
start = datetime(2025, 1, 1)
end = datetime(2026, 1, 1)

# Using B-Tree
db.visitors.count_documents(
    {"tickets.date": {"$gte": start, "$lt": end}}, hint="tickets.date_1"
)   

81

## Chats

### RD12: _Read 10 messages of a given chat previous than a specific date_

In [None]:
customer_id = db.visitors.find_one({"chat": {"$exists": 1}})["_id"]  # type: ignore
before_date = datetime(2026, 8, 25)

pipeline = [
    {"$match": {"_id": customer_id}},
    {"$project": {"chat.new_messages": 1}},
    {"$unwind": "$chat.new_messages"},
    {"$match": {"chat.new_messages.sent_date": {"$lt": before_date}}},
    {"$sort": {"chat.new_messages.sent_date": -1}},
    {"$limit": 10},
    {"$replaceRoot": {"newRoot": "$chat.new_messages"}},
]
msgs = list(db.visitors.aggregate(pipeline))

if (l := len(msgs)) < 10:
    old_ids = db.visitors.find_one(customer_id)["chat"]["old_messages"]  # type: ignore
    old_msgs = list(
        db.messages.find(
            {
                "_id": {"$in": old_ids},
                # "customer_id": customer_id, # Inconsitent
                "sent_date": {"$lt": before_date},
            },
            sort=[("sent_date", -1)],
            limit=10 - l,
        )
    )
    msgs.extend(old_msgs)
jprint(msgs[:2])

 [
    
     {
         [1m[31mcustomer_id[0m : [1m[37m68bc6aad24580947f6121cb8[0m
         [1m[31msent_date[0m : [1m[37m2025-09-03 14:57:37[0m
         [1m[31mtype[0m : [1m[32mpicture[0m
         [1m[31mcontent[0m : [1m[32mTecnico prendere grosso notevole dormire tuttavia. Mercato largo sacrificio davanti buio onore. Limitare subito trarre lui troppo.
Scusare fede attesa abitare chi condurre.[0m
         [1m[31mdelivery_status[0m : [1m[32msending[0m
     }
    
     {
         [1m[31mcustomer_id[0m : [1m[37m68bc6aad24580947f6121cae[0m
         [1m[31msent_date[0m : [1m[37m2025-09-01 07:40:18[0m
         [1m[31mtype[0m : [1m[32maudio[0m
         [1m[31mcontent[0m : [1m[32mScoprire ecco essere giornata. Né ordinare senso qualcosa discorso. Cantare tranquillo enorme improvviso origine appunto campo difesa.
Villa riunire liberare giovane. Incontrare muovere operaio.[0m
         [1m[31mdelivery_status[0m : [1m[32msending[0m
     }

### RD13: _Read active chats for a guide_

In [97]:
guide_id = db.roles.find_one({"type": "guide"})["_id"]  # type: ignore

pipeline = [
    {"$match": {"_id": guide_id}},
    {
        "$lookup": {
            "from": "visitors",
            "localField": "chat_ids",
            "foreignField": "chat._id",
            "as": "customers",
        }
    },
]
out = db.roles.aggregate(pipeline).next()
jprint([c["chat"] for c in out["customers"]])

 [
    
     {
         [1m[31m_id[0m : [1m[37m68bc6aad24580947f6122434[0m
         [1m[31mdate_creation[0m : [1m[37m2025-08-27 05:01:38[0m
         [1m[31mnew_messages[0m :
         [
            
             {
                 [1m[31mcustomer_id[0m : [1m[37m68bc6aad24580947f6121cd5[0m
                 [1m[31msent_date[0m : [1m[37m2025-08-13 19:52:11[0m
                 [1m[31mtype[0m : [1m[32mtext[0m
                 [1m[31mcontent[0m : [1m[32mSposare ciascuno lì chiave. Fermare spiegare dolce addosso. Arrestare fiducia cioè finestra pensiero contento ora.[0m
                 [1m[31mdelivery_status[0m : [1m[32mreceived[0m
             }
            
             {
                 [1m[31mcustomer_id[0m : [1m[37m68bc6aad24580947f6121cb0[0m
                 [1m[31msent_date[0m : [1m[37m2025-08-26 00:54:45[0m
                 [1m[31mtype[0m : [1m[32mtext[0m
                 [1m[31mcontent[0m : [1m[32mPrincipale esso se

### RD14: _Count chats opened in a year_

In [101]:
start = datetime(2025, 1, 1)
end = datetime(2026, 1, 1)

# Using B-Tree
db.visitors.count_documents(
    {"chat.date_creation": {"$gte": start, "$lt": end}}, hint="chat.date_creation_1"
)

18

## Trades

### RD15: _Retrieve the artwork of a trade_

In [103]:
trade_id = db.artworks.find_one({"trade": {"$exists": True}})["trade"]["_id"]  # type: ignore

out = db.artworks.find_one({"trade._id": trade_id})
jprint(out)

 {
     [1m[31m_id[0m : [1m[32mMona Lisa[0m
     [1m[31mdate[0m : [1m[37m2025-09-02 09:28:18[0m
     [1m[31mtype[0m : [1m[32mrelief[0m
     [1m[31mdescription[0m : [1m[32m“Il Partenone racchiude un’armonica sintesi di utilità, solidità e piacevolezza”, Vitruvio da “De Architectura”.[0m
     [1m[31mis_original[0m : [1m[36mFalse[0m
     [1m[31msize[0m : [1m[34m230[0m
     [1m[31mperiod[0m : [1m[32mAncient[0m
     [1m[31mlocation_name[0m : [1m[32mForlimpopoli[0m
     [1m[31mauthor_ids[0m :
     [
         [1m[37m68b572f8845fcdbabc1c8905[0m
         [1m[37m68b572f8845fcdbabc1c8908[0m
     ]
     [1m[31mtecniques[0m : [1m[33m[Mosaic][0m
     [1m[31mcomments_star_1[0m :
     [
        
         {
             [1m[31m_id[0m : [1m[37m68bc6aad24580947f612262b[0m
             [1m[31mrating[0m : [1m[34m1[0m
             [1m[31mdate[0m : [1m[37m2025-08-24 21:51:17[0m
             [1m[31mmessage[0m : [1m[32mFol

### RD16: _Count trades in a given year_

In [104]:
start = datetime(2025, 1, 1)
end = datetime(2026, 1, 1)

# Using B-Tree
db.artworks.count_documents(
    {"trade.date_start": {"$gte": start, "$lt": end}}, hint="trade.date_start_1"
)

20