In [22]:
%load_ext autoreload
%autoreload 2
from pymongo import MongoClient
import sys
from datetime import datetime
from pathlib import Path
from tqdm import tqdm
import json
from pprint import pprint

# Add the parent of the current working directory to the import path so that
# the local `src` package can be imported from this notebook.
sys.path.append(str(Path("..").resolve()))
# NOTE(review): the wildcard import presumably provides `MongoDBConnector`
# used further down — confirm, and prefer importing the needed names explicitly.
from src import *

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Read Queries
ℹ️ This notebook requires the prior execution of [`1.3-Consistency_Recover`](../1-migration/1.3-Consistency_Recover.ipynb). Alternatively, you can load the snapshot as shown in the following cell.

## Load post-preprocessing data
The following cell resets the `omero_museum` database by loading the `4_consistent` snapshot.

In [3]:
%%capture
# Drop the existing `omero_museum` database on the local MongoDB server before restoring.
MongoClient("mongodb://localhost:27017/").drop_database("omero_museum")
# Restore the `4_consistent` dump into `omero_museum` with the mongorestore CLI
# (its output is suppressed by the %%capture magic above).
!mongorestore --host localhost:27017 --drop --db omero_museum  "../backup/4_consistent/omero_museum"

In [4]:
# Open the project connector for the `omero_museum` database.
# NOTE(review): `MongoDBConnector` presumably comes from `from src import *`
# and prints the collection summary shown below — confirm.
connector = MongoDBConnector("omero_museum")
# Database handle used by every query cell in this notebook.
db = connector.db

The collections of the [1m[33momero_museum[0m db are:
----------------------------------------
[activities]: [1m[31m_id[0m [1m[33mcapacity[0m [1m[32mduration[0m [1m[36menrolled[0m [1m[34mroom[0m [1m[35mstartDate[0m [1m[37mticketIds[0m [1m[90mworkshopTitle[0m
[artworks]: [1m[31m_id[0m [1m[33mauthorIds[0m [1m[32mcomments_star_1[0m [1m[36mcomments_star_2[0m [1m[34mcomments_star_3[0m [1m[35mcomments_star_4[0m [1m[37mcomments_star_5[0m [1m[90mdate[0m [1m[91mdescription[0m [1m[93mdonationState[0m [1m[92mdonatorId[0m [1m[96misOriginal[0m [1m[94mlocationName[0m [1m[95mmaterials[0m [1m[31mperiod[0m [1m[33msellerId[0m [1m[32msize[0m [1m[36mtecniques[0m [1m[34mtrade[0m [1m[35mtype[0m
[authors]: [1m[31m_id[0m [1m[33mbirth_date[0m [1m[32mgender[0m [1m[36mhome_town[0m [1m[34mname[0m [1m[35msurname[0m
[departments]: [1m[31m_id[0m [1m[33mfloor[0m [1m[32mfree_spots[0m [1m[36mroom[0m
[emp

### RD1: _Read all the information of a given artwork, including its authors, materials, crafting techniques and room placement_

In [None]:
# RD1: fetch the whole document of one artwork, looked up by its `_id`.
artwork = "Mona Lisa"
artwork_filter = {"_id": artwork}
doc = db.artworks.find_one(artwork_filter)
pprint(doc, width=120)

### RD2: _Read the last 10 comments of a given artwork_

In [None]:
# RD2: the most recent comments of one artwork, newest first.
artwork_id = "Mona Lisa"
max_comments = 10

# Comments are read from one array per star rating (comments_star_1..5).
# $concatArrays evaluates to null as soon as one operand is null or missing,
# which would make $unwind emit nothing and silently hide every comment of an
# artwork that lacks a single bucket. Each bucket therefore falls back to [].
comment_buckets = [
    {"$ifNull": [f"$comments_star_{stars}", []]} for stars in range(1, 6)
]

pipeline = [
    # Select the artwork.
    {"$match": {"_id": artwork_id}},
    # Merge the per-rating buckets into a single array.
    {"$project": {"all_comments": {"$concatArrays": comment_buckets}}},
    # One document per comment, so they can be sorted by date.
    {"$unwind": "$all_comments"},
    {"$sort": {"all_comments.date": -1}},
    {"$limit": max_comments},
    # Return the bare comment sub-documents.
    {"$replaceRoot": {"newRoot": "$all_comments"}},
]

last_10_comments = list(db.artworks.aggregate(pipeline))
pprint(last_10_comments, width=120)

### RD3: _Calculate the average rating of a given artwork_

In [12]:
# RD3: average rating over all the comments of one artwork.
artwork_id = "Mona Lisa"

# $concatArrays evaluates to null when any operand is null or missing, which
# would leave nothing to unwind and report "no rating" for an artwork that
# simply lacks one star bucket. Each bucket therefore falls back to [].
comment_buckets = [
    {"$ifNull": [f"$comments_star_{stars}", []]} for stars in range(1, 6)
]

pipeline_avg = [
    # Select the artwork.
    {"$match": {"_id": artwork_id}},
    # Merge the per-rating buckets into a single array.
    {"$project": {"all_comments": {"$concatArrays": comment_buckets}}},
    # One document per comment.
    {"$unwind": "$all_comments"},
    # Single group: average the rating over every comment.
    {"$group": {
        "_id": None,
        "avg_rating": {"$avg": "$all_comments.rating"},
    }},
]

avg_rating_result = list(db.artworks.aggregate(pipeline_avg))
# The result list is empty when the artwork is unknown or has no comments.
print("Average rating:", avg_rating_result[0]["avg_rating"] if avg_rating_result else None)

Average rating: 3.0


### RD4: _Calculate the average rating of the comments posted by a given customer_

In [14]:
# RD4: compute the average rating of the comments posted by a specific
# customer (MongoDB aggregation pipeline).

customer_id = 0

# Stage 1: keep only the requested visitor document.
select_customer = {"$match": {"_id": customer_id}}
# Stage 2: emit one document per embedded comment.
one_doc_per_comment = {"$unwind": "$comments"}
# Stage 3: average the ratings, grouped back by visitor id.
average_per_customer = {
    "$group": {"_id": "$_id", "avg_rating": {"$avg": "$comments.rating"}}
}
pipeline = [select_customer, one_doc_per_comment, average_per_customer]

result = list(db.visitors.aggregate(pipeline))
# An empty result means no matching visitor or no comments to unwind.
avg_rating = result[0]["avg_rating"] if result else None
print("Average rating by customer:", avg_rating)

Average rating by customer: 3.3333333333333335


### RD5: _Read all the comments of a given artwork with a given rating_

In [None]:
# RD5: every comment of one artwork that carries a given rating.
artwork_id = "Mona Lisa"
desired_rating = 4

# $concatArrays evaluates to null when any operand is null or missing, which
# would hide all comments of an artwork lacking a single star bucket.
# Each bucket therefore falls back to an empty array.
comment_buckets = [
    {"$ifNull": [f"$comments_star_{stars}", []]} for stars in range(1, 6)
]

pipeline = [
    # Select the artwork.
    {"$match": {"_id": artwork_id}},
    # Merge the per-rating buckets into a single array.
    {"$project": {"all_comments": {"$concatArrays": comment_buckets}}},
    # One document per comment, then keep only the requested rating.
    {"$unwind": "$all_comments"},
    {"$match": {"all_comments.rating": desired_rating}},
    # Return the bare comment sub-documents.
    {"$replaceRoot": {"newRoot": "$all_comments"}},
]

comments_with_rating = list(db.artworks.aggregate(pipeline))
pprint(comments_with_rating, width=120)

### RD6: _Read all the surveys, relative to this museum, filled out in a specific year_

In [None]:
# RD6: surveys embedded in one museum supplier document, restricted to a year.
museum_id = 12
year = 2021

# Select the supplier document flagged as a museum.
select_museum = {"$match": {"_id": museum_id, "isMuseum": True}}
# Derive the compilation year of each unwound survey so it can be filtered.
tag_with_year = {
    "$addFields": {"survey_year": {"$year": "$surveys.date_of_compilation"}}
}
keep_requested_year = {"$match": {"survey_year": year}}

pipeline = [
    select_museum,
    {"$unwind": "$surveys"},
    tag_with_year,
    keep_requested_year,
    # Return the bare survey sub-documents.
    {"$replaceRoot": {"newRoot": "$surveys"}},
]

surveys_in_year = list(db.suppliers.aggregate(pipeline))
pprint(surveys_in_year, width=120)

### RD7: _Read all the surveys filled out by a specific customer_

In [None]:
# RD7: every survey embedded in the document of one customer.
customer_id = 0

# Only visitor documents flagged as customers are considered.
select_customer = {"$match": {"_id": customer_id, "isCustomer": True}}
pipeline = [
    select_customer,
    {"$unwind": "$surveys"},
    # Return the bare survey sub-documents.
    {"$replaceRoot": {"newRoot": "$surveys"}},
]

customer_surveys = list(db.visitors.aggregate(pipeline))
pprint(customer_surveys, width=120)

### RD8: _Read all the surveys from a specific affiliated museum_

In [None]:
# RD8: every survey embedded in the document of one affiliated museum.
affiliated_museum_id = 12

# Only supplier documents flagged as museums are considered.
select_museum = {"$match": {"_id": affiliated_museum_id, "isMuseum": True}}
pipeline = [
    select_museum,
    {"$unwind": "$surveys"},
    # Return the bare survey sub-documents.
    {"$replaceRoot": {"newRoot": "$surveys"}},
]

affiliated_museum_surveys = list(db.suppliers.aggregate(pipeline))
pprint(affiliated_museum_surveys, width=120)

### RD9: _Read all tickets of a visitor_

In [None]:
# RD9: every ticket embedded in the document of one visitor.
visitor_id = 0

select_visitor = {"$match": {"_id": visitor_id}}
pipeline = [
    select_visitor,
    {"$unwind": "$tickets"},
    # Return the bare ticket sub-documents.
    {"$replaceRoot": {"newRoot": "$tickets"}},
]

visitor_tickets = list(db.visitors.aggregate(pipeline))
pprint(visitor_tickets, width=120)

### RD10: _Read all tickets for an event_

[]


### RD11: _Count tickets issued in a year_

### RD12: _Read 10 messages of a given chat sent before a specific date_

In [None]:
# RD12: the 10 latest messages of a customer's chat sent before a cut-off date.
customer_id = 0
date_limit = datetime(2025, 8, 25, 0, 0, 0)

# Path of the message timestamp once `chat.new_messages` has been unwound.
sent_date_field = "chat.new_messages.sentDate"

pipeline = [
    {"$match": {"_id": customer_id}},
    # One document per message.
    {"$unwind": "$chat.new_messages"},
    # Strictly before the cut-off, newest first, capped at 10.
    {"$match": {sent_date_field: {"$lt": date_limit}}},
    {"$sort": {sent_date_field: -1}},
    {"$limit": 10},
    # Return the bare message sub-documents.
    {"$replaceRoot": {"newRoot": "$chat.new_messages"}},
]

messages = list(db.visitors.aggregate(pipeline))
pprint(messages, width=120)

### RD13: _Read active chats for a guide_ (correction json first)

### RD14: _Count chats opened in a year_

In [24]:
# RD14: number of customer chats whose creation date falls in a given year.
year = 2025

# Customers that have a chat creation date at all.
has_chat = {"$match": {"isCustomer": True, "chat.date_creation": {"$exists": True}}}
# Extract the creation year so it can be compared with `year`.
tag_with_year = {"$addFields": {"chat_year": {"$year": "$chat.date_creation"}}}

pipeline = [
    has_chat,
    tag_with_year,
    {"$match": {"chat_year": year}},
    {"$count": "opened_chats"},
]

result = list(db.visitors.aggregate(pipeline))
# $count emits no document when nothing matches, hence the 0 fallback.
opened_chats = result[0]["opened_chats"] if result else 0
print("Chats opened in", year, ":", opened_chats)

Chats opened in 2025 : 12


### RD15: _Retrieve the artwork of a trade_

### RD16: _Count trades in a given year_

In [25]:
# RD16: number of artworks whose trade started in a given year.
trade_year = 2025

# Artworks that carry a trade start date at all.
has_trade = {"$match": {"trade.date_start": {"$exists": True}}}
# Extract the start year so it can be compared with `trade_year`.
tag_with_year = {"$addFields": {"trade_year": {"$year": "$trade.date_start"}}}

pipeline_count_trades = [
    has_trade,
    tag_with_year,
    {"$match": {"trade_year": trade_year}},
    {"$count": "trades_in_year"},
]

trades_in_year = list(db.artworks.aggregate(pipeline_count_trades))
# $count emits no document when nothing matches, hence the 0 fallback.
trade_count = trades_in_year[0]["trades_in_year"] if trades_in_year else 0
print("Trades in", trade_year, ":", trade_count)

Trades in 2025 : 20


### RD17: _Retrieve the customer of a given artwork_

### RD18: _Retrieve all artworks inside a given room_

### RD19: _Retrieve all artworks of a given author_

In [None]:
# RD19: every artwork whose `authorIds` matches the given author id.
# NOTE(review): `authorIds` is presumably an array of id strings — confirm.
author_id = "68b572f8845fcdbabc1c8905"
author_pipeline = [{"$match": {"authorIds": author_id}}]
artworks_by_author = list(db.artworks.aggregate(author_pipeline))
pprint(artworks_by_author, width=120)

### RD20: _Retrieve all activities of a given workshop type_

In [None]:
# RD20: every activity whose `workshopTitle` equals the given title.
workshop_title = "Uno solo... ma tanti"
workshop_pipeline = [{"$match": {"workshopTitle": workshop_title}}]
workshop_activities = list(db.activities.aggregate(workshop_pipeline))
pprint(workshop_activities, width=120)

### RD21: _Retrieve all the employees of a given department_

### RD22: _Read all the surveys filled out in a specific day_

In [None]:
# RD22: surveys, across all supplier documents, compiled on one calendar day.
specific_day = datetime(2021, 4, 22)

# Both sides of the comparison are rendered with the same day-level format.
day_key = specific_day.strftime("%Y-%m-%d")
survey_day_expr = {
    "$dateToString": {"format": "%Y-%m-%d", "date": "$surveys.date_of_compilation"}
}

pipeline = [
    # One document per embedded survey.
    {"$unwind": "$surveys"},
    # Render each compilation date as a day string and keep the requested day.
    {"$addFields": {"survey_date": survey_day_expr}},
    {"$match": {"survey_date": day_key}},
    # Return the bare survey sub-documents.
    {"$replaceRoot": {"newRoot": "$surveys"}},
]

surveys_on_day = list(db.suppliers.aggregate(pipeline))
pprint(surveys_on_day, width=120)

### RD23: _Retrieve information about a given laboratory activity, including the total number of reservations_

In [None]:
# RD23: one activity document plus the number of tickets reserved for it.
activity_id = 0

# $size raises an error when its operand is missing or not an array, so an
# activity stored without `ticketIds` would make the whole aggregation fail.
# Falling back to [] reports such an activity with 0 reservations instead.
reservation_count = {"$size": {"$ifNull": ["$ticketIds", []]}}

pipeline = [
    {"$match": {"_id": activity_id}},
    {"$addFields": {"total_reservations": reservation_count}},
]

activity_info = list(db.activities.aggregate(pipeline))
pprint(activity_info, width=120)

### RD24: _View details about an employee, including their work shifts_

### RD25: _Retrieve all the information about a limited event_

In [None]:
# RD25: fetch the whole document of one limited event, looked up by its `_id`.
event_id = "Digital Preservation of Cultural Heritage"
event_filter = {"_id": event_id}
event_info = db.limited_events.find_one(event_filter)
pprint(event_info, width=120)

### RD26: _Retrieve all the details of a visitor_

In [None]:
# RD26: fetch the whole document of one visitor, looked up by its `_id`.
visitor_id = 0
visitor_filter = {"_id": visitor_id}
visitor_info = db.visitors.find_one(visitor_filter)
pprint(visitor_info, width=120)

### RD27: _Retrieve the visitor count for each land, from most frequent to least frequent_

In [None]:
# RD27: number of visitor documents per `land`, most frequent first.
count_per_land = {"$group": {"_id": "$land", "visitorCount": {"$sum": 1}}}
most_frequent_first = {"$sort": {"visitorCount": -1}}
pipeline = [count_per_land, most_frequent_first]

result = list(db.visitors.aggregate(pipeline))
pprint(result, width=120)

### RD28: _Retrieve the visitor count for each impairment, from most frequent to least frequent_

In [None]:
# RD28: number of visitor documents per `impairment`, most frequent first.
count_per_impairment = {"$group": {"_id": "$impairment", "visitorCount": {"$sum": 1}}}
most_frequent_first = {"$sort": {"visitorCount": -1}}
pipeline = [count_per_impairment, most_frequent_first]

result = list(db.visitors.aggregate(pipeline))
pprint(result, width=120)

### RD29: _Retrieve the artwork count for each author, from most frequent to least frequent_

### RD30: _Calculate the average age of the participants in a given activity who completed the survey_