In [None]:
%load_ext autoreload
%autoreload 2
from pymongo import MongoClient
import sys
from pathlib import Path
from tqdm import tqdm
import json

# Put the parent of the current working directory on the import path so that
# the local `src` package can be imported from this notebook.
sys.path.append(str(Path("..").resolve()))
# NOTE(review): the star import hides where names used below (e.g.
# `MongoDBConnector`, `cprint`) come from — consider importing them explicitly.
from src import *

# Migrate MySQL to MongoDB
As the database schema has evolved significantly since the current MySQL database was created, we've first migrated a subset of the collections using [_MongoDB Relational Migrator_](https://www.mongodb.com/resources/solutions/use-cases/mysql-to-mongodb).

The database up to this point can be loaded from `backup/1_after_migration` using the following command.
```sh
mongorestore --host localhost:27017 --db omero_museum  "../backup/1_after_migration/"
```

In this notebook we are going to complete the migration using handwritten queries.

## Load post-migration data
The following cell resets the `omero_museum` database by loading the `1_after_migration` snapshot.

In [None]:
%%capture
# Start from scratch: drop the `omero_museum` database, then restore it from
# the `1_after_migration` dump with `mongorestore`.
# NOTE(review): `%%capture` silences this cell's output, so a failing
# `mongorestore` (e.g. server not running, wrong path) goes unnoticed here.
MongoClient("mongodb://localhost:27017/").drop_database("omero_museum")
# `--drop` makes mongorestore drop each collection before restoring it.
!mongorestore --host localhost:27017 --drop --db omero_museum  "../backup/1_after_migration/"

## Visualizing the migrated database

In [57]:
# Load the schema description; the context manager closes the file handle
# deterministically instead of leaving it to the garbage collector.
with open("schema.json") as schema_file:
    schema = json.load(schema_file)

# `MongoDBConnector` comes from the local `src` package (star import above).
# `connector.db` is used below like a pymongo database handle
# (`db[name].rename(...)`, `db.create_collection(...)`).
connector = MongoDBConnector("omero_museum")
db = connector.db

The collections of the [1m[33momero_museum[0m db are:
----------------------------------------
[activities]:
[artworks]: [1m[31m_id[0m [1m[33mdata[0m [1m[32mdescrizione[0m [1m[36mid[0m [1m[34msala[0m [1m[35mtipologia[0m
[authors]: [1m[31m_id[0m [1m[33mcognome[0m [1m[32mdata_nasc[0m [1m[36mid[0m [1m[34mluogo_nasc[0m [1m[35mnome[0m [1m[37msesso[0m
[departments]: [1m[31m_id[0m [1m[33mnome[0m [1m[32mpiano[0m [1m[36mposti_occ[0m [1m[34mposti_tot[0m [1m[35mstanza[0m
[employees]: [1m[31m_id[0m [1m[33mcellulare[0m [1m[32mcognome[0m [1m[36mcurriculum[0m [1m[34mdata_nasc[0m [1m[35mdata_registrazione[0m [1m[37memail[0m [1m[90mid[0m [1m[91mluogo_nasc[0m [1m[93mnome[0m [1m[92msesso[0m
[limited_events]:
[messages]:
[rooms]:
[suppliers]:
[surveys]: [1m[31m_id[0m [1m[33maccompagnatori_visita[0m [1m[32mdata_compilazione[0m [1m[36mmotivazione_visita[0m [1m[34mnumero_visite[0m [1m[35mritorno[0m 

There are 3 kinds of problems that catch the eye:
1. Some collections need to be **renamed**.
2. Some collections are **missing**.
3. Several fields have **changed** since the original design.

### 1. Renaming collections

In [45]:
# Legacy (Italian) collection name -> English collection name.
collections_renaming = dict(
    biglietti="tickets",
    reparti="departments",
    dipendenti="employees",
    opere="artworks",
    clienti="visitors",
    laboratori="workshops",
    questionari="surveys",
    artisti="authors",
)

# Rename only the legacy collections that the connector currently reports;
# names that are absent are silently skipped.
for old_name, new_name in collections_renaming.items():
    if old_name in connector.collections:
        db[old_name].rename(new_name)
        cprint("Renaming collection", f"red:{old_name} --> {new_name}")

Renaming collection [1m[31mbiglietti --> tickets[0m
Renaming collection [1m[31mreparti --> departments[0m
Renaming collection [1m[31mdipendenti --> employees[0m
Renaming collection [1m[31mopere --> artworks[0m
Renaming collection [1m[31mclienti --> visitors[0m
Renaming collection [1m[31mlaboratori --> workshops[0m
Renaming collection [1m[31mquestionari --> surveys[0m
Renaming collection [1m[31martisti --> authors[0m


### 2. Adding missing collections

In [46]:
# Collections that the target design needs but the migrated snapshot lacks.
collections_missing = [
    "rooms",
    "messages",
    "suppliers",
    "limited_events",
    "activities",
]

# `create_collection` raises if the collection already exists, which made this
# cell fail when re-run. Look the existing names up once and skip them so the
# cell is idempotent.
existing_collections = set(db.list_collection_names())
for name in collections_missing:
    if name in existing_collections:
        continue
    db.create_collection(name)
    cprint("Creating collection", f"green:{name}")

Creating collection [1m[32mrooms[0m
Creating collection [1m[32mmessages[0m
Creating collection [1m[32msuppliers[0m
Creating collection [1m[32mlimited_events[0m
Creating collection [1m[32mactivities[0m


### 3. Renaming documents' entries

In [51]:
# Print every collection with the field names found in it, to see which
# (Italian) field names still have to be renamed.
connector.stats()

The collections of the [1m[33momero_museum[0m db are:
----------------------------------------
[activities]:
[artworks]: [1m[31m_id[0m [1m[33mdata[0m [1m[32mdescrizione[0m [1m[36mid[0m [1m[34msala[0m [1m[35mtipologia[0m
[authors]: [1m[31m_id[0m [1m[33mcognome[0m [1m[32mdata_nasc[0m [1m[36mid[0m [1m[34mluogo_nasc[0m [1m[35mnome[0m [1m[37msesso[0m
[departments]: [1m[31m_id[0m [1m[33mnome[0m [1m[32mpiano[0m [1m[36mposti_occ[0m [1m[34mposti_tot[0m [1m[35mstanza[0m
[employees]: [1m[31m_id[0m [1m[33mcellulare[0m [1m[32mcognome[0m [1m[36mcurriculum[0m [1m[34mdata_nasc[0m [1m[35mdata_registrazione[0m [1m[37memail[0m [1m[90mid[0m [1m[91mluogo_nasc[0m [1m[93mnome[0m [1m[92msesso[0m
[limited_events]:
[messages]:
[rooms]:
[suppliers]:
[surveys]: [1m[31m_id[0m [1m[33maccompagnatori_visita[0m [1m[32mdata_compilazione[0m [1m[36mmotivazione_visita[0m [1m[34mnumero_visite[0m [1m[35mritorno[0m 

In [None]:
# Per-collection mapping: legacy (Italian) field name -> new (English) field name.
#
# NOTE(review): several entries map a field to itself ("curriculum", "email").
# MongoDB's `$rename` rejects identical source and target names, so confirm
# that whatever applies this mapping skips identity entries.
entries_renaming = {
    "artworks": {
        "data": "date",
        "descrizione": "description",
        "sala": "room",
        "tipologia": "type",
    },
    "authors": {
        "cognome": "surname",
        "data_nasc": "birth_date",
        # NOTE(review): employees use "hometown" for the same source field —
        # confirm which spelling the target schema expects.
        "luogo_nasc": "home_town",
        "nome": "name",
        "sesso": "gender",
    },
    "departments": {
        "nome": "name",
        "piano": "floor",
        # NOTE(review): "posti_occ" reads as *occupied* spots, yet it is mapped
        # to "free_spots"; "posti_tot" has no mapping at all — confirm intent.
        "posti_occ": "free_spots",
        "stanza": "room",
    },
    "employees": {
        "cellulare": "phone_number",
        "cognome": "surname",
        "curriculum": "curriculum",
        "data_nasc": "birth_date",
        "data_registrazione": "date_start",
        "email": "email",
        "luogo_nasc": "hometown",
        "nome": "name",
        "sesso": "gender",
    },
    "surveys": {
        "accompagnatori_visita": "accompanying_persons_visit",
        "data_compilazione": "date_of_compilation",
        "motivazione_visita": "reason_for_visit",
        "numero_visite": "number_of_visits",
        "ritorno": "return",
        "tipologia_visita": "type_of_visit",
        "titolo_studi": "title_of_studies",
        "valutazione_esperienza": "evaluation_of_experience",
        "valutazione_struttura": "evaluation_of_facility",
        "valutazione_visita": "evaluation_of_visit",
    },
    "tickets": {
        "costo": "price",
        "data_stampa": "date",
    },
    "visitors": {
        # NOTE(review): contains spaces and differs from employees'
        # "phone_number" — confirm against the target schema.
        "cellulare": "cell phone number",
        "cognome": "surname",
        "email": "email",
        # Fixed: the source field is the Italian "nome" (as in every other
        # collection); the previous key "name" never matched a legacy field.
        "nome": "name",
        "tariffa": "fare",
    },
    "workshops": {
        "costo_classe": "price_class",
        "costo_persona": "price_person",
        "durata": "duration",
        "nome": "title",
        "tipologia": "type",
    },
}

In [None]:
# Drop the legacy `indirizzo` field and rename `nome` -> `name` on every
# visitor document in a single pass.
# Fixes: the collection is `visitors` (plural, see the collection renaming
# above) — `db.visitor` addressed a non-existent collection and matched no
# documents; the rename target also carried a trailing space ("name ").
db.visitors.update_many(
    {},
    {
        "$unset": {"indirizzo": ""},
        "$rename": {"nome": "name"},
    },
)

UpdateResult({'n': 0, 'nModified': 0, 'ok': 1.0, 'updatedExisting': False}, acknowledged=True)

## Setting the right `_id` values