# Ejercicio Formativo 1 Capítulo 6

## Importando Librerías

In [1]:
import json
import sqlite3

## Misión 1: Lectura y exploración de datos

In [2]:
# Read the whole file as UTF-8 text and decode the JSON document in one step.
with open('laureates.json', encoding='utf8') as laureates_file:
    laureates = json.loads(laureates_file.read())

In [4]:
# Preview the first five records. A slice is used instead of indexing 0..4 so
# the preview does not raise IndexError when fewer than five records exist.
for laureate in laureates[:5]:
    print(laureate)

{'id': '1', 'firstname': 'Wilhelm Conrad', 'surname': 'Röntgen', 'born': '1845-03-27', 'died': '1923-02-10', 'bornCountry': 'Prussia (now Germany)', 'bornCountryCode': 'DE', 'bornCity': 'Lennep (now Remscheid)', 'diedCountry': 'Germany', 'diedCountryCode': 'DE', 'diedCity': 'Munich', 'gender': 'male', 'prizes': [{'year': '1901', 'category': 'physics', 'share': '1', 'motivation': '"in recognition of the extraordinary services he has rendered by the discovery of the remarkable rays subsequently named after him"', 'affiliations': [{'name': 'Munich University', 'city': 'Munich', 'country': 'Germany'}]}]}
{'id': '2', 'firstname': 'Hendrik A.', 'surname': 'Lorentz', 'born': '1853-07-18', 'died': '1928-02-04', 'bornCountry': 'the Netherlands', 'bornCountryCode': 'NL', 'bornCity': 'Arnhem', 'diedCountry': 'the Netherlands', 'diedCountryCode': 'NL', 'gender': 'male', 'prizes': [{'year': '1902', 'category': 'physics', 'share': '2', 'motivation': '"in recognition of the extraordinary service they

In [5]:
# Print every field of the first record, one "key: value" pair per line.
firstLaureates = laureates[0]
for key in firstLaureates:
    value = firstLaureates[key]
    print(f"{key}: {value}")

id: 1
firstname: Wilhelm Conrad
surname: Röntgen
born: 1845-03-27
died: 1923-02-10
bornCountry: Prussia (now Germany)
bornCountryCode: DE
bornCity: Lennep (now Remscheid)
diedCountry: Germany
diedCountryCode: DE
diedCity: Munich
gender: male
prizes: [{'year': '1901', 'category': 'physics', 'share': '1', 'motivation': '"in recognition of the extraordinary services he has rendered by the discovery of the remarkable rays subsequently named after him"', 'affiliations': [{'name': 'Munich University', 'city': 'Munich', 'country': 'Germany'}]}]


La información resultante está organizada en una lista de diccionarios, donde cada diccionario corresponde a un ganador de un premio Nobel.

Al revisar la información de la base de datos, se pudo observar que no todos los elementos de `laureates`, que son diccionarios, tienen todas las llaves que deberían tener. Para poder trabajar con estos datos de manera más limpia, se eliminaron los diccionarios que no tenían todas las llaves, tomando como referencia el conjunto de llaves del primer elemento.

In [6]:
# Key set of the first record; used below as the reference for a "complete" laureate.
# Note: .keys() is a live view over laureates[0], not an independent copy.
allKeys = laureates[0].keys()

In [7]:
# Keep only the records whose key set matches the reference key set exactly.
filterLaureates = [candidate for candidate in laureates if candidate.keys() == allKeys]

## Misión 2: Modelación de entidades

Se pueden distinguir las siguientes entidades:

- **Ganadores del premio Nobel**: Tiene los siguientes campos:
    - `id`: Identificador único del ganador.
    - `firstname`: Nombre del ganador.
    - `surname`: Apellido del ganador.
    - `born`: Fecha de nacimiento del ganador.
    - `died`: Fecha de muerte del ganador.
    - `bornCountry`: País de nacimiento del ganador.
    - `bornCountryCode`: Código del país de nacimiento del ganador.
    - `bornCity`: Ciudad de nacimiento del ganador.
    - `diedCountry`: País de muerte del ganador.
    - `diedCountryCode`: Código del país de muerte del ganador.
    - `diedCity`: Ciudad de muerte del ganador.
    - `gender`: Género del ganador.
    - `prizes`: Lista de premios ganados por el ganador.
- **Premios**: Tiene los siguientes campos:
    - `year`: Año en que se otorgó el premio.
    - `category`: Categoría del premio.
    - `share`: Número de ganadores del premio.
    - `motivation`: Motivación del premio.
    - `affiliations`: Lista de afiliaciones de los ganadores.
- **Afiliación**: La afiliación corresponde a la institución o lugar al que estaba asociado el ganador al momento de recibir el premio. Tiene los siguientes campos:
    - `name`: Nombre de la institución.
    - `city`: Ciudad de la institución.
    - `country`: País de la institución.

Para definir la cardinalidad de las relaciones, se revisará con más detalle la información obtenida. Lo que se busca saber es lo siguiente:

- ¿Una persona puede tener varios premios?
- ¿Una persona puede tener varias afiliaciones?

In [9]:
# One summary entry (id, full name, prize count) per laureate holding more than one prize.
ganadoresVariosPremios = [
    {
        "id": laureate["id"],
        "fullname": f"{laureate['firstname']} {laureate['surname']}",
        "premios": len(laureate["prizes"]),
    }
    for laureate in filterLaureates
    if len(laureate["prizes"]) > 1
]

In [10]:
# Show the laureates that hold more than one prize.
print(ganadoresVariosPremios)

[{'id': '6', 'fullname': 'Marie Curie', 'premios': 2}, {'id': '66', 'fullname': 'John Bardeen', 'premios': 2}, {'id': '217', 'fullname': 'Linus Pauling', 'premios': 2}, {'id': '222', 'fullname': 'Frederick Sanger', 'premios': 2}]


In [11]:
# One summary entry per (laureate, prize) pair whose prize lists more than one
# affiliation; a laureate appears once for each qualifying prize.
ganadoresVariasAfiliaciones = [
    {
        "id": laureate["id"],
        "fullname": f"{laureate['firstname']} {laureate['surname']}",
        "afiliaciones": len(prize["affiliations"]),
    }
    for laureate in filterLaureates
    for prize in laureate["prizes"]
    if len(prize["affiliations"]) > 1
]

In [12]:
# Show every case where a single prize lists more than one affiliation.
print(ganadoresVariasAfiliaciones)

[{'id': '54', 'fullname': 'Hideki Yukawa', 'afiliaciones': 2}, {'id': '62', 'fullname': 'Walther Bothe', 'afiliaciones': 2}, {'id': '71', 'fullname': 'Igor Y. Tamm', 'afiliaciones': 2}, {'id': '114', 'fullname': 'Abdus Salam', 'afiliaciones': 2}, {'id': '142', 'fullname': 'Georges Charpak', 'afiliaciones': 2}, {'id': '189', 'fullname': 'Carl Bosch', 'afiliaciones': 2}, {'id': '190', 'fullname': 'Friedrich Bergius', 'afiliaciones': 2}, {'id': '195', 'fullname': 'Peter Debye', 'afiliaciones': 2}, {'id': '198', 'fullname': 'Richard Kuhn', 'afiliaciones': 2}, {'id': '199', 'fullname': 'Adolf Butenandt', 'afiliaciones': 2}, {'id': '216', 'fullname': 'Hermann Staudinger', 'afiliaciones': 2}, {'id': '220', 'fullname': 'Nikolay Semenov', 'afiliaciones': 2}, {'id': '250', 'fullname': 'Ilya Prigogine', 'afiliaciones': 2}, {'id': '302', 'fullname': 'Paul Ehrlich', 'afiliaciones': 2}, {'id': '328', 'fullname': 'William P. Murphy', 'afiliaciones': 2}, {'id': '348', 'fullname': 'Egas Moniz', 'afilia

Cardinalidades:

- Un ganador del premio Nobel tiene asociados uno o varios premios.
- Un ganador del premio Nobel puede tener una o varias afiliaciones.

## Misión 3: Creación de tablas

In [13]:
# Open the SQLite database file (sqlite3 creates it when it does not exist yet)
# and get a cursor for the CREATE TABLE statements that follow.
connection = sqlite3.connect('laureates.db')
cursor = connection.cursor()

In [14]:
# Entity tables. IF NOT EXISTS makes this cell safe to re-run against an existing file.
# Winners: one row per laureate, keyed by the integer id `wid`.
cursor.execute(
    "CREATE TABLE IF NOT EXISTS Winners(wid INTEGER PRIMARY KEY, firstname TEXT, surname TEXT, born DATE, died DATE, bornCountry TEXT, bornCountryCode TEXT, bornCity TEXT, diedCountry TEXT, diedCountryCode TEXT, diedCity TEXT, gender TEXT)"
)
# Prizes: one row per distinct prize, keyed by `pid`.
cursor.execute(
    "CREATE TABLE IF NOT EXISTS Prizes(pid INTEGER PRIMARY KEY, year INTEGER, category TEXT, share INTEGER, motivation TEXT)"
)
# Affiliations: one row per distinct institution, keyed by `aid`.
# As the last expression of the cell, this call's return value (the cursor) is echoed as output.
cursor.execute(
    "CREATE TABLE IF NOT EXISTS Affiliations(aid INTEGER PRIMARY KEY, name TEXT, city TEXT, country TEXT)"
)

<sqlite3.Cursor at 0x2bef9b199c0>

## Misión 4: Creación de tablas de relación entre entidades

In [15]:
# Relation tables. Each FOREIGN KEY clause names only the parent table, so it
# refers to that table's primary key (wid / pid / aid).
# NOTE(review): no PRIMARY KEY or UNIQUE constraint is declared on the pairs, so
# repeated rows are accepted; and the visible cells never run
# "PRAGMA foreign_keys = ON", so SQLite presumably does not enforce these
# references here -- confirm whether enforcement is wanted.
cursor.execute(
    "CREATE TABLE IF NOT EXISTS WinnersPrizes(winner_id INTEGER, prize_id INTEGER, FOREIGN KEY (winner_id) REFERENCES Winners, FOREIGN KEY (prize_id) REFERENCES Prizes)"
)
cursor.execute(
    "CREATE TABLE IF NOT EXISTS WinnersAffiliations(winner_id INTEGER, affiliation_id INTEGER, FOREIGN KEY (winner_id) REFERENCES Winners, FOREIGN KEY (affiliation_id) REFERENCES Affiliations)"
)

<sqlite3.Cursor at 0x2bef9b199c0>

In [16]:
# Persist the CREATE TABLE statements and release the connection;
# the following cells open their own connection again.
connection.commit()
connection.close()

Revisamos si efectivamente se crearon las tablas:

In [17]:
# Reopen the database and print every table name followed by its column
# metadata (the rows returned by PRAGMA table_info).
connection = sqlite3.connect('laureates.db')
try:
    cursor = connection.cursor()

    cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
    print("Tablas:\n")
    # fetchall() materialises the table list first, so reusing the same cursor
    # for the PRAGMA queries inside the loop does not disturb the iteration.
    for table in cursor.fetchall():
        print(table[0])
        # The interpolated name comes from sqlite_master itself, not from user input.
        cursor.execute(f'PRAGMA table_info([{table[0]}])')
        print(cursor.fetchall())
        print()
finally:
    # Close even when a query fails, so no open handle is left in the kernel.
    connection.close()

Tablas:

Winners
[(0, 'wid', 'INTEGER', 0, None, 1), (1, 'firstname', 'TEXT', 0, None, 0), (2, 'surname', 'TEXT', 0, None, 0), (3, 'born', 'DATE', 0, None, 0), (4, 'died', 'DATE', 0, None, 0), (5, 'bornCountry', 'TEXT', 0, None, 0), (6, 'bornCountryCode', 'TEXT', 0, None, 0), (7, 'bornCity', 'TEXT', 0, None, 0), (8, 'diedCountry', 'TEXT', 0, None, 0), (9, 'diedCountryCode', 'TEXT', 0, None, 0), (10, 'diedCity', 'TEXT', 0, None, 0), (11, 'gender', 'TEXT', 0, None, 0)]

Prizes
[(0, 'pid', 'INTEGER', 0, None, 1), (1, 'year', 'INTEGER', 0, None, 0), (2, 'category', 'TEXT', 0, None, 0), (3, 'share', 'INTEGER', 0, None, 0), (4, 'motivation', 'TEXT', 0, None, 0)]

Affiliations
[(0, 'aid', 'INTEGER', 0, None, 1), (1, 'name', 'TEXT', 0, None, 0), (2, 'city', 'TEXT', 0, None, 0), (3, 'country', 'TEXT', 0, None, 0)]

WinnersPrizes
[(0, 'winner_id', 'INTEGER', 0, None, 0), (1, 'prize_id', 'INTEGER', 0, None, 0)]

WinnersAffiliations
[(0, 'winner_id', 'INTEGER', 0, None, 0), (1, 'affiliation_id', '

## Misión 5: Carga de datos en las tablas

In [18]:
# Catalogues mapping each distinct entity tuple to its integer id, plus the
# next id to hand out for each entity kind (ids start at 1).
affiliations, winners, prizes = {}, {}, {}
aid = wid = pid = 1

In [19]:
# Reference key set for a prize, taken from the first prize of the first filtered laureate.
allKeysPrizes = filterLaureates[0]["prizes"][0].keys()

In [20]:
# Reference key set for an affiliation, taken from the first affiliation of that same prize.
allKeysAffiliations = filterLaureates[0]["prizes"][0]["affiliations"][0].keys()

In [21]:
# Show the keys a prize must have to be loaded.
print(allKeysPrizes)

dict_keys(['year', 'category', 'share', 'motivation', 'affiliations'])


In [22]:
# Show the keys an affiliation must have to be loaded.
print(allKeysAffiliations)

dict_keys(['name', 'city', 'country'])


Extraemos la información de los diccionarios y la cargamos en las tablas correspondientes.

In [23]:
# First pass: register every distinct winner, prize and affiliation tuple and
# give each one the next sequential id (wid / pid / aid).
for laureate in filterLaureates:
    winnerKey = (
        laureate["firstname"],
        laureate["surname"],
        laureate["born"],
        laureate["died"],
        laureate["bornCountry"],
        laureate["bornCountryCode"],
        laureate["bornCity"],
        laureate["diedCountry"],
        laureate["diedCountryCode"],
        laureate["diedCity"],
        laureate["gender"],
    )
    if winnerKey in winners:
        # Winner already registered: its prizes are not visited again.
        continue
    winners[winnerKey] = wid
    wid += 1

    for prize in laureate["prizes"]:
        # Prizes whose key set differs from the reference set are ignored.
        if prize.keys() != allKeysPrizes:
            continue
        prizeKey = (
            prize["year"],
            prize["category"],
            prize["share"],
            prize["motivation"],
        )
        if prizeKey not in prizes:
            prizes[prizeKey] = pid
            pid += 1

        for affiliation in prize["affiliations"]:
            # Skip entries that are not dicts or that lack the reference keys.
            if not isinstance(affiliation, dict) or affiliation.keys() != allKeysAffiliations:
                continue
            affiliationKey = (
                affiliation["name"],
                affiliation["city"],
                affiliation["country"],
            )
            if affiliationKey not in affiliations:
                affiliations[affiliationKey] = aid
                aid += 1

Ahora se revisa nuevamente la información para crear las listas con las relaciones entre ganadores, premios y afiliaciones.

In [24]:
# Relation pairs, filled as (winner id, prize id) and (winner id, affiliation id).
winnersPrizes, winnersAffiliations = [], []

In [25]:
# Second pass over the same records: translate each winner / prize /
# affiliation tuple back into its assigned id and record the relation pairs.
# NOTE(review): pairs are appended without de-duplication, so the same
# (winner, affiliation) pair can be recorded more than once -- presumably when
# two prizes of one winner share an affiliation. Confirm whether that is intended.
for laureate in filterLaureates:
    keyWinner = (
        laureate["firstname"],
        laureate["surname"],
        laureate["born"],
        laureate["died"],
        laureate["bornCountry"],
        laureate["bornCountryCode"],
        laureate["bornCity"],
        laureate["diedCountry"],
        laureate["diedCountryCode"],
        laureate["diedCity"],
        laureate["gender"],
    )
    # Ids are integers starting at 1, so None reliably means "not registered".
    winnerId = winners.get(keyWinner)

    for prize in laureate["prizes"]:
        if prize.keys() != allKeysPrizes:
            continue
        keyPrize = (
            prize["year"],
            prize["category"],
            prize["share"],
            prize["motivation"],
        )
        prizeId = prizes.get(keyPrize)
        if winnerId is not None and prizeId is not None:
            winnersPrizes.append((winnerId, prizeId))

        for affiliation in prize["affiliations"]:
            if not isinstance(affiliation, dict) or affiliation.keys() != allKeysAffiliations:
                continue
            keyAffiliation = (
                affiliation["name"],
                affiliation["city"],
                affiliation["country"],
            )
            affiliationId = affiliations.get(keyAffiliation)
            if winnerId is not None and affiliationId is not None:
                winnersAffiliations.append((winnerId, affiliationId))

In [26]:
#print(winners)

{('Wilhelm Conrad', 'Röntgen', '1845-03-27', '1923-02-10', 'Prussia (now Germany)', 'DE', 'Lennep (now Remscheid)', 'Germany', 'DE', 'Munich', 'male'): 1, ('Pieter', 'Zeeman', '1865-05-25', '1943-10-09', 'the Netherlands', 'NL', 'Zonnemaire', 'the Netherlands', 'NL', 'Amsterdam', 'male'): 2, ('Pierre', 'Curie', '1859-05-15', '1906-04-19', 'France', 'FR', 'Paris', 'France', 'FR', 'Paris', 'male'): 3, ('Marie', 'Curie', '1867-11-07', '1934-07-04', 'Russian Empire (now Poland)', 'PL', 'Warsaw', 'France', 'FR', 'Sallanches', 'female'): 4, ('Philipp', 'Lenard', '1862-06-07', '1947-05-20', 'Hungary (now Slovakia)', 'SK', 'Pressburg (now Bratislava)', 'Germany', 'DE', 'Messelhausen', 'male'): 5, ('J.J.', 'Thomson', '1856-12-18', '1940-08-30', 'United Kingdom', 'GB', 'Cheetham Hill', 'United Kingdom', 'GB', 'Cambridge', 'male'): 6, ('Albert A.', 'Michelson', '1852-12-19', '1931-05-09', 'Prussia (now Poland)', 'PL', 'Strelno (now Strzelno)', 'USA', 'US', 'Pasadena, CA', 'male'): 7, ('Guglielmo'

In [27]:
#print(prizes)

{('1901', 'physics', '1', '"in recognition of the extraordinary services he has rendered by the discovery of the remarkable rays subsequently named after him"'): 1, ('1902', 'physics', '2', '"in recognition of the extraordinary service they rendered by their researches into the influence of magnetism upon radiation phenomena"'): 2, ('1903', 'physics', '4', '"in recognition of the extraordinary services they have rendered by their joint researches on the radiation phenomena discovered by Professor Henri Becquerel"'): 3, ('1911', 'chemistry', '1', '"in recognition of her services to the advancement of chemistry by the discovery of the elements radium and polonium, by the isolation of radium and the study of the nature and compounds of this remarkable element"'): 4, ('1905', 'physics', '1', '"for his work on cathode rays"'): 5, ('1906', 'physics', '1', '"in recognition of the great merits of his theoretical and experimental investigations on the conduction of electricity by gases"'): 6, (

In [28]:
#print(affiliations)

{('Munich University', 'Munich', 'Germany'): 1, ('Amsterdam University', 'Amsterdam', 'the Netherlands'): 2, ('École municipale de physique et de chimie industrielles (Municipal School of Industrial Physics and Chemistry)', 'Paris', 'France'): 3, ('Sorbonne University', 'Paris', 'France'): 4, ('Kiel University', 'Kiel', 'Germany'): 5, ('University of Cambridge', 'Cambridge', 'United Kingdom'): 6, ('University of Chicago', 'Chicago, IL', 'USA'): 7, ('Marconi Wireless Telegraph Co. Ltd.', 'London', 'United Kingdom'): 8, ('Strasbourg University', 'Strasbourg', 'Germany (now France)'): 9, ('Würzburg University', 'Würzburg', 'Germany'): 10, ('Swedish Gas-Accumulator Co.', 'Lidingö, Stockholm', 'Sweden'): 11, ('Leiden University', 'Leiden', 'the Netherlands'): 12, ('Frankfurt-on-the-Main University', 'Frankfurt-on-the-Main', 'Germany'): 13, ('University College', 'London', 'United Kingdom'): 14, ('Victoria University', 'Manchester', 'United Kingdom'): 15, ('Edinburgh University', 'Edinburgh'

In [29]:
#print(winnersPrizes)

[(1, 1), (2, 2), (3, 3), (4, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 8), (10, 9), (11, 10), (12, 11), (13, 12), (14, 13), (15, 14), (16, 14), (17, 15), (18, 16), (19, 17), (20, 18), (21, 19), (22, 20), (23, 21), (24, 22), (25, 23), (26, 23), (27, 24), (28, 25), (29, 26), (30, 27), (31, 28), (32, 29), (33, 30), (34, 31), (35, 31), (36, 32), (37, 33), (38, 34), (39, 35), (40, 35), (41, 36), (42, 37), (43, 38), (44, 39), (45, 40), (46, 41), (47, 42), (48, 43), (49, 44), (50, 45), (51, 45), (52, 46), (53, 46), (54, 47), (55, 48), (56, 49), (57, 50), (58, 51), (59, 52), (60, 52), (60, 53), (61, 52), (62, 54), (63, 55), (64, 55), (65, 56), (66, 57), (67, 58), (68, 59), (69, 60), (70, 60), (71, 61), (72, 62), (73, 62), (74, 63), (75, 63), (76, 63), (77, 64), (78, 65), (79, 66), (80, 67), (81, 68), (82, 69), (83, 70), (84, 53), (85, 71), (86, 72), (87, 72), (88, 73), (89, 74), (90, 74), (91, 74), (92, 75), (93, 76), (94, 76), (95, 77), (96, 77), (97, 78), (98, 78), (99, 79), (100, 80),

In [30]:
#print(winnersAffiliations)

[(1, 1), (2, 2), (3, 3), (4, 4), (5, 5), (6, 6), (7, 7), (8, 8), (9, 9), (10, 2), (11, 10), (12, 11), (13, 12), (14, 13), (15, 14), (16, 15), (17, 16), (18, 17), (19, 18), (20, 19), (21, 20), (22, 21), (23, 22), (24, 23), (25, 24), (26, 25), (27, 4), (28, 7), (29, 6), (30, 26), (31, 27), (32, 28), (33, 29), (34, 17), (35, 6), (36, 30), (37, 31), (38, 22), (39, 32), (40, 26), (41, 33), (42, 34), (43, 35), (44, 36), (45, 37), (46, 38), (47, 39), (48, 15), (49, 40), (49, 36), (50, 41), (51, 42), (52, 43), (53, 38), (54, 44), (55, 16), (56, 45), (56, 46), (57, 43), (58, 36), (59, 47), (60, 48), (60, 48), (61, 49), (62, 50), (62, 51), (63, 34), (64, 34), (65, 34), (66, 43), (67, 52), (68, 37), (69, 53), (70, 45), (71, 54), (72, 51), (73, 51), (74, 55), (75, 38), (76, 22), (77, 56), (78, 57), (79, 34), (80, 22), (81, 58), (82, 59), (83, 60), (84, 61), (85, 6), (86, 62), (87, 36), (88, 63), (89, 49), (90, 6), (91, 38), (92, 52), (93, 64), (93, 60), (94, 38), (95, 7), (96, 37), (97, 38), (98, 

Insertamos la información en la base de datos.

In [31]:
# Write the in-memory catalogues and relation pairs into laureates.db.
#
# The rows are produced by generator expressions whose variables are scoped to
# the expression, so the notebook-level id counters wid / pid / aid are no
# longer overwritten by this cell.
#
# Running this cell against an already populated database raises
# sqlite3.IntegrityError, because the entity ids are primary keys. In that case
# nothing is committed and the connection is still closed.
connection = sqlite3.connect('laureates.db')
try:
    cursor = connection.cursor()

    cursor.executemany(
        "INSERT INTO Winners(wid, firstname, surname, born, died, bornCountry, bornCountryCode, bornCity, diedCountry, diedCountryCode, diedCity, gender) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
        ((winnerId, *winnerRow) for winnerRow, winnerId in winners.items()),
    )

    cursor.executemany(
        "INSERT INTO Prizes(pid, year, category, share, motivation) VALUES (?, ?, ?, ?, ?)",
        ((prizeId, *prizeRow) for prizeRow, prizeId in prizes.items()),
    )

    cursor.executemany(
        "INSERT INTO Affiliations(aid, name, city, country) VALUES (?, ?, ?, ?)",
        ((affiliationId, *affiliationRow) for affiliationRow, affiliationId in affiliations.items()),
    )

    # The relation lists already hold (winner id, other id) tuples in column order.
    cursor.executemany(
        "INSERT INTO WinnersPrizes(winner_id, prize_id) VALUES (?, ?)",
        winnersPrizes,
    )

    cursor.executemany(
        "INSERT INTO WinnersAffiliations(winner_id, affiliation_id) VALUES (?, ?)",
        winnersAffiliations,
    )

    connection.commit()
finally:
    # Closing without a commit discards pending changes, so a failed run does
    # not leave a half-loaded database behind.
    connection.close()