## Request

Dans un programme réalisé dans le langage de votre choix (de préférence C, C++, Java ou Python), réalisez au moins trois requêtes et faites afficher les résultats de façon lisible et compréhensible pour un non informaticien : liste des départements d'une région donnée, liste des communes de plus de X habitants d'un département donné, la région la plus/la moins peuplée, les communes les plus/les moins peuplées d'un département, etc.

In [None]:
import psycopg2
import psycopg2.extras
import pandas as pd
from db import connect

# Obtain a database connection from the project's `db.connect` helper.
conn = connect()
# DictCursor rows can be read by column name as well as by position.
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

In [None]:
import drop
import my_create_table

# Presumably resets the schema: drop_tables runs before create_tables, both on
# the shared cursor/connection — see the `drop` and `my_create_table` modules.
drop.drop_tables(cur, conn)
my_create_table.create_tables(cur, conn)

In [None]:
import insert_commune
import insert_stats_population

# Fill the commune-related tables first (see the `insert_commune` module).
insert_commune.fill_tables_commune()

# Then load the population statistics from the historical-series CSV.
# Per the original author's note, this step takes roughly 30 s to 1 min.
population_csv = 'data/statistiques/population/base-cc-serie-historique-2020.csv'
insert_stats_population.fill_tables_population(population_csv, 2000, 2020)

### Liste des départements d'une région

In [None]:
import pandas as pd

# Region whose departments are listed; change this value to query another region.
nom_region = "Nouvelle-Aquitaine"

# The region name is passed as a bound parameter (%s) instead of being
# interpolated into the SQL text, so names containing an apostrophe
# (e.g. "Provence-Alpes-Côte d'Azur") neither break nor alter the query.
request = """
SELECT d.id_departement, d.nom_departement
FROM departement d
JOIN region r ON d.id_region = r.id_region
WHERE r.nom_region = %s;
"""

cur.execute(request, (nom_region,))
rows = cur.fetchall()

df = pd.DataFrame(rows, columns=['id_departement', 'nom_departement'])
if df.empty:
    # An unknown or misspelled region would otherwise print a bare empty frame.
    print(f"Aucun département trouvé pour la région « {nom_region} ».")
else:
    print(df)


### Liste des communes de plus de X habitants d'un département donné

In [None]:
# List the communes of a given department that have more than X inhabitants.

# Department code and minimum number of inhabitants (exclusive lower bound).
id_departement = "24"
min_population = 10_000

# Both values are sent as bound parameters (%s) rather than formatted into the
# SQL text, which avoids quoting problems and SQL injection.
request = """
SELECT c.id_commune, c.nom_commune, sp.valeur AS population
FROM commune c
JOIN statistiques_population sp ON c.id_commune = sp.codgeo
WHERE c.id_departement = %s
  AND sp.annee = 2020
  AND sp.type_statistique = 'Population'
  AND sp.valeur > %s;
"""

cur.execute(request, (id_departement, min_population))
rows = cur.fetchall()
df = pd.DataFrame(rows, columns=['id_commune', 'nom_commune', 'population'])
if df.empty:
    # Give a readable answer instead of printing an empty frame.
    print(
        f"Aucune commune de plus de {min_population} habitants "
        f"dans le département {id_departement}."
    )
else:
    print(df)


### La région la plus peuplée

In [None]:
# Sum the 2020 'Population' figures of every commune per region, sort the
# regions from the largest total to the smallest and keep only the first one.
request = """
SELECT r.id_region, r.nom_region, SUM(sp.valeur) AS population_totale
FROM region r
JOIN departement d ON r.id_region = d.id_region
JOIN commune c ON d.id_departement = c.id_departement
JOIN statistiques_population sp ON c.id_commune = sp.codgeo
WHERE sp.annee = 2020 AND sp.type_statistique = 'Population'
GROUP BY r.id_region, r.nom_region
ORDER BY population_totale DESC
LIMIT 1;
"""

# Labels for the three selected columns, in SELECT order.
columns = ['id_region', 'nom_region', 'population_totale']

cur.execute(request)
rows = cur.fetchall()
df = pd.DataFrame(data=rows, columns=columns)
print(df)


## Views

Créer deux vues (cf commande CREATE OR REPLACE VIEW) qui donnent la population des départements et des régions pour les différentes années ainsi que les indicateurs existants.

### Vue 1 : Population des départements

In [None]:
# View giving, for each department and year, the total of the 'Population'
# statistic over all of the department's communes.
vue = """
CREATE OR REPLACE VIEW vue_population_departement AS
SELECT d.id_departement, d.nom_departement, sp.annee, sp.type_statistique, SUM(sp.valeur) AS population_totale
FROM departement d
JOIN commune c ON d.id_departement = c.id_departement
JOIN statistiques_population sp ON c.id_commune = sp.codgeo
WHERE sp.type_statistique = 'Population'
GROUP BY d.id_departement, d.nom_departement, sp.annee, sp.type_statistique;
"""

# Departments ranked by their 2020 population, read back through the view.
request = """
SELECT *
FROM vue_population_departement
WHERE annee = 2020
ORDER BY population_totale DESC;
"""

try:
    cur.execute(vue)
    # Commit the DDL right away so the view persists even if the query fails.
    conn.commit()
    cur.execute(request)
    rows = cur.fetchall()
except psycopg2.Error:
    # A failed statement leaves the transaction aborted; roll back so the
    # following cells can keep using the connection, then surface the error.
    conn.rollback()
    raise

df = pd.DataFrame(rows, columns=['id_departement', 'nom_departement', 'annee', 'type_statistique', 'population_totale'])
df


### Vue 2 : Population des régions

In [None]:
# View giving, for each region and year, the total of the 'Population'
# statistic over the communes of all the region's departments.
vue_region = """
CREATE OR REPLACE VIEW vue_population_region AS
SELECT r.id_region, r.nom_region, sp.annee, sp.type_statistique, SUM(sp.valeur) AS population_totale
FROM region r
JOIN departement d ON r.id_region = d.id_region
JOIN commune c ON d.id_departement = c.id_departement
JOIN statistiques_population sp ON c.id_commune = sp.codgeo
WHERE sp.type_statistique = 'Population'
GROUP BY r.id_region, r.nom_region, sp.annee, sp.type_statistique;
"""

# Labels for the view's columns, in the order the view selects them.
region_columns = ['id_region', 'nom_region', 'annee', 'type_statistique', 'population_totale']

try:
    # Create (or refresh) the view and persist it before querying it.
    cur.execute(vue_region)
    conn.commit()

    # Regions ranked by their 2020 population, read back through the view.
    request = """
    SELECT *
    FROM vue_population_region
    WHERE annee = 2020
    ORDER BY population_totale DESC;
    """

    cur.execute(request)
    rows = cur.fetchall()
    df = pd.DataFrame(data=rows, columns=region_columns)
    print(df)

except Exception as err:
    # Undo the failed transaction so the connection stays usable, then report.
    conn.rollback()
    print(f"Error: {err}")


In [None]:
# Commit whatever is still pending on the connection before the next section.
conn.commit()

## Procédure stockée

In [None]:
# Add the column that will hold the computed population. IF NOT EXISTS keeps
# the cell re-runnable: a plain ADD COLUMN raises an error on the second run
# and leaves the transaction aborted.
alter_departements = "ALTER TABLE departement ADD COLUMN IF NOT EXISTS population_totale INT;"
alter_regions = "ALTER TABLE region ADD COLUMN IF NOT EXISTS population_totale INT;"

cur.execute(alter_departements)
cur.execute(alter_regions)
conn.commit()

# Create the stored procedure. It first fills departement.population_totale
# from the 2020 'Population' statistics of the communes, then fills
# region.population_totale by summing the department totals just written,
# so the order of the two UPDATE statements matters.
procedure_calcul = """
CREATE OR REPLACE PROCEDURE calculer_population()
LANGUAGE plpgsql
AS $$
BEGIN
    -- Calculer la population des départements
    UPDATE departement d
    SET population_totale = sub.population
    FROM (
        SELECT c.id_departement, SUM(sp.valeur) AS population
        FROM commune c
        JOIN statistiques_population sp ON c.id_commune = sp.codgeo
        WHERE sp.type_statistique = 'Population' AND sp.annee = 2020
        GROUP BY c.id_departement
    ) AS sub
    WHERE d.id_departement = sub.id_departement;

    -- Calculer la population des régions
    UPDATE region r
    SET population_totale = sub.population
    FROM (
        SELECT d.id_region, SUM(d.population_totale) AS population
        FROM departement d
        GROUP BY d.id_region
    ) AS sub
    WHERE r.id_region = sub.id_region;
END;
$$;
"""

cur.execute(procedure_calcul)
conn.commit()

# Run the stored procedure and persist the computed totals.
cur.execute("CALL calculer_population();")
conn.commit()

# Close the cursor and the connection; the next cell opens a fresh one.
cur.close()
conn.close()

In [None]:
# On verifie que les colonnes ont bien été ajoutées et que les populations ont bien été calculées
conn = connect()
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

request = """
SELECT * FROM departement;
"""
cur.execute(request)
rows = cur.fetchall()
df = pd.DataFrame(rows, columns=['id_departement', 'nom_departement', 'id_region', 'population_totale'])
print(df)

## Triggers

In [None]:
try:
    # Trigger that rejects every INSERT/UPDATE/DELETE on the region table.
    # DROP TRIGGER IF EXISTS makes the statement re-runnable: CREATE TRIGGER
    # alone fails when the trigger already exists.
    bloque_region = """
    CREATE OR REPLACE FUNCTION bloque_modifications_region()
    RETURNS trigger AS $$
    BEGIN
        RAISE EXCEPTION 'Modification de la table region non autorisée';
        RETURN NULL;
    END;
    $$ LANGUAGE plpgsql;

    DROP TRIGGER IF EXISTS tr_bloquer_modifications_region ON region;
    CREATE TRIGGER tr_bloquer_modifications_region
    BEFORE INSERT OR UPDATE OR DELETE ON region
    FOR EACH ROW EXECUTE FUNCTION bloque_modifications_region();
    """

    # Trigger that rejects every INSERT/UPDATE/DELETE on the departement table.
    bloque_departement = """
    CREATE OR REPLACE FUNCTION bloque_modifications_departement()
    RETURNS trigger AS $$
    BEGIN
        RAISE EXCEPTION 'Modification de la table departement non autorisée';
        RETURN NULL;
    END;
    $$ LANGUAGE plpgsql;

    DROP TRIGGER IF EXISTS tr_bloquer_modifications_departement ON departement;
    CREATE TRIGGER tr_bloquer_modifications_departement
    BEFORE INSERT OR UPDATE OR DELETE ON departement
    FOR EACH ROW EXECUTE FUNCTION bloque_modifications_departement();
    """

    # Stored procedure that recomputes the department totals from the 2020
    # 'Population' statistics, then the region totals from the departments.
    procedure_calcul = """
    CREATE OR REPLACE PROCEDURE calculer_population()
    LANGUAGE plpgsql
    AS $$
    BEGIN
        -- Calculer la population des départements
        UPDATE departement d
        SET population_totale = sub.population
        FROM (
            SELECT c.id_departement, SUM(sp.valeur) AS population
            FROM commune c
            JOIN statistiques_population sp ON c.id_commune = sp.codgeo
            WHERE sp.type_statistique = 'Population' AND sp.annee = 2020
            GROUP BY c.id_departement
        ) AS sub
        WHERE d.id_departement = sub.id_departement;

        -- Calculer la population des régions
        UPDATE region r
        SET population_totale = sub.population
        FROM (
            SELECT d.id_region, SUM(d.population_totale) AS population
            FROM departement d
            GROUP BY d.id_region
        ) AS sub
        WHERE r.id_region = sub.id_region;
    END;
    $$;
    """

    # Trigger that recomputes the totals after each inserted or updated row of
    # statistiques_population. calculer_population is a PROCEDURE, so it must
    # be invoked with CALL: PERFORM only works on functions and fails at run
    # time when the trigger fires.
    # NOTE(review): the two blocking triggers above also fire on the UPDATEs
    # issued by calculer_population(), so the recalculation will presumably be
    # rejected once everything is installed — confirm the intended interplay
    # (e.g. restrict the blocking triggers, or test pg_trigger_depth()).
    maj_population = """
    CREATE OR REPLACE FUNCTION maj_population()
    RETURNS trigger AS $$
    BEGIN
        CALL calculer_population();
        RETURN NEW;
    END;
    $$ LANGUAGE plpgsql;

    DROP TRIGGER IF EXISTS tr_maj_population ON statistiques_population;
    CREATE TRIGGER tr_maj_population
    AFTER INSERT OR UPDATE ON statistiques_population
    FOR EACH ROW EXECUTE FUNCTION maj_population();
    """

    # Run the statements in dependency order.
    cur.execute(bloque_region)
    cur.execute(bloque_departement)
    cur.execute(procedure_calcul)
    cur.execute(maj_population)

    # Persist the changes.
    conn.commit()

except Exception as e:
    # Roll the transaction back if any statement failed.
    conn.rollback()
    print(f"Error: {e}")

finally:
    # Close the cursor and the connection in every case.
    cur.close()
    conn.close()


## Triggers suite

In [None]:
conn = connect()
cur = conn.cursor(cursor_factory=psycopg2.extras.DictCursor)

try:
    # Procedure that refreshes departement.population_totale, but only for the
    # departments whose joined row count equals their number of communes.
    # NOTE(review): the filter spans several years while the HAVING clause
    # compares against one row per commune — verify the behaviour once more
    # than one of these years is loaded.
    procedure_update_departement = """
    CREATE OR REPLACE PROCEDURE update_population_departement()
    LANGUAGE plpgsql
    AS $$
    BEGIN
        UPDATE departement d
        SET population_totale = sub.population
        FROM (
            SELECT c.id_departement, SUM(sp.valeur) AS population
            FROM commune c
            JOIN statistiques_population sp ON c.id_commune = sp.codgeo
            WHERE sp.type_statistique = 'Population' AND sp.annee IN (2020, 2021, 2022, 2023)
            GROUP BY c.id_departement
            HAVING COUNT(c.id_commune) = (
                SELECT COUNT(*)
                FROM commune
                WHERE id_departement = c.id_departement
            )
        ) AS sub
        WHERE d.id_departement = sub.id_departement;
    END;
    $$;
    """

    # Procedure that refreshes region.population_totale from the department
    # totals.
    # NOTE(review): the HAVING clause counts the same departement rows on both
    # sides, so it looks always true — confirm whether
    # COUNT(d.population_totale) was intended.
    procedure_update_region = """
    CREATE OR REPLACE PROCEDURE update_population_region()
    LANGUAGE plpgsql
    AS $$
    BEGIN
        UPDATE region r
        SET population_totale = sub.population
        FROM (
            SELECT d.id_region, SUM(d.population_totale) AS population
            FROM departement d
            GROUP BY d.id_region
            HAVING COUNT(d.id_departement) = (
                SELECT COUNT(*)
                FROM departement
                WHERE id_region = d.id_region
            )
        ) AS sub
        WHERE r.id_region = sub.id_region;
    END;
    $$;
    """

    # Procedure that refreshes departments first, then regions. Procedures must
    # be invoked with CALL; PERFORM only accepts functions and fails at run time.
    procedure_update_all = """
    CREATE OR REPLACE PROCEDURE update_population_all()
    LANGUAGE plpgsql
    AS $$
    BEGIN
        CALL update_population_departement();
        CALL update_population_region();
    END;
    $$;
    """

    # Trigger that refreshes the totals after each inserted or updated row of
    # statistiques_population. DROP TRIGGER IF EXISTS keeps the cell
    # re-runnable, since CREATE TRIGGER alone fails if the trigger exists.
    trigger_update_population = """
    CREATE OR REPLACE FUNCTION trigger_population_update()
    RETURNS trigger AS $$
    BEGIN
        CALL update_population_all();
        RETURN NEW;
    END;
    $$ LANGUAGE plpgsql;

    DROP TRIGGER IF EXISTS tr_update_population ON statistiques_population;
    CREATE TRIGGER tr_update_population
    AFTER INSERT OR UPDATE ON statistiques_population
    FOR EACH ROW EXECUTE FUNCTION trigger_population_update();
    """

    # Run the statements in dependency order.
    cur.execute(procedure_update_departement)
    cur.execute(procedure_update_region)
    cur.execute(procedure_update_all)
    cur.execute(trigger_update_population)

    # Persist the changes.
    conn.commit()

except Exception as e:
    # Roll the transaction back if any statement failed.
    conn.rollback()
    print(f"Error: {e}")

# The cursor and connection are deliberately left open here: the cells that
# follow keep using `cur`.


## Plan d'exécution (EXPLAIN)
### La région la plus peuplée

In [None]:
# Ask PostgreSQL for the actual execution plan of the
# "most populated region" query.
request = """
EXPLAIN ANALYSE SELECT r.id_region, r.nom_region, SUM(sp.valeur) AS population_totale
FROM region r
JOIN departement d ON r.id_region = d.id_region
JOIN commune c ON d.id_departement = c.id_departement
JOIN statistiques_population sp ON c.id_commune = sp.codgeo
WHERE sp.annee = 2020 AND sp.type_statistique = 'Population'
GROUP BY r.id_region, r.nom_region
ORDER BY population_totale DESC
LIMIT 1;
"""

cur.execute(request)
rows = cur.fetchall()
# EXPLAIN returns one row per line of the plan, so every row is printed;
# showing only the first row would display just the top node.
print("\n".join(row[0] for row in rows))

## Plan d'exécution et index

In [None]:
# Create the partial index. IF NOT EXISTS keeps the cell re-runnable: a plain
# CREATE INDEX fails when pop_idx already exists.
create_index = """
CREATE INDEX IF NOT EXISTS pop_idx ON statistiques_population (valeur) WHERE type_statistique = 'Population';
"""

# Same query as in the previous section, so the two plans can be compared.
request = """
EXPLAIN ANALYSE SELECT r.id_region, r.nom_region, SUM(sp.valeur) AS population_totale
FROM region r
JOIN departement d ON r.id_region = d.id_region
JOIN commune c ON d.id_departement = c.id_departement
JOIN statistiques_population sp ON c.id_commune = sp.codgeo
WHERE sp.annee = 2020 AND sp.type_statistique = 'Population'
GROUP BY r.id_region, r.nom_region
ORDER BY population_totale DESC
LIMIT 1;
"""

cur.execute(create_index)
cur.execute(request)
rows = cur.fetchall()
# EXPLAIN returns one row per line of the plan, so every row is printed.
print("\n".join(row[0] for row in rows))

## Pour aller plus loin : transactions