# Conception BDD Normalisée et Conforme au RGPD (SIDORA AI)

In [23]:
# import
import random
from datetime import datetime
import pandas as pd
import numpy as np
from passlib.hash import argon2

from sqlalchemy import Column, Integer, String, Boolean, DateTime, ForeignKey, create_engine, Table
from sqlalchemy.orm import relationship, declarative_base, sessionmaker
from sqlalchemy.sql import func

from faker import Faker

In [24]:
# Declarative base shared by every ORM model declared in this file.
Base = declarative_base()

# Association table backing the many-to-many link between promotions and
# regions; the (promotion_id, region_id) pair is the composite primary key.
promotions_regions = Table(
    "promotions_regions",
    Base.metadata,
    Column(
        "promotion_id",
        Integer,
        ForeignKey("promotions.promotion_id"),
        primary_key=True,
    ),
    Column(
        "region_id",
        Integer,
        ForeignKey("regions.region_id"),
        primary_key=True,
    ),
)

#Client
class Age(Base):
    """Lookup table of age brackets referenced by clients."""

    __tablename__ = "ages"

    age_id = Column(Integer, primary_key=True)
    # Text label of the bracket.
    age_plage = Column(String)

    # One age bracket is shared by many clients.
    clients = relationship(
        "Client",
        back_populates="age",
    )

class Region(Base):
    """Lookup table of regions, linked to clients and to promotions."""

    __tablename__ = "regions"

    region_id = Column(Integer, primary_key=True)
    region_nom = Column(String)

    # One region is shared by many clients.
    clients = relationship(
        "Client",
        back_populates="region",
    )
    # Many-to-many with Promotion through the promotions_regions table.
    promotions = relationship(
        "Promotion",
        secondary=promotions_regions,
        back_populates="regions",
    )

class DonnePersonnel(Base):
    """Personal data of a client, stored apart from the Client row.

    ``client_id`` is both the primary key and a foreign key to ``clients``,
    so each client has at most one row here.
    """

    __tablename__ = "donnes_personnels"

    client_id = Column(
        Integer,
        ForeignKey("clients.client_id"),
        primary_key=True,
    )

    # Personally identifiable information (PII).
    login = Column(String, unique=True)
    mot_de_passe_hash = Column(String)

    # GDPR bookkeeping: deletion timestamp (nullable) and anonymisation flag.
    date_suppression = Column(DateTime, nullable=True)
    anonymise = Column(Boolean, default=False)

    # Back-reference to the owning client.
    client = relationship(
        "Client",
        back_populates="donnees",
    )

class Client(Base):
    """Client entity: a technical id, lookup foreign keys and timestamps.

    Login and password hash live in the separate ``DonnePersonnel`` row
    reachable through ``donnees``.
    """

    __tablename__ = "clients"

    client_id = Column(Integer, primary_key=True)
    age_id = Column(Integer, ForeignKey("ages.age_id"))
    region_id = Column(Integer, ForeignKey("regions.region_id"))

    # GDPR life-cycle timestamps; both default to the database's current time.
    date_creation = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    date_derniere_utilisation = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )

    # Lookup relations.
    age = relationship("Age", back_populates="clients")
    region = relationship("Region", back_populates="clients")

    # One-to-one personal data; deleted together with the client and when
    # detached from it (delete-orphan).
    donnees = relationship(
        "DonnePersonnel",
        back_populates="client",
        uselist=False,
        cascade="all, delete-orphan",
    )

    # Orders placed by this client.
    commandes = relationship(
        "Commande",
        back_populates="client",
    )

Modèle avec intégration des exigences RGPD :

* Pseudonymisation
client_id sert d’identifiant technique ; le login n'est pas utilisé comme clé d'entité.

* Minimisation des données
La table Client ne contient aucune donnée personnelle ; la table `donnes_personnels` (classe `DonnePersonnel`) regroupe uniquement les informations strictement nécessaires.

* Sécurité
Le mot de passe n’est jamais stocké en clair : utilisation d’un champ mot_de_passe_hash.

* Droit à l’effacement (Right to Erasure)
Champs dédiés : date_suppression et anonymise pour gérer la suppression ou l’anonymisation des données.

* Traçabilité
Suivi du cycle de vie via date_creation et date_derniere_utilisation.

* Séparation logique des PII
Les données personnelles sont isolées dans une table distincte pour mieux contrôler l’accès et faciliter l’anonymisation.

In [None]:
#Produit

class Platform(Base):
    """Lookup table of platforms referenced by products."""

    __tablename__ = "platforms"

    platform_cod = Column(Integer, primary_key=True)
    platform_nom = Column(String)

    # Products released on this platform.
    produits = relationship(
        "Produit",
        back_populates="platform",
    )

class Genre(Base):
    """Lookup table of genres referenced by products."""

    __tablename__ = "genres"

    genre_cod = Column(Integer, primary_key=True)
    genre_nom = Column(String)

    # Products belonging to this genre.
    produits = relationship(
        "Produit",
        back_populates="genre",
    )

class Publisher(Base):
    """Lookup table of publishers referenced by products."""

    __tablename__ = "publishers"

    publisher_cod = Column(Integer, primary_key=True)
    publisher_nom = Column(String)

    # Products released by this publisher.
    produits = relationship(
        "Produit",
        back_populates="publisher",
    )

class Year(Base):
    """Lookup table of years referenced by products."""

    __tablename__ = "years"

    year_cod = Column(Integer, primary_key=True)
    # The year is stored as a text label, not as a number.
    year_nom = Column(String)

    # Products attached to this year.
    produits = relationship(
        "Produit",
        back_populates="year",
    )


class Produit(Base):
    """Product row: a name, an integer price and four lookup foreign keys."""

    __tablename__ = "produits"

    produit_id = Column(Integer, primary_key=True)
    name = Column(String)
    prix = Column(Integer)

    # Foreign keys into the lookup tables (note: the year key is named
    # ``year_n`` while the others use the ``_cod`` suffix).
    year_n = Column(Integer, ForeignKey("years.year_cod"))
    platform_cod = Column(Integer, ForeignKey("platforms.platform_cod"))
    genre_cod = Column(Integer, ForeignKey("genres.genre_cod"))
    publisher_cod = Column(Integer, ForeignKey("publishers.publisher_cod"))

    # Lookup relations.
    year = relationship("Year", back_populates="produits")
    platform = relationship("Platform", back_populates="produits")
    genre = relationship("Genre", back_populates="produits")
    publisher = relationship("Publisher", back_populates="produits")

    # Orders and promotions that reference this product.
    commande = relationship(
        "Commande",
        back_populates="produit",
    )
    promotions = relationship(
        "Promotion",
        back_populates="produit",
    )

In [26]:
# Promotions

class Promotion(Base):
    """Percentage discount on one product, applicable in one or more regions."""

    __tablename__ = "promotions"

    promotion_id = Column(Integer, primary_key=True)
    promotion_percent = Column(Integer)
    produit_id = Column(Integer, ForeignKey("produits.produit_id"))

    # Product the promotion applies to.
    produit = relationship(
        "Produit",
        back_populates="promotions",
    )
    # Orders that used this promotion.
    commandes = relationship(
        "Commande",
        back_populates="promotion",
    )
    # Many-to-many with Region through the promotions_regions table.
    regions = relationship(
        "Region",
        secondary=promotions_regions,
        back_populates="promotions",
    )


In [27]:
# Commande

class Commande(Base):
    """Order line: one client buys ``nb_produit`` units of one product.

    ``promotion_id`` is nullable, so an order may carry no promotion.
    """

    __tablename__ = "commandes"

    commande_id = Column(Integer, primary_key=True)
    nb_produit = Column(Integer)

    client_id = Column(Integer, ForeignKey("clients.client_id"))
    produit_id = Column(Integer, ForeignKey("produits.produit_id"))
    promotion_id = Column(
        Integer,
        ForeignKey("promotions.promotion_id"),
        nullable=True,
    )

    # Relations to the referenced rows.
    client = relationship("Client", back_populates="commandes")
    produit = relationship("Produit", back_populates="commande")
    promotion = relationship("Promotion", back_populates="commandes")



In [28]:
# Logging

class Log(Base):
    """Log entry describing an action performed on a table.

    ``client_id`` is a plain integer column: it carries no foreign-key
    constraint to ``clients``.
    """

    __tablename__ = "logs"

    log_id = Column(Integer, primary_key=True)
    # Timestamp, defaulting to the database's current time.
    horodatage = Column(
        DateTime(timezone=True),
        server_default=func.now(),
    )
    type_action = Column(String)
    table_cible = Column(String)
    client_id = Column(Integer)
    details = Column(String, nullable=True)

In [29]:
# Initialisation: bind an engine to the SQLite file, build the session
# factory and create every table registered on Base.metadata.
engine = create_engine("sqlite:///BD_Ventes_de_jeux_video.bd")
SessionLocal = sessionmaker(bind=engine)
# The factory was originally published under a misspelled name; keep it as an
# alias so any existing reference continues to resolve.
SesseonLocal = SessionLocal
Base.metadata.create_all(engine)

### Extraction de données à partir d'un fichier CSV

In [30]:
# Load the source data from the CSV file.
# A forward slash resolves on Windows as well as on POSIX systems, whereas a
# backslash-separated relative path only resolves on Windows.
path_csv = "data/vgsales.csv"

df = pd.read_csv(path_csv)
# Replace every missing value with the placeholder label "unknown".
df = df.fillna("unknown")

In [31]:
# Attribute values used to fill the product and client lookup tables.
def _distinct_labels(column_name):
    """Return the distinct non-null values of a ``df`` column as strings."""
    distinct_values = df[column_name].dropna().unique()
    return list(distinct_values.astype(str))


list_platform = _distinct_labels("Platform")
list_genre = _distinct_labels("Genre")
list_publisher = _distinct_labels("Publisher")
list_year = _distinct_labels("Year")

# Fixed value sets that are not derived from the CSV.
list_region = ["NA", "EU", "JP", "Other"]
list_age = [
    "0 - 6 ans",
    "7 - 14 ans",
    "15 - 32 ans",
    "33 - 55 ans",
    "55 - 120 ans",
]


In [32]:
# Build a session factory bound to the engine and open the working session
# used by the data-loading cells.
Session = sessionmaker(bind=engine)
session = Session()

In [33]:
# Populate the Genre lookup table: one row per label in list_genre, inserted
# in list order.
session.add_all([Genre(genre_nom=genre_label) for genre_label in list_genre])
session.commit()

In [34]:
# Populate the Publisher lookup table: one row per label in list_publisher,
# inserted in list order.
session.add_all(
    [Publisher(publisher_nom=publisher_label) for publisher_label in list_publisher]
)
session.commit()

In [35]:
# Populate the Year lookup table: one row per label in list_year, inserted in
# list order.
session.add_all([Year(year_nom=year_label) for year_label in list_year])
session.commit()

In [36]:
# Populate the Platform lookup table: one row per label in list_platform,
# inserted in list order.
session.add_all(
    [Platform(platform_nom=platform_label) for platform_label in list_platform]
)
session.commit()

In [37]:
# Populate the Region lookup table: one row per label in list_region,
# inserted in list order.
session.add_all([Region(region_nom=region_label) for region_label in list_region])
session.commit()

In [38]:
# Populate Produit: one row per CSV line, with a random integer price.
df_produit = df[["Name", "Platform", "Year", "Genre", "Publisher"]]

# Resolve every label to the primary key actually stored in its lookup table.
# A position in list_year / list_platform / ... is 0-based, whereas SQLite
# assigns auto-generated integer keys starting at 1, so a list position must
# not be written as a foreign key.
year_ids = {row.year_nom: row.year_cod for row in session.query(Year)}
platform_ids = {row.platform_nom: row.platform_cod for row in session.query(Platform)}
genre_ids = {row.genre_nom: row.genre_cod for row in session.query(Genre)}
publisher_ids = {row.publisher_nom: row.publisher_cod for row in session.query(Publisher)}

for csv_row in df_produit.itertuples(index=False):
    session.add(
        Produit(
            name=csv_row.Name,
            # Random price in [20, 150).
            prix=int(np.random.randint(20, 150)),
            # Year labels were stored through str(), so look them up the same way.
            year_n=year_ids[str(csv_row.Year)],
            platform_cod=platform_ids[csv_row.Platform],
            genre_cod=genre_ids[csv_row.Genre],
            publisher_cod=publisher_ids[csv_row.Publisher],
        )
    )

session.commit()

In [39]:
# Populate the Age lookup table: one row per bracket in list_age, inserted in
# list order.
session.add_all([Age(age_plage=age_label) for age_label in list_age])
session.commit()


### Génération des données client avec Faker

In [40]:
# Generate 350 fake clients, each with its personal-data row.
fake = Faker()

# Draw foreign keys from the ids that really exist in the lookup tables.
# Hard-coded 0-based ranges do not match the auto-generated keys (SQLite
# starts them at 1) and would leave part of the clients pointing at no row.
region_ids = [rid for (rid,) in session.query(Region.region_id).order_by(Region.region_id)]
age_ids = [aid for (aid,) in session.query(Age.age_id).order_by(Age.age_id)]
if not region_ids or not age_ids:
    raise RuntimeError("Region and Age tables must be filled before creating clients")

# Age brackets follow a normal distribution (scale 1) centred on the middle
# bracket and clipped to the available positions.
last_age_pos = len(age_ids) - 1
age_centre = last_age_pos / 2

for _ in range(350):
    age_pos = int(np.round(np.random.normal(loc=age_centre, scale=1)))
    age_pos = max(0, min(last_age_pos, age_pos))

    client = Client(
        region_id=random.choice(region_ids),
        age_id=age_ids[age_pos],
    )
    # Attaching the personal data through the relationship lets SQLAlchemy
    # fill DonnePersonnel.client_id at flush time, so no per-client flush is
    # needed to obtain the generated key.
    client.donnees = DonnePersonnel(
        login=fake.unique.user_name(),
        # Only the Argon2 hash of the generated password is stored.
        mot_de_passe_hash=argon2.hash(
            fake.password(
                length=5,
                digits=True,
                upper_case=True,
                lower_case=True,
            )
        ),
    )
    session.add(client)

session.commit()

### Génération du tableau des commandes

In [41]:
# Populate Promotion with 15 random promotions, each tied to one region.
N_client = 350
N_produit = df_produit.shape[0]

regions = session.query(Region).all()
# Sample product ids from the rows that exist: drawing from range(N_produit)
# yields 0-based values, which can hit the non-existent id 0 and never the
# last product.
produit_ids = [pid for (pid,) in session.query(Produit.produit_id)]
if not regions or not produit_ids:
    raise RuntimeError("Region and Produit tables must be filled before creating promotions")

for _ in range(15):
    promo = Promotion(
        produit_id=random.choice(produit_ids),
        # Discount between 10 and 80 percent, in steps of 10.
        promotion_percent=int(np.random.randint(1, 9)) * 10,
    )
    promo.regions.append(random.choice(regions))
    session.add(promo)

session.commit()


In [None]:
# Populate Commande with 5000 random orders.

# Fetch each client's id and region once instead of reloading the client on
# every iteration.
clients = session.query(Client.client_id, Client.region_id).all()
client_ids = [client_row[0] for client_row in clients]

# Sample product ids from the rows that exist: a 0-based draw over the row
# count can hit the non-existent id 0 and never the last product.
produit_ids = [pid for (pid,) in session.query(Produit.produit_id)]
if not clients or not produit_ids:
    raise RuntimeError("Client and Produit tables must be filled before creating orders")

# Map (region_id, produit_id) -> promotion_id once, replacing one promotion
# query per order. When several promotions match the same pair, the one with
# the lowest promotion_id is kept.
promotion_lookup = {}
for promo in session.query(Promotion).order_by(Promotion.promotion_id):
    for promo_region in promo.regions:
        promotion_lookup.setdefault(
            (promo_region.region_id, promo.produit_id),
            promo.promotion_id,
        )

for _ in range(5000):
    client_id, region_id = random.choice(clients)
    produit_id = random.choice(produit_ids)

    session.add(
        Commande(
            # Quantity between 1 and 4.
            nb_produit=int(np.random.randint(1, 5)),
            produit_id=produit_id,
            client_id=client_id,
            # None when no promotion covers this product in the client's region.
            promotion_id=promotion_lookup.get((region_id, produit_id)),
        )
    )

session.commit()

In [50]:
# Close the working session first, then dispose of the engine.
session.close()
engine.dispose()
