# Conception BDD Normalisée et Conforme au RGPD (SIDORA AI)

In [105]:
# import
from sqlalchemy import Column, Integer, String, ForeignKey, create_engine
from sqlalchemy.orm import declarative_base, relationship, sessionmaker

import pandas as pd
import numpy as np
from faker import Faker

In [106]:
# Declarative base class that every ORM model in this file inherits from
Base = declarative_base()

#Client
class Age(Base):
    """Lookup table ``ages``: one row per age bracket, shared by many clients."""
    __tablename__ = "ages"
    age_id = Column(Integer, primary_key=True)
    age_plage = Column(String)  # text label of the bracket
    # One-to-many: clients assigned to this bracket (other side is Client.age)
    clients = relationship("Client", back_populates="age")

class Region(Base):
    """Lookup table ``regions``: one row per region, shared by many clients."""
    __tablename__ = "regions"
    region_id = Column(Integer, primary_key=True)
    region_nom = Column(String)  # region name
    # One-to-many: clients located in this region (other side is Client.region)
    clients = relationship("Client", back_populates="region")

class DonnePersonnel(Base):
    """Table ``donnes_personnels``: login credentials of a client, kept apart from ``clients``.

    The primary key is also the foreign key to ``clients.client_id``, so a
    client can own at most one row here.
    """
    __tablename__ = "donnes_personnels"
    client_id = Column(Integer,ForeignKey("clients.client_id"), primary_key=True)
    login = Column(String)
    # NOTE(review): plain String column; the seeding code in this file stores
    # the generated password as-is — confirm hashing before holding real data.
    mot_de_passe = Column(String)
    # Owning client (other side is Client.donnes)
    client = relationship("Client", back_populates="donnes")

class Client(Base):
    """Table ``clients``: a customer described only by an age bracket and a region.

    Credentials live in the separate ``DonnePersonnel`` table and orders in
    ``Commande``.
    """
    __tablename__ = "clients"
    client_id = Column(Integer, primary_key=True)
    age_id = Column(Integer, ForeignKey("ages.age_id"))
    region_id = Column(Integer, ForeignKey("regions.region_id"))
    # Relationships
    age = relationship("Age", back_populates="clients")
    region = relationship("Region", back_populates="clients")
    # uselist=False: a single DonnePersonnel object rather than a list
    donnes = relationship("DonnePersonnel", back_populates="client", uselist=False)
    # One-to-many: orders placed by this client
    commandes = relationship("Commande", back_populates="client")

In [107]:
#Produit

class Platform(Base):
    """Lookup table ``platforms``: one row per platform, shared by many products."""
    __tablename__ = "platforms"
    platform_cod = Column(Integer, primary_key=True)
    platform_nom = Column(String)  # platform name
    # One-to-many: products released on this platform (other side is Produit.platform)
    produits = relationship("Produit", back_populates="platform")

class Genre(Base):
    """Lookup table ``genres``: one row per genre, shared by many products."""
    __tablename__ = "genres"
    genre_cod = Column(Integer, primary_key=True)
    genre_nom = Column(String)  # genre name
    # One-to-many: products of this genre (other side is Produit.genre)
    produits = relationship("Produit", back_populates="genre")

class Publisher(Base):
    """Lookup table ``publishers``: one row per publisher, shared by many products."""
    __tablename__ = "publishers"
    publisher_cod = Column(Integer, primary_key=True)
    publisher_nom = Column(String)  # publisher name
    # One-to-many: products from this publisher (other side is Produit.publisher)
    produits = relationship("Produit", back_populates="publisher")

class Year(Base):
    """Lookup table ``years``: one row per release-year label, shared by many products."""
    __tablename__ = "years"
    year_cod = Column(Integer, primary_key=True)
    year_nom = Column(String)  # year label, stored as text
    # One-to-many: products released that year (other side is Produit.year)
    produits = relationship("Produit", back_populates="year")


class Produit(Base):
    """Table ``produits``: a product with a name, a price and four lookup references.

    Year, platform, genre and publisher are stored as foreign keys to their
    own lookup tables.
    """
    __tablename__ = "produits"
    produit_id = Column(Integer, primary_key=True)
    name = Column(String)
    prix = Column(Integer)  # price, integer-valued
    # Foreign key to years.year_cod (named year_n, unlike the other *_cod columns)
    year_n = Column(Integer, ForeignKey("years.year_cod"))
    platform_cod = Column(Integer, ForeignKey("platforms.platform_cod"))
    genre_cod = Column(Integer, ForeignKey("genres.genre_cod"))
    publisher_cod = Column(Integer, ForeignKey("publishers.publisher_cod"))
    # Relationships
    year = relationship("Year", back_populates="produits")
    platform = relationship("Platform", back_populates="produits")
    genre = relationship("Genre", back_populates="produits")
    publisher = relationship("Publisher", back_populates="produits")
    # NOTE(review): despite the singular name this is the "many" side (the
    # foreign key lives on commandes.produit_id), so it holds a list of orders.
    commande = relationship("Commande", back_populates="produit")

In [108]:
# Commande

class Commande(Base):
    """Table ``commandes``: an order linking one client to one product with a quantity."""
    __tablename__ = "commandes"
    commande_id = Column(Integer, primary_key=True)
    nb_produit = Column(Integer)  # number of units ordered
    client_id = Column(Integer, ForeignKey("clients.client_id"))
    produit_id = Column(Integer, ForeignKey("produits.produit_id"))
    # Relationships
    client = relationship("Client", back_populates="commandes")
    produit = relationship("Produit", back_populates="commande")



In [109]:
# Initialisation: engine for the SQLite database file "bd_joue_vent" (relative path)
engine = create_engine("sqlite:///bd_joue_vent")
# NOTE(review): "SesseonLocal" looks like a misspelling of "SessionLocal"; the
# visible code never uses it (a separate `Session` factory is created further
# down) — confirm no other cell relies on it before renaming or removing.
SesseonLocal = sessionmaker(bind=engine)
# Create the tables of every model registered on Base
Base.metadata.create_all(engine)

### Extraction de données à partir d'un fichier CSV

In [110]:
# Load the source data from a CSV file.
# A forward slash is accepted as a path separator on Windows, Linux and macOS;
# the previous backslash path (r"data\vgsales.csv") only resolved on Windows.
path_csv = "data/vgsales.csv"

df = pd.read_csv(path_csv)
# Replace every missing value with the placeholder label "unknown" so that
# the lookup lists built from this frame contain no NaN.
df = df.fillna("unknown")

In [111]:
# Lookup values for products (taken from the CSV) and for clients (fixed lists).
# Each CSV-derived list holds the distinct values of one column, as strings,
# in order of first appearance.
list_platform, list_genre, list_publisher, list_year = (
    list(df[column].dropna().unique().astype(str))
    for column in ("Platform", "Genre", "Publisher", "Year")
)
list_region = ["NA", "EU", "JP", "Other"]
list_age = [
    "0 - 6 ans",
    "7 - 14 ans",
    "15 - 32 ans",
    "33 - 55 ans",
    "55 - 120 ans",
]


In [112]:
# Session factory bound to the engine, and the single session used by all the
# cells below to insert data
Session = sessionmaker(bind=engine)
session = Session()

In [113]:
# Populate the Genre table: one row per label in list_genre, in list order.
# (The enumerate index of the original loop was never used.)
session.add_all([Genre(genre_nom=label) for label in list_genre])

session.commit()

In [114]:
# Populate the Publisher table: one row per label in list_publisher, in list order.
# (The enumerate index of the original loop was never used.)
session.add_all([Publisher(publisher_nom=label) for label in list_publisher])

session.commit()

In [115]:
# Populate the Year table: one row per label in list_year, in list order.
# (The enumerate index of the original loop was never used.)
session.add_all([Year(year_nom=label) for label in list_year])

session.commit()

In [116]:
# Populate the Platform table: one row per label in list_platform, in list order.
# (The enumerate index of the original loop was never used.)
session.add_all([Platform(platform_nom=label) for label in list_platform])

session.commit()

In [117]:
# Populate the Region table: one row per label in list_region, in list order.
# (The enumerate index of the original loop was never used.)
session.add_all([Region(region_nom=label) for label in list_region])

session.commit()

### Nettoyage des données

In [118]:
# Populate the Produit table: one row per CSV line.

df_produit = df[["Name", "Platform", "Year", "Genre", "Publisher"]]

# Map every lookup label to the primary key stored in the database.
# The previous code used list.index(), which is 0-based, whereas the
# autoincremented keys of the lookup tables start at 1: every product ended
# up referencing the wrong row, or no row at all for index 0. Reading the ids
# back also replaces one linear list scan per product with a dict lookup.
year_ids = {
    nom: cod for nom, cod in session.query(Year.year_nom, Year.year_cod)
}
platform_ids = {
    nom: cod
    for nom, cod in session.query(Platform.platform_nom, Platform.platform_cod)
}
genre_ids = {
    nom: cod for nom, cod in session.query(Genre.genre_nom, Genre.genre_cod)
}
publisher_ids = {
    nom: cod
    for nom, cod in session.query(Publisher.publisher_nom, Publisher.publisher_cod)
}

for _, row in df_produit.iterrows():
    session.add(
        Produit(
            name=row["Name"],
            # Random price in [20, 150): the upper bound is exclusive.
            prix=np.random.randint(20, 150),
            # Year labels were stored as str(value), so look them up the same way.
            year_n=year_ids[str(row["Year"])],
            platform_cod=platform_ids[row["Platform"]],
            genre_cod=genre_ids[row["Genre"]],
            publisher_cod=publisher_ids[row["Publisher"]],
        )
    )

session.commit()

In [119]:
# Populate the Age table: one row per bracket in list_age, in list order.
# (The enumerate index of the original loop was never used.)
session.add_all([Age(age_plage=label) for label in list_age])

session.commit()


### Génération des données client avec Faker

In [120]:
# Generate fake clients together with their (fake) personal data.
fake = Faker()

# The Region and Age tables are filled from list_region / list_age earlier in
# this file and their autoincremented keys start at 1, so valid ids are
# 1..len(list). The previous code drew region ids from 0..2 and age ids from
# 0..3: id 0 matches no row and the last region was never used.
n_regions = len(list_region)

for _ in range(350):
    # Normal draw centred on 1.5, rounded and clipped to bracket index 0..3
    # (the last bracket is deliberately left out, as before), then shifted by
    # one to obtain an existing age_id.
    age_index = max(0, min(3, int(np.round(np.random.normal(loc=1.5, scale=1)))))

    client = Client(
        # randint's upper bound is exclusive: uniform over 1..n_regions.
        region_id=np.random.randint(1, n_regions + 1),
        age_id=age_index + 1,
    )
    session.add(client)
    session.flush()  # sends the INSERT so that client.client_id is populated

    # Credentials row sharing the client's primary key.
    # NOTE(review): the password is stored in clear text. Harmless for
    # generated data, but it must be hashed before any real data is stored.
    donne = DonnePersonnel(
        login=fake.user_name(),
        mot_de_passe=fake.password(
            length=12,
            special_chars=True,
            digits=True,
            upper_case=True,
            lower_case=True,
        ),
        client_id=client.client_id,  # link to the client just inserted
    )
    session.add(donne)

session.commit()

### Génération du tableau des commandes

In [121]:
# Populate the Commande table with 5000 random orders.

N_client = 350  # number of clients generated earlier in this file
N_produit = df_produit.shape[0]  # one product row was inserted per CSV line

for _ in range(5000):
    session.add(
        Commande(
            # Quantity in 1..4 (randint's upper bound is exclusive).
            nb_produit=np.random.randint(1, 5),
            # Autoincremented keys start at 1, so valid ids are 1..N. The
            # previous randint(N) drew from 0..N-1: id 0 matches no row and
            # the last product / client could never be ordered.
            produit_id=np.random.randint(1, N_produit + 1),
            client_id=np.random.randint(1, N_client + 1),
        )
    )

session.commit()

In [122]:
# Close the session, then dispose of the engine's connection pool
session.close()
engine.dispose()
