In [1]:
from sqlalchemy.sql.expression import insert, delete, select, func
from sqlalchemy.orm import sessionmaker
from sqlalchemy import create_engine
from model.database.base import Base
from model.database.models import *
from model._check_insert import *
from model._dates import *
from model._cryption import *
from model._helper import *
from model._crud import *

import pandas as pd
from IPython.display import clear_output
from faker import Faker
import hashlib
import random

In [2]:
# Load the raw video-game sales dataset. The path is relative to the
# directory the notebook is executed from.
VG_SALES_CSV = 'vgsales.csv'
vg_sales = pd.read_csv(VG_SALES_CSV)

In [3]:
# Connection target: a SQLite file addressed relative to the working
# directory. echo=True makes SQLAlchemy log every statement it emits, which
# is why this cell finishes by clearing its own output.
DATABASE_URL = "sqlite:///video_games_sales.db"
engine = create_engine(DATABASE_URL, echo=True)

# Emit CREATE TABLE for the tables registered on Base's metadata.
Base.metadata.create_all(bind=engine)

# Session factory bound to the engine, plus the single session object that
# the remaining cells share.
Session = sessionmaker(bind=engine)
session = Session()

clear_output()

In [4]:
# Seed the database with fake French-locale users.
# Name, phone number and street address go through encrypt(); the e-mail is
# stored only as a SHA-256 hex digest of its lower-cased form.
session.rollback()  # discard whatever an earlier cell left pending

N_FAKE_USERS = 2**6  # generation attempts; unparsable addresses are skipped
fake = Faker('fr_FR')
users_list = []
for _ in range(N_FAKE_USERS):
    name = encrypt(fake.name().lower())
    email = hashlib.sha256(fake.email().lower().encode()).hexdigest()
    phone_number = encrypt(fake.phone_number())

    # The loop expects "<street>\n<post code> <city>". Any other layout makes
    # one of the two unpackings raise ValueError, and only that case is
    # skipped -- unrelated errors are no longer swallowed by a bare except.
    try:
        street, city_line = fake.address().lower().split('\n')
        post_code, city_name = city_line.split(' ', 1)
    except ValueError:
        continue
    address = encrypt(street)
    country_name = fake.country().lower()

    region_name = random.choice(['EU', 'US', 'JP', 'Other'])
    # Four out of five users keep their data; the flag selects which upper
    # bound is used for the deletion date.
    # NOTE(review): d, d_plus_five_y and d_plus_two_w are not defined in this
    # notebook -- presumably they come from model._dates; confirm.
    keep_info = random.choice([True, True, True, True, False])
    deleted_in = random_date(d, d_plus_five_y) if keep_info else random_date(d, d_plus_two_w)

    # NOTE(review): the helper is rebuilt for every user, exactly as before;
    # hoist it out of the loop if check_insert_data turns out to be stateless.
    db = check_insert_data(session=session)
    post_code_db = db.post_code(
        region_name=region_name,
        country_name=country_name,
        city_name=city_name,
        post_code_value=post_code,
    )

    users_list.append(
        User(
            name=name,
            email=email,
            phone_number=phone_number,
            address=address,
            post_code=post_code_db,
            keep_info=keep_info,
            deleted_in=deleted_in,
        )
    )
clear_output()
add_commit_close_clear(session, users_list)

In [5]:
# Fill the six lookup tables with the distinct, sorted values found in the
# CSV. Missing years and publishers are dropped before sorting.
year_list = [{'year_date': int(year)} for year in sorted(vg_sales.Year.dropna().unique().tolist())]
rank_list = [{'ranking': rank} for rank in sorted(vg_sales.Rank.unique().tolist())]
publisher_list = [{'name': publisher} for publisher in sorted(vg_sales.Publisher.dropna().unique().tolist())]
platform_list = [{'name': platform} for platform in sorted(vg_sales.Platform.unique().tolist())]
genre_list = [{'name': genre} for genre in sorted(vg_sales.Genre.unique().tolist())]
game_list = [{'name': title} for title in sorted(vg_sales.Name.unique().tolist())]

classes = [Year, Rank, Publisher, Platform, Genre, Game]
lists = [year_list, rank_list, publisher_list, platform_list, genre_list, game_list]

# One status string per table, in the same order as `classes`.
output = []
for model_class, rows in zip(classes, lists):
    try:
        session.rollback()
        session.bulk_insert_mappings(model_class, rows)
        session.commit()
        output.append('Inserted')
    except Exception as e:
        # Roll back immediately so a failure on the last table does not leave
        # the shared session in a failed transaction for the following cells.
        session.rollback()
        output.append(f'Insertion failed due to {e}')
clear_output()
output

['Inserted', 'Inserted', 'Inserted', 'Inserted', 'Inserted', 'Inserted']

In [6]:
# Preview only: build the game-version rows without inserting them and show
# the ones that end up without a publisher.

# Natural key -> primary key for every lookup table.
year_records = session.query(Year.id, Year.year_date).all()
year_to_id = {year_date: pk for pk, year_date in year_records}

rank_records = session.query(Rank.id, Rank.ranking).all()
rank_to_id = {ranking: pk for pk, ranking in rank_records}

publisher_records = session.query(Publisher.id, Publisher.name).all()
publisher_to_id = {name: pk for pk, name in publisher_records}

platform_records = session.query(Platform.id, Platform.name).all()
platform_to_id = {name: pk for pk, name in platform_records}

genre_records = session.query(Genre.id, Genre.name).all()
genre_to_id = {name: pk for pk, name in genre_records}

game_records = session.query(Game.id, Game.name).all()
game_to_id = {name: pk for pk, name in game_records}

# itertuples avoids building one Series per CSV row (what iterrows does) and
# still exposes the columns as attributes.
version_columns = ['Platform', 'Rank', 'Year', 'Publisher', 'Name', 'Genre']
game_version_list = [
    {
        'platform_id': platform_to_id[row.Platform],
        'rank_id': rank_to_id[row.Rank],
        # Year and Publisher may be missing in the CSV; keep them as None.
        'year_id': None if pd.isna(row.Year) else year_to_id[row.Year],
        'publisher_id': None if pd.isna(row.Publisher) else publisher_to_id[row.Publisher],
        'game_id': game_to_id[row.Name],
        'genre_id': genre_to_id[row.Genre],
    }
    for row in vg_sales[version_columns].itertuples(index=False)
]

game_version_df = pd.DataFrame(game_version_list)
game_version_df[game_version_df.publisher_id.isna()]

2025-12-04 09:59:18,219 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-12-04 09:59:18,220 INFO sqlalchemy.engine.Engine SELECT years.id AS years_id, years.year_date AS years_year_date 
FROM years
2025-12-04 09:59:18,221 INFO sqlalchemy.engine.Engine [generated in 0.00057s] ()
2025-12-04 09:59:18,223 INFO sqlalchemy.engine.Engine SELECT ranks.id AS ranks_id, ranks.ranking AS ranks_ranking 
FROM ranks
2025-12-04 09:59:18,223 INFO sqlalchemy.engine.Engine [generated in 0.00048s] ()
2025-12-04 09:59:18,289 INFO sqlalchemy.engine.Engine SELECT publishers.id AS publishers_id, publishers.name AS publishers_name 
FROM publishers
2025-12-04 09:59:18,290 INFO sqlalchemy.engine.Engine [generated in 0.00063s] ()
2025-12-04 09:59:18,291 INFO sqlalchemy.engine.Engine SELECT platforms.id AS platforms_id, platforms.name AS platforms_name 
FROM platforms
2025-12-04 09:59:18,292 INFO sqlalchemy.engine.Engine [generated in 0.00040s] ()
2025-12-04 09:59:18,293 INFO sqlalchemy.engine.Engine SELECT gen

Unnamed: 0,platform_id,rank_id,year_id,publisher_id,game_id,genre_id
470,17,471,,,11492,3
1303,16,1304,,,10509,11
1662,7,1663,28.0,,8610,4
2222,7,2223,26.0,,773,4
3159,7,3160,25.0,,6743,4
3166,7,3167,25.0,,8981,4
3766,7,3767,25.0,,8982,4
4145,18,4146,,,8866,5
4526,7,4527,25.0,,9819,4
4635,7,4636,25.0,,9820,4


In [7]:
# Resolve every CSV row to lookup-table primary keys and insert the resulting
# game versions.

# Natural key -> primary key for every lookup table.
year_records = session.query(Year.id, Year.year_date).all()
year_to_id = {year_date: pk for pk, year_date in year_records}

rank_records = session.query(Rank.id, Rank.ranking).all()
rank_to_id = {ranking: pk for pk, ranking in rank_records}

publisher_records = session.query(Publisher.id, Publisher.name).all()
publisher_to_id = {name: pk for pk, name in publisher_records}

platform_records = session.query(Platform.id, Platform.name).all()
platform_to_id = {name: pk for pk, name in platform_records}

genre_records = session.query(Genre.id, Genre.name).all()
genre_to_id = {name: pk for pk, name in genre_records}

game_records = session.query(Game.id, Game.name).all()
game_to_id = {name: pk for pk, name in game_records}

# itertuples avoids building one Series per CSV row (what iterrows does) and
# still exposes the columns as attributes.
version_columns = ['Platform', 'Rank', 'Year', 'Publisher', 'Name', 'Genre']
game_version_list = [
    {
        'platform_id': platform_to_id[row.Platform],
        'rank_id': rank_to_id[row.Rank],
        # Year and Publisher may be missing in the CSV; keep them as None.
        'year_id': None if pd.isna(row.Year) else year_to_id[row.Year],
        'publisher_id': None if pd.isna(row.Publisher) else publisher_to_id[row.Publisher],
        'game_id': game_to_id[row.Name],
        'genre_id': genre_to_id[row.Genre],
    }
    for row in vg_sales[version_columns].itertuples(index=False)
]

try:
    session.rollback()
    bulk_commit(session, GameVersion, game_version_list)
    session.commit()
    output.append('Inserted')
except Exception as e:
    # Leave the shared session usable for the cells that follow.
    session.rollback()
    output.append(f'Insertion failed due to {e}')

# Show the status that was just recorded; without this a failure would sit
# unnoticed inside `output`.
output[-1]

In [8]:
# Attach each CSV sales row to the game version that was inserted for it, then
# insert the sales figures.
session.rollback()

# Read every game version back together with the readable values of its
# lookup rows. Year and publisher are outer-joined because they may be absent.
game_version_records = (
    session.query(
        GameVersion.id.label('id'),
        Platform.name.label('platform'),
        Rank.ranking.label('rank'),
        Year.year_date.label('year_date'),
        Publisher.name.label('publisher'),
        Game.name.label('game'),
        GameVersion.id.label('game_version_id'),
        Genre.name.label('genre'),
    )
    .join(GameVersion.platform)
    .join(GameVersion.rank)
    .join(GameVersion.year, isouter=True)
    .join(GameVersion.publisher, isouter=True)
    .join(GameVersion.game)
    .join(GameVersion.genre)
    .all()
)
records_data = [
    {
        'game_version_id': record.game_version_id,
        'Rank': record.rank,
        'Platform': record.platform,
        'Year_Date': record.year_date,
        'Publisher': record.publisher,
        'Game': record.game,
        'Genre': record.genre,
    }
    for record in game_version_records
]
records_df = pd.DataFrame(records_data)

# Rename the CSV columns so both frames share the same key names.
# rename() already returns a new frame, so no extra copy is needed.
vg_sales2 = vg_sales.rename(columns={'Name': 'Game', 'Year': 'Year_Date'})
match_columns = ['Rank', 'Platform', 'Year_Date', 'Publisher', 'Game', 'Genre']
sales_columns = ['NA_Sales', 'EU_Sales', 'JP_Sales', 'Other_Sales']

# Right join: every CSV row is kept; a row with no matching game version gets
# a missing game_version_id.
merged_df = pd.merge(
    left=records_df,
    right=vg_sales2[match_columns + sales_columns],
    left_on=match_columns,
    right_on=match_columns,
    how='right'
)

sales_list = [
    {
        'game_version_id': row.game_version_id,
        'na_sales': row.NA_Sales,
        'eu_sales': row.EU_Sales,
        'jp_sales': row.JP_Sales,
        'other_sales': row.Other_Sales,
    }
    for row in merged_df[['game_version_id'] + sales_columns].itertuples(index=False)
]
sales_df = pd.DataFrame(sales_list)

# Sanity check: number of CSV rows that could not be matched.
# NOTE(review): unmatched rows are still passed to bulk_commit below with a
# missing game_version_id, as before -- decide whether they should be dropped.
print(sales_df.game_version_id.isna().sum())
bulk_commit(session, Sale, sales_list)

'Inserted'

In [9]:
# Generate random purchase transactions between existing users and existing
# game versions.
session.rollback()

N_TRANSACTIONS = 256

# Fetch the candidate ids once and draw from them in Python, instead of
# running two ORDER BY random() queries per generated transaction.
game_version_ids = [row.id for row in session.query(GameVersion.id).all()]
user_ids = [row.id for row in session.query(User.id).all()]
if not game_version_ids or not user_ids:
    raise ValueError('Cannot generate transactions: no game versions or no users in the database')

transaction_list = []
for _ in range(N_TRANSACTIONS):
    # int() truncates toward zero, so a draw at or below -1 would give a
    # quantity of 0 or less; clamp so every transaction buys at least one copy.
    quantity = max(1, int(random.normalvariate(1.5, 1)) + 1)
    transaction_list.append({
        'quantity': quantity,
        'game_version_id': random.choice(game_version_ids),
        'user_id': random.choice(user_ids),
    })
bulk_commit(session, Transaction, transaction_list)

'Inserted'

In [10]:
# Look up the primary key of the platform row named 'PC'.
pc_platform = session.query(Platform).filter_by(name='PC').first()
pc_platform.id

2025-12-04 09:59:22,585 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2025-12-04 09:59:22,586 INFO sqlalchemy.engine.Engine SELECT platforms.id AS platforms_id, platforms.name AS platforms_name 
FROM platforms 
WHERE platforms.name = ?
 LIMIT ? OFFSET ?
2025-12-04 09:59:22,586 INFO sqlalchemy.engine.Engine [generated in 0.00031s] ('PC', 1, 0)


14

In [11]:
# Purge every user whose deleted_in date is today or earlier.
today = datetime.datetime.now().date()
expired_users = delete(User).where(User.deleted_in <= today)
session.execute(expired_users)
session.commit()
clear_output()

# BREAK

In [12]:
# Demo: register one game version through the CRUD helper.
# NOTE(review): judging by the lookup output further down, the positional
# values are platform, year, publisher, title and genre -- confirm against
# add_game_version's signature.
session.rollback()
new_game_fields = ('PC', '2025', 'CD Project', 'Cyberpunk 2077', 'Action')
new_game = add_game_version(session, *new_game_fields)
new_game

In [13]:
# Demo: register one user through the CRUD helper.
# NOTE(review): judging by the lookup output further down, the positional
# values are name, e-mail, phone number, address, post code, city, country
# and region -- confirm against add_user's signature.
session.rollback()
new_user_fields = (
    'Cyril Leconte',
    'cyril.leconte@email.com',
    '+33 6 06 06 06 06',
    '1 champ elysee',
    '75000',
    'Paris',
    'France',
    'EU',
)
new_user = add_user(session, *new_user_fields)
new_user

In [14]:
# Demo: fetch the user created above by e-mail address and display the
# returned record.
lookup_email = 'cyril.leconte@email.com'
user = retrive_user_with_mail(session, lookup_email)
user

{'Name': 'Cyril Leconte',
 'Email': 'cyril.leconte@email.com',
 'Phone number': '+33 6 06 06 06 06',
 'Address': '1 champ elysee',
 'Post Code': '75000',
 'City': 'Paris',
 'Country': 'France',
 'Region': 'EU'}

In [15]:
# Demo: fetch the game created above by title and display the returned record.
lookup_title = 'Cyberpunk 2077'
game = retrive_game(session, lookup_title)
game

{'Name': 'Cyberpunk 2077',
 'Platform': 'PC',
 'Year': 2025,
 'Publisher': 'CD Project',
 'Genre': 'Action'}