In [2]:
import numpy as np
import pandas as pd
from faker import Faker
import random
import matplotlib.pyplot as plt
import secrets
import string
import psycopg2
from datetime import timedelta, datetime

# Fake client data

In [3]:
def getRandomPasswordString():
    alphabet = string.ascii_letters + string.digits + string.punctuation.replace(',', '')  # Exclure la virgule

    password = secrets.choice(string.ascii_uppercase) + \
               secrets.choice(string.ascii_lowercase) + \
               secrets.choice(string.digits) + \
               secrets.choice(string.punctuation.replace(',', '')) + \
               ''.join(secrets.choice(alphabet) for i in range(12))

    return password


In [4]:
getRandomPasswordString()

'Jk1~tKmQL!1P4e;2'

In [5]:
def generate_data_1():
    fake = Faker()
    liste_des_domaines = ['com','net','org','gov','fr']

    list_rows = []
    nb_row = random.randint(10000, 12000)

    generated_ids = set()   # Utilisation d'un ensemble pour stocker les IDs générés

    for i in range(nb_row):
        first_name = fake.first_name()
        last_name = fake.last_name()
        company = fake.company().split()[0].strip(',')
        dns_org = fake.random_choices(elements=liste_des_domaines, length=1)[0]
        email = f"{first_name}.{last_name}@{company}.{dns_org}".lower()
        unique_id = random.randint(100000, 999999)
        age = random.randint(18, 80)
        pwd = getRandomPasswordString()

        if unique_id not in generated_ids:
            generated_ids.add(unique_id)  # Ajoute l'ID à l'ensemble des IDs générés
            list_rows.append(
                (unique_id, first_name, last_name, pwd, age, fake.job(), fake.address(), fake.phone_number(), email)
            )
    return list_rows

In [6]:
data = generate_data_1()

In [7]:
client = pd.DataFrame(data, columns=['id', 'prenom', 'nom', 'pwd', 'age', 'job', 'adress', 'phoneNumber', 'email'])
client.head()

Unnamed: 0,id,prenom,nom,pwd,age,job,adress,phoneNumber,email
0,771045,Jared,Trevino,"Hu1)YYR~k3""d~[[{",55,Dealer,"46600 Yoder Vista\nPierceborough, WI 00681",653-473-8872,jared.trevino@young.net
1,213213,Sydney,Johnson,Iq3`P}8/Sw/ZS=ua,23,"Editor, commissioning","485 Lynch Lake Apt. 791\nNorth Sandra, CO 85333",939-616-4491x73622,sydney.johnson@burns-stanley.com
2,487388,Linda,Fuller,Qb0=Ga!Qn>]sVH!c,28,Merchant navy officer,"405 Marc Mountains Suite 190\nPort Cherylfurt,...",001-807-622-0270,linda.fuller@mitchell.net
3,872983,Charles,Stanley,Uh8=59VxR77RRe*B,26,Lobbyist,"082 Perez Locks Apt. 478\nNorth Davidchester, ...",956-998-2873,charles.stanley@lee.org
4,664335,Craig,Rogers,Td8:l11P-U'!^XNf,78,"Journalist, broadcasting","0072 Mitchell Valleys Suite 978\nRobertmouth, ...",4332729753,craig.rogers@gilmore.org


In [8]:
client.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 9976 entries, 0 to 9975
Data columns (total 9 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id           9976 non-null   int64 
 1   prenom       9976 non-null   object
 2   nom          9976 non-null   object
 3   pwd          9976 non-null   object
 4   age          9976 non-null   int64 
 5   job          9976 non-null   object
 6   adress       9976 non-null   object
 7   phoneNumber  9976 non-null   object
 8   email        9976 non-null   object
dtypes: int64(2), object(7)
memory usage: 701.6+ KB


In [9]:
client.to_csv('client.csv', index=False)

# Fake game stats data

In [83]:
# Générer les IDs de 2 à 50
all_ids = np.arange(2, 52)
excluded_ids = [8, 11, 12, 13, 14, 15, 16, 27, 28, 29, 34, 45]
id_values = [e for e in all_ids if e not in excluded_ids]

# Générer toutes les dates de 2023
start_date = pd.to_datetime('2022-01-01')
end_date = pd.to_datetime('2023-12-31')
dates = pd.date_range(start_date, end_date, freq='D')

np.random.seed(10)

# Créer une DataFrame vide pour stocker les données
jeux_stats = pd.DataFrame(columns=['id_article', 'date', 'qte_ajoutee', 'qte_achetee'])

# Générer des quantités aléatoires en fonction de chaque article et date
for article_id in id_values:
    # Générer des paramètres spécifiques pour chaque article (moyenne et écart-type)
    moyenne_ajoutee = np.random.uniform(50, 200)  # Moyenne pour qte_ajoutee
    ecart_type_ajoutee = np.random.uniform(20, 100)  # Écart-type pour qte_ajoutee
    
    moyenne_achetee = moyenne_ajoutee * np.random.uniform(0.5, 0.8)  # Moyenne pour qte_achetee
    ecart_type_achetee = ecart_type_ajoutee * np.random.uniform(0.5, 0.8)  # Écart-type pour qte_achetee
    
    # Générer des quantités aléatoires pour chaque date en utilisant une distribution normale
    qte_ajoutee = np.random.normal(moyenne_ajoutee, ecart_type_ajoutee, len(dates)).astype(int)
    qte_ajoutee = np.abs(qte_ajoutee)  # Rendre les valeurs négatives positives
    qte_achetee = np.random.normal(moyenne_achetee, ecart_type_achetee, len(dates)).astype(int)
    qte_achetee = np.abs(qte_achetee)  # Rendre les valeurs négatives positives
    qte_achetee = np.where(qte_achetee == 0, 1, qte_achetee) # Assurer que qte_achetee n'est pas nulle
    
    # Créer une DataFrame pour cet article et concaténer avec la DataFrame principale
    article_data = pd.DataFrame({
        'id_article': article_id,
        'date': dates,
        'qte_ajoutee': qte_ajoutee,
        'qte_achetee': qte_achetee
    })
    jeux_stats = pd.concat([jeux_stats, article_data])

# Réinitialiser l'index
jeux_stats.reset_index(drop=True, inplace=True)

# Convertir la colonne 'date' en format date YYYY-MM-DD
jeux_stats['date'] = jeux_stats['date'].dt.strftime('%Y-%m-%d')

  jeux_stats = pd.concat([jeux_stats, article_data])


In [86]:
jeux_stats.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 27740 entries, 0 to 27739
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   id_article   27740 non-null  object
 1   date         27740 non-null  object
 2   qte_ajoutee  27740 non-null  object
 3   qte_achetee  27740 non-null  object
dtypes: object(4)
memory usage: 867.0+ KB


In [85]:
jeux_stats.to_csv('jeux_stats.csv', index=False)