# Generation of fake data

In [2]:
import os
import random
from datetime import datetime, timedelta

import mysql.connector
import pandas as pd
from faker import Faker
print("All set up")

All set up


### Database connection

In [5]:
# Connection settings are read from environment variables so that real
# credentials do not have to be stored in the notebook. The fallbacks are the
# local development values, so the cell behaves as before when nothing is set.
conn = mysql.connector.connect(
    host=os.environ.get("SADOP_DB_HOST", "127.0.0.1"),
    port=int(os.environ.get("SADOP_DB_PORT", "3307")),
    user=os.environ.get("SADOP_DB_USER", "sadop_user"),
    password=os.environ.get("SADOP_DB_PASSWORD", "1234"),
    database=os.environ.get("SADOP_DB_NAME", "sadop_DB"),
)
# A single cursor on this connection is reused by the cells that follow.
cursor = conn.cursor()
print("Connected to SADOP database")

Connected to SADOP database


### Initialization

In [6]:
# Shared Faker instance; the insert cells below call it to generate names,
# e-mail addresses, phone numbers and timestamps.
fake = Faker()

In [8]:
NUM_USERS = 10_000            # number of rows generated for the Users table
ACCOUNTS_PER_USER = (1, 3)   # (min, max) accounts per user; unpacked into random.randint, so both ends are inclusive
TRANSACTIONS_PER_ACCOUNT = (5, 20)  # (min, max) transactions per account; unpacked into random.randint, so both ends are inclusive

BATCH_SIZE = 1000            # flush/commit interval of the batched inserts; author's note: SAFE for MySQL + laptop


### 1.1 Insert 10,000 rows of fake data into Users

In [9]:

def insert_users(num_users, batch_size):
    """Generate fake users and insert them into the ``Users`` table in batches.

    Uses the notebook-level ``fake`` generator and the shared ``cursor`` /
    ``conn`` database handles.

    Args:
        num_users: Total number of user rows to generate.
        batch_size: Every time the running row number is a multiple of this
            value, the pending rows are written with ``executemany`` and
            committed.

    Raises:
        Exception: Whatever the database driver raises while writing a batch;
            the open transaction is rolled back before the error propagates.
    """
    insert_sql = """
        INSERT INTO Users (name, email, phone, created_at)
        VALUES (%s, %s, %s, %s)
    """
    users_batch = []

    def flush():
        """Write the pending rows, commit, and empty the batch."""
        try:
            cursor.executemany(insert_sql, users_batch)
            conn.commit()
        except Exception:
            # Do not leave a half-written batch open on the shared connection.
            conn.rollback()
            raise
        users_batch.clear()

    for i in range(1, num_users + 1):
        users_batch.append((
            fake.name(),
            fake.unique.email(),
            # Keep at most 20 characters (presumably the column width -- confirm against the schema).
            fake.phone_number()[:20],
            fake.date_time_between(start_date='-3y', end_date='now'),
        ))

        if i % batch_size == 0:
            flush()
            print(f"Inserted {i} users")

    # Rows left over when num_users is not a multiple of batch_size.
    if users_batch:
        flush()


insert_users(NUM_USERS, BATCH_SIZE)


Inserted 1000 users
Inserted 2000 users
Inserted 3000 users
Inserted 4000 users
Inserted 5000 users
Inserted 6000 users
Inserted 7000 users
Inserted 8000 users
Inserted 9000 users
Inserted 10000 users


### Insert about 20,000 rows of fake data into Accounts

In [10]:
# Load every existing user id; each user receives a random number of accounts
# drawn from the ACCOUNTS_PER_USER range.
cursor.execute("SELECT user_id FROM Users")
user_ids = [u[0] for u in cursor.fetchall()]

ACCOUNTS_INSERT_SQL = """
    INSERT INTO Accounts (user_id, type, balance, created_at)
    VALUES (%s, %s, %s, %s)
"""


def flush_accounts(rows):
    """Write the pending account rows, commit, and empty the list in place."""
    try:
        cursor.executemany(ACCOUNTS_INSERT_SQL, rows)
        conn.commit()
    except Exception:
        # Do not leave a half-written batch open on the shared connection.
        conn.rollback()
        raise
    rows.clear()


accounts_batch = []

for i, user_id in enumerate(user_ids, start=1):
    for _ in range(random.randint(*ACCOUNTS_PER_USER)):
        accounts_batch.append((
            user_id,
            random.choice(['checking', 'savings']),
            round(random.uniform(100, 10000), 2),
            fake.date_time_between(start_date='-3y', end_date='now'),
        ))

    # The flush is triggered per BATCH_SIZE *users*, so one batch can hold
    # several times BATCH_SIZE account rows.
    if i % BATCH_SIZE == 0:
        flush_accounts(accounts_batch)
        print(f"Processed accounts for {i} users")

# Accounts of the users after the last full batch.
if accounts_batch:
    flush_accounts(accounts_batch)


Processed accounts for 1000 users
Processed accounts for 2000 users
Processed accounts for 3000 users
Processed accounts for 4000 users
Processed accounts for 5000 users
Processed accounts for 6000 users
Processed accounts for 7000 users
Processed accounts for 8000 users
Processed accounts for 9000 users
Processed accounts for 10000 users


### Insert about 250,000 rows of fake data into Transactions

In [11]:
# Load every account id; each account receives a random number of
# transactions drawn from the TRANSACTIONS_PER_ACCOUNT range.
cursor.execute("SELECT account_id FROM Accounts")
account_ids = [a[0] for a in cursor.fetchall()]

TRANSACTIONS_INSERT_SQL = """
    INSERT INTO Transactions (account_id, amount, type, timestamp)
    VALUES (%s, %s, %s, %s)
"""


def flush_transactions(rows):
    """Write the pending transaction rows, commit, and empty the list in place."""
    try:
        cursor.executemany(TRANSACTIONS_INSERT_SQL, rows)
        conn.commit()
    except Exception:
        # Do not leave a half-written batch open on the shared connection.
        conn.rollback()
        raise
    rows.clear()


transactions_batch = []
count = 0  # running total of generated transaction rows

for account_id in account_ids:
    for _ in range(random.randint(*TRANSACTIONS_PER_ACCOUNT)):
        transactions_batch.append((
            account_id,
            round(random.uniform(5, 2000), 2),
            random.choice(['debit', 'credit']),
            fake.date_time_between(start_date='-2y', end_date='now'),
        ))
        count += 1

        # Flush every BATCH_SIZE rows.
        if count % BATCH_SIZE == 0:
            flush_transactions(transactions_batch)
            print(f"Inserted {count} transactions")

# Rows left over after the last full batch.
if transactions_batch:
    flush_transactions(transactions_batch)


Inserted 1000 transactions
Inserted 2000 transactions
Inserted 3000 transactions
Inserted 4000 transactions
Inserted 5000 transactions
Inserted 6000 transactions
Inserted 7000 transactions
Inserted 8000 transactions
Inserted 9000 transactions
Inserted 10000 transactions
Inserted 11000 transactions
Inserted 12000 transactions
Inserted 13000 transactions
Inserted 14000 transactions
Inserted 15000 transactions
Inserted 16000 transactions
Inserted 17000 transactions
Inserted 18000 transactions
Inserted 19000 transactions
Inserted 20000 transactions
Inserted 21000 transactions
Inserted 22000 transactions
Inserted 23000 transactions
Inserted 24000 transactions
Inserted 25000 transactions
Inserted 26000 transactions
Inserted 27000 transactions
Inserted 28000 transactions
Inserted 29000 transactions
Inserted 30000 transactions
Inserted 31000 transactions
Inserted 32000 transactions
Inserted 33000 transactions
Inserted 34000 transactions
Inserted 35000 transactions
Inserted 36000 transactions
I

### Insert about 80,000 rows of fake data into Logs

In [13]:

# Load the account ids the log rows will reference.
cursor.execute("SELECT account_id FROM Accounts")
account_ids = [a[0] for a in cursor.fetchall()]

print("Accounts loaded:", len(account_ids))

# Actions a log row can record; one is picked at random per row.
LOG_ACTIONS = [
    "balance_check",
    "deposit",
    "withdrawal",
    "transfer",
    "login",
    "logout",
    "failed_login",
    "slow_query",
    "suspicious_activity"
]

# (min, max) log rows per account; unpacked into random.randint (inclusive).
# BATCH_SIZE is deliberately NOT reassigned here: the value from the
# configuration cell is used so that a single setting controls every insert.
LOGS_PER_ACCOUNT = (3, 5)

LOGS_INSERT_SQL = """
    INSERT INTO Logs (account_id, action, query_time, timestamp)
    VALUES (%s, %s, %s, %s)
"""


def flush_logs(rows):
    """Write the pending log rows, commit, and empty the list in place."""
    try:
        cursor.executemany(LOGS_INSERT_SQL, rows)
        conn.commit()
    except Exception:
        # Do not leave a half-written batch open on the shared connection.
        conn.rollback()
        raise
    rows.clear()


logs_batch = []
count = 0  # running total of generated log rows

for account_id in account_ids:
    for _ in range(random.randint(*LOGS_PER_ACCOUNT)):
        logs_batch.append((
            account_id,
            random.choice(LOG_ACTIONS),
            round(random.uniform(0.01, 5.0), 4),  # query_time in seconds
            fake.date_time_between(start_date='-1y', end_date='now'),
        ))
        count += 1

        # Flush every BATCH_SIZE rows.
        if count % BATCH_SIZE == 0:
            flush_logs(logs_batch)
            print(f"Inserted {count} logs")

# Rows left over after the last full batch.
if logs_batch:
    flush_logs(logs_batch)


Accounts loaded: 19968
Inserted 1000 logs
Inserted 2000 logs
Inserted 3000 logs
Inserted 4000 logs
Inserted 5000 logs
Inserted 6000 logs
Inserted 7000 logs
Inserted 8000 logs
Inserted 9000 logs
Inserted 10000 logs
Inserted 11000 logs
Inserted 12000 logs
Inserted 13000 logs
Inserted 14000 logs
Inserted 15000 logs
Inserted 16000 logs
Inserted 17000 logs
Inserted 18000 logs
Inserted 19000 logs
Inserted 20000 logs
Inserted 21000 logs
Inserted 22000 logs
Inserted 23000 logs
Inserted 24000 logs
Inserted 25000 logs
Inserted 26000 logs
Inserted 27000 logs
Inserted 28000 logs
Inserted 29000 logs
Inserted 30000 logs
Inserted 31000 logs
Inserted 32000 logs
Inserted 33000 logs
Inserted 34000 logs
Inserted 35000 logs
Inserted 36000 logs
Inserted 37000 logs
Inserted 38000 logs
Inserted 39000 logs
Inserted 40000 logs
Inserted 41000 logs
Inserted 42000 logs
Inserted 43000 logs
Inserted 44000 logs
Inserted 45000 logs
Inserted 46000 logs
Inserted 47000 logs
Inserted 48000 logs
Inserted 49000 logs
Insert

### Display row counts per table

In [14]:
# Print the number of rows currently stored in each generated table.
tables = ["Users", "Accounts", "Transactions","Logs"]

for table_name in tables:
    cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
    (row_count,) = cursor.fetchone()  # COUNT(*) yields a single one-column row
    print(table_name, "→", row_count)


Users → 10000
Accounts → 19968
Transactions → 250188
Logs → 79849


In [15]:
# Report the row count of every populated table, one "<Table>: <count>" line each.
for table_name in ("Users", "Accounts", "Transactions", "Logs"):
    cursor.execute(f"SELECT COUNT(*) FROM {table_name}")
    print(f"{table_name}:", cursor.fetchone()[0])

Users: 10000
Accounts: 19968
Transactions: 250188
Logs: 79849
