In [None]:
%pip install ipython-sql


In [2]:
%load_ext sql


In [3]:
%sql sqlite:///badge_attendance.db

In [None]:
%%sql
CREATE TABLE IF NOT EXISTS students (
    student_id INTEGER PRIMARY KEY AUTOINCREMENT,
    badge_id TEXT UNIQUE,
    name TEXT,
    department TEXT,
    year_or_sem TEXT
);


In [None]:
%%sql
CREATE TABLE IF NOT EXISTS badge_events (
    event_id INTEGER PRIMARY KEY AUTOINCREMENT,
    badge_id TEXT,
    event_time TEXT,
    event_type TEXT,
    FOREIGN KEY (badge_id) REFERENCES students(badge_id)
);


In [None]:
import sqlite3
import random
from datetime import datetime, timedelta
from faker import Faker

fake = Faker()


In [None]:
%pip install faker

In [None]:
import sqlite3

conn = sqlite3.connect("badge_attendance.db")
cursor = conn.cursor()


In [None]:
NUM_STUDENTS = 2000

departments = ["Engineering", "Science", "Business", "Arts", "Medicine", "Law"]
years = ["1st Year", "2nd Year", "3rd Year", "4th Year"]

students_data = []

for i in range(NUM_STUDENTS):
    badge_id = f"BADGE{i:06d}"
    name = fake.name()
    department = random.choice(departments)
    year = random.choice(years)

    students_data.append((badge_id, name, department, year))

cursor.executemany(
    "INSERT INTO students (badge_id, name, department, year_or_sem) VALUES (?, ?, ?, ?)",
    students_data
)

conn.commit()

print("Students inserted:", len(students_data))


In [None]:
%config SqlMagic.style = 'plain'


In [4]:
%%sql
SELECT COUNT(*) FROM students;


 * sqlite:///badge_attendance.db
Done.


COUNT(*)
2000


In [None]:
import prettytable
[x for x in prettytable.__dict__.keys() if x.isupper()]


In [None]:
%pip install prettytable==3.9.0


In [None]:
NUM_DAYS = 220
SESSIONS_PER_DAY_MIN = 2
SESSIONS_PER_DAY_MAX = 5
WORK_START_HOUR = 8
WORK_END_HOUR = 17


In [None]:
cursor.execute("SELECT badge_id FROM students")
badge_ids = [row[0] for row in cursor.fetchall()]

print("Students available:", len(badge_ids))


In [None]:
from datetime import datetime, timedelta
import random

TARGET_EVENTS = 1_000_000
BATCH_SIZE = 50_000

events_data = []
total_events = 0
batch_no = 0

start_date = datetime(2024, 9, 1)

NUM_DAYS = 220
SESSIONS_PER_DAY_MIN = 2
SESSIONS_PER_DAY_MAX = 4
WORK_START_HOUR = 8
WORK_END_HOUR = 17

for day in range(NUM_DAYS):

    if total_events >= TARGET_EVENTS:
        break

    date = start_date + timedelta(days=day)

    students_today = random.sample(
        badge_ids,
        random.randint(800, 1600)
    )

    for badge in students_today:

        if total_events >= TARGET_EVENTS:
            break

        num_sessions = random.randint(
            SESSIONS_PER_DAY_MIN,
            SESSIONS_PER_DAY_MAX
        )

        for _ in range(num_sessions):

            if total_events >= TARGET_EVENTS:
                break

            # IN time
            in_hour = random.randint(WORK_START_HOUR, WORK_END_HOUR - 1)
            in_min = random.randint(0, 59)

            in_time = date.replace(
                hour=in_hour, minute=in_min,
                second=0, microsecond=0
            )

            events_data.append((badge, in_time.isoformat(), "IN"))
            total_events += 1

            # OUT time
            out_hour = min(in_hour + random.randint(1, 6), WORK_END_HOUR - 1)
            out_min = random.randint(0, 59)

            out_time = date.replace(
                hour=out_hour, minute=out_min,
                second=0, microsecond=0
            )

            if out_time <= in_time:
                out_time = in_time + timedelta(hours=1)

            events_data.append((badge, out_time.isoformat(), "OUT"))
            total_events += 1

            # INSERT BATCH
            if len(events_data) >= BATCH_SIZE:
                cursor.executemany(
                    "INSERT INTO badge_events (badge_id, event_time, event_type) VALUES (?, ?, ?)",
                    events_data
                )
                conn.commit()

                batch_no += 1
                print(f"Batch {batch_no} inserted ({len(events_data)}) — Total {total_events}")

                events_data = []

# insert remaining rows
if events_data:
    cursor.executemany(
        "INSERT INTO badge_events (badge_id, event_time, event_type) VALUES (?, ?, ?)",
        events_data
    )
    conn.commit()

print("\nDONE — Final total:", total_events)


In [5]:
%%sql
SELECT COUNT(*) FROM badge_events;


 * sqlite:///badge_attendance.db
Done.


COUNT(*)
1000000


In [6]:
%%sql
SELECT event_type, COUNT(*)
FROM badge_events
GROUP BY event_type;


 * sqlite:///badge_attendance.db
Done.


event_type,COUNT(*)
IN,500000
OUT,500000


In [7]:
%%sql
SELECT *
FROM badge_events
LIMIT 5;


 * sqlite:///badge_attendance.db
Done.


event_id,badge_id,event_time,event_type
2000001,BADGE000078,2024-09-01T09:13:00,IN
2000002,BADGE000078,2024-09-01T12:01:00,OUT
2000003,BADGE000078,2024-09-01T14:26:00,IN
2000004,BADGE000078,2024-09-01T16:28:00,OUT
2000005,BADGE000078,2024-09-01T16:58:00,IN


In [8]:
%%sql
SELECT event_type, COUNT(*)
FROM badge_events
GROUP BY event_type;


 * sqlite:///badge_attendance.db
Done.


event_type,COUNT(*)
IN,500000
OUT,500000


In [9]:
%%sql
SELECT *
FROM badge_events
WHERE badge_id = 'BADGE000123'
LIMIT 10;


 * sqlite:///badge_attendance.db
Done.


event_id,badge_id,event_time,event_type
2012981,BADGE000123,2024-09-02T09:49:00,IN
2012982,BADGE000123,2024-09-02T12:57:00,OUT
2012983,BADGE000123,2024-09-02T15:51:00,IN
2012984,BADGE000123,2024-09-02T16:55:00,OUT
2012985,BADGE000123,2024-09-02T14:55:00,IN
2012986,BADGE000123,2024-09-02T16:13:00,OUT
2012987,BADGE000123,2024-09-02T13:59:00,IN
2012988,BADGE000123,2024-09-02T14:45:00,OUT
2024495,BADGE000123,2024-09-03T13:40:00,IN
2024496,BADGE000123,2024-09-03T16:37:00,OUT


In [10]:
%%sql
SELECT DATE(event_time) AS day,
       COUNT(*) AS total_events
FROM badge_events
GROUP BY day
ORDER BY day;


 * sqlite:///badge_attendance.db
Done.


day,total_events
2024-09-01,8850
2024-09-02,8826
2024-09-03,9608
2024-09-04,8130
2024-09-05,7320
2024-09-06,7790
2024-09-07,7736
2024-09-08,6686
2024-09-09,7990
2024-09-10,7574
