In [None]:
# Install every third-party package this notebook imports: ipython-sql provides
# the %sql / %%sql magics, faker provides the synthetic names generated below.
%pip install -q ipython-sql faker


In [1]:
# Register the SQL extension so that the %sql and %%sql magics are available.
%load_ext sql


In [2]:
# Connect the %sql magic to the SQLite database file badge_attendance.db.
%sql sqlite:///badge_attendance.db

In [4]:
import sqlite3
import random
from datetime import datetime, timedelta
from faker import Faker

# Seed both random sources (the stdlib `random` module and Faker's shared
# generator) so that every run produces the same synthetic people and events.
SEED = 42
random.seed(SEED)
Faker.seed(SEED)

fake = Faker()


In [5]:
# Plain DB-API handle on the same SQLite file the %sql magic is connected to;
# the Python cells below use it for bulk inserts via executemany().
conn = sqlite3.connect(database="badge_attendance.db")
cursor = conn.cursor()


In [26]:

%%sql
CREATE TABLE IF NOT EXISTS dim_students (
    -- Surrogate key, assigned automatically by SQLite on insert
    student_id   INTEGER PRIMARY KEY AUTOINCREMENT,
    -- Badge identifier, at most one row per badge (see the constraint below)
    badge_id     TEXT,
    name         TEXT,
    department   TEXT,
    -- Year or semester label, stored as text
    year_or_sem  TEXT,
    UNIQUE (badge_id)
);



 * sqlite:///badge_attendance.db
Done.


[]

In [27]:
%%sql
CREATE TABLE IF NOT EXISTS dim_teachers (
    -- Surrogate key, assigned automatically by SQLite on insert
    teacher_id  INTEGER PRIMARY KEY AUTOINCREMENT,
    -- Badge identifier, at most one row per badge (see the constraint below)
    badge_id    TEXT,
    name        TEXT,
    department  TEXT,
    UNIQUE (badge_id)
);


 * sqlite:///badge_attendance.db
Done.


[]

In [28]:
%%sql
CREATE TABLE IF NOT EXISTS src_badge_events (
    -- IF NOT EXISTS matches the two dimension tables and lets this cell be
    -- re-run against a database file that already contains the table.
    event_id INTEGER PRIMARY KEY AUTOINCREMENT,
    -- Badge that produced the event (a dim_students or dim_teachers badge_id)
    badge_id TEXT,
    -- 'student' or 'teacher'
    person_type TEXT,
    -- ISO-8601 timestamp stored as text
    event_time TEXT,
    -- 'IN' or 'OUT'
    event_type TEXT
);



 * sqlite:///badge_attendance.db
Done.


[]

In [29]:
NUM_STUDENTS = 2000

departments = ["Engineering", "Science", "Business", "Arts", "Medicine", "Law"]
years = ["1st Year", "2nd Year", "3rd Year", "4th Year"]

# One (badge_id, name, department, year_or_sem) tuple per synthetic student.
# Badge ids are zero-padded sequence numbers (BADGE000000, BADGE000001, ...),
# so the same ids are produced on every run.
students_data = [
    (f"BADGE{i:06d}", fake.name(), random.choice(departments), random.choice(years))
    for i in range(NUM_STUDENTS)
]

# OR IGNORE: badge_id is UNIQUE and the database lives in a file on disk, so a
# plain INSERT raises sqlite3.IntegrityError as soon as this cell is executed a
# second time. Skipping badges that already exist makes the cell re-runnable.
cursor.executemany(
    "INSERT OR IGNORE INTO dim_students (badge_id, name, department, year_or_sem) VALUES (?, ?, ?, ?)",
    students_data
)
conn.commit()

# rowcount is the number of rows executemany() actually wrote: NUM_STUDENTS on
# a fresh database, 0 when every badge was already present.
print("Students inserted:", cursor.rowcount)



Students inserted: 2000


In [30]:
NUM_TEACHERS = 150

# One (badge_id, name, department) tuple per synthetic teacher. `departments`
# is the list defined in the student-generation cell.
teachers_data = [
    (f"TEACHER{i:05d}", fake.name(), random.choice(departments))
    for i in range(NUM_TEACHERS)
]

# OR IGNORE: badge_id is UNIQUE and the database lives in a file on disk, so a
# plain INSERT raises sqlite3.IntegrityError as soon as this cell is executed a
# second time. Skipping badges that already exist makes the cell re-runnable.
cursor.executemany(
    "INSERT OR IGNORE INTO dim_teachers (badge_id, name, department) VALUES (?, ?, ?)",
    teachers_data
)
conn.commit()

# rowcount is the number of rows executemany() actually wrote: NUM_TEACHERS on
# a fresh database, 0 when every badge was already present.
print("Teachers inserted:", cursor.rowcount)



Teachers inserted: 150


In [31]:
# Read the badge ids back from the two dimension tables; the event generator
# draws a random badge from these lists for every IN/OUT pair.
student_badges = [
    badge for (badge,) in cursor.execute("SELECT badge_id FROM dim_students;")
]
teacher_badges = [
    badge for (badge,) in cursor.execute("SELECT badge_id FROM dim_teachers;")
]



In [32]:
TARGET_EVENTS = 1_000_000   # total rows wanted in src_badge_events
BATCH_SIZE = 50_000         # rows buffered in memory between commits

start_date = datetime(2024, 9, 1)

NUM_DAYS = 220              # events are spread over this many days from start_date
WORK_START_HOUR = 8
WORK_END_HOUR = 17

INSERT_EVENT_SQL = (
    "INSERT INTO src_badge_events (badge_id, person_type, event_time, event_type) "
    "VALUES (?, ?, ?, ?)"
)


def _flush_events(rows):
    """Insert one batch of (badge_id, person_type, event_time, event_type) rows and commit."""
    cursor.executemany(INSERT_EVENT_SQL, rows)
    conn.commit()


# Start counting from what the table already holds. On an empty table this is 0
# (the original behaviour); on a re-run it stops the cell from appending another
# TARGET_EVENTS rows on top of the existing ones.
cursor.execute("SELECT COUNT(*) FROM src_badge_events")
total_events = cursor.fetchone()[0]

events_data = []
batch_no = 0

# Every iteration produces one IN/OUT pair, hence the "+ 2".
while total_events + 2 <= TARGET_EVENTS:

    day = start_date + timedelta(days=random.randint(0, NUM_DAYS - 1))

    # 85% students, 15% teachers
    if random.random() < 0.85:
        badge = random.choice(student_badges)
        person_type = "student"
    else:
        badge = random.choice(teacher_badges)
        person_type = "teacher"

    in_hour = random.randint(WORK_START_HOUR, WORK_END_HOUR - 1)
    in_min = random.randint(0, 59)
    in_time = day.replace(hour=in_hour, minute=in_min, second=0, microsecond=0)

    # Exit 1-6 hours after the entry hour, capped at the last working hour.
    out_hour = min(in_hour + random.randint(1, 6), WORK_END_HOUR - 1)
    out_min = random.randint(0, 59)
    out_time = day.replace(hour=out_hour, minute=out_min, second=0, microsecond=0)

    if out_time <= in_time:
        # Only reachable when the entry is in the last working hour and the
        # random exit minute is not later than the entry minute. Adding a flat
        # hour would put the exit after WORK_END_HOUR, so clamp it to the end
        # of the working day (always later than in_time, which is before
        # WORK_END_HOUR by construction).
        day_end = day + timedelta(hours=WORK_END_HOUR)
        out_time = min(in_time + timedelta(hours=1), day_end)

    events_data.append((badge, person_type, in_time.isoformat(), "IN"))
    events_data.append((badge, person_type, out_time.isoformat(), "OUT"))
    total_events += 2

    if len(events_data) >= BATCH_SIZE:
        _flush_events(events_data)

        batch_no += 1
        print(f"Batch {batch_no} inserted — Total {total_events}")
        events_data = []

# insert remaining
if events_data:
    _flush_events(events_data)

print("DONE — Final total:", total_events)



Batch 1 inserted — Total 50000
Batch 2 inserted — Total 100000
Batch 3 inserted — Total 150000
Batch 4 inserted — Total 200000
Batch 5 inserted — Total 250000
Batch 6 inserted — Total 300000
Batch 7 inserted — Total 350000
Batch 8 inserted — Total 400000
Batch 9 inserted — Total 450000
Batch 10 inserted — Total 500000
Batch 11 inserted — Total 550000
Batch 12 inserted — Total 600000
Batch 13 inserted — Total 650000
Batch 14 inserted — Total 700000
Batch 15 inserted — Total 750000
Batch 16 inserted — Total 800000
Batch 17 inserted — Total 850000
Batch 18 inserted — Total 900000
Batch 19 inserted — Total 950000
Batch 20 inserted — Total 1000000
DONE — Final total: 1000000


In [33]:
%%sql
-- Sanity check on the number of rows loaded into the events table
SELECT
    COUNT(*)
FROM src_badge_events;



 * sqlite:///badge_attendance.db
Done.


COUNT(*)
1000000


In [34]:
%%sql
-- Number of events per person type (student versus teacher)
SELECT
    person_type,
    COUNT(*)
FROM src_badge_events
GROUP BY person_type;


 * sqlite:///badge_attendance.db
Done.


person_type,COUNT(*)
student,850056
teacher,149944


In [35]:
%%sql
-- Preview of the first rows in insertion order. Without an ORDER BY the rows
-- returned by LIMIT are not guaranteed to be the same from run to run.
SELECT *
FROM src_badge_events
ORDER BY event_id
LIMIT 5;


 * sqlite:///badge_attendance.db
Done.


event_id,badge_id,person_type,event_time,event_type
1,BADGE000484,student,2024-09-10T08:32:00,IN
2,BADGE000484,student,2024-09-10T09:15:00,OUT
3,BADGE001584,student,2024-11-07T12:43:00,IN
4,BADGE001584,student,2024-11-07T16:08:00,OUT
5,TEACHER00051,teacher,2024-10-04T13:53:00,IN


In [36]:
%%sql
-- Number of IN events versus OUT events
SELECT
    event_type,
    COUNT(*)
FROM src_badge_events
GROUP BY event_type;


 * sqlite:///badge_attendance.db
Done.


event_type,COUNT(*)
IN,500000
OUT,500000


In [37]:
%%sql
-- First ten events recorded for one teacher badge, in insertion order. Without
-- an ORDER BY the rows returned by LIMIT are not guaranteed to be stable.
SELECT *
FROM src_badge_events
WHERE badge_id = 'TEACHER00069'
ORDER BY event_id
LIMIT 10;


 * sqlite:///badge_attendance.db
Done.


event_id,badge_id,person_type,event_time,event_type
2711,TEACHER00069,teacher,2024-09-16T14:36:00,IN
2712,TEACHER00069,teacher,2024-09-16T15:51:00,OUT
4137,TEACHER00069,teacher,2025-02-27T15:47:00,IN
4138,TEACHER00069,teacher,2025-02-27T16:33:00,OUT
4691,TEACHER00069,teacher,2024-11-28T10:20:00,IN
4692,TEACHER00069,teacher,2024-11-28T14:33:00,OUT
5889,TEACHER00069,teacher,2025-03-11T09:15:00,IN
5890,TEACHER00069,teacher,2025-03-11T12:55:00,OUT
7279,TEACHER00069,teacher,2025-03-29T15:08:00,IN
7280,TEACHER00069,teacher,2025-03-29T16:28:00,OUT


In [38]:
%%sql
-- Event volume per calendar day for the ten earliest days
SELECT
    DATE(event_time) AS day,
    COUNT(*) AS total_events
FROM src_badge_events
GROUP BY DATE(event_time)
ORDER BY day ASC
LIMIT 10;


 * sqlite:///badge_attendance.db
Done.


day,total_events
2024-09-01,4658
2024-09-02,4504
2024-09-03,4588
2024-09-04,4552
2024-09-05,4482
2024-09-06,4432
2024-09-07,4634
2024-09-08,4500
2024-09-09,4486
2024-09-10,4502
