# User Entity Benchmarking

This notebook benchmarks a monolithic `User` table against a fragmented design that splits the same data into separate Auth and Profile tables.

## Hypothesis
Splitting frequently accessed authentication data (Email, Password, Role) from profile data (Bio, Location, Image) will improve login performance and reduce memory footprint for auth-heavy operations.

In [None]:
# Install dependencies if not already installed
!pip install sqlmodel faker mysql-connector-python

In [None]:
import time
import random
import os
from typing import Optional
from sqlmodel import Field, SQLModel, create_engine, Session, select
from faker import Faker
from enum import Enum
from uuid import UUID, uuid4
from datetime import datetime

# Shared Faker instance used to synthesise the user records.
fake = Faker()

# SQLite file that backs every benchmark in this notebook.
DATABASE_URL = "sqlite:///temp/db/benchmarking_user.db"

# The directory has to exist before SQLite creates the database file in it.
os.makedirs("temp/db", exist_ok=True)

# echo=False keeps SQL statement logging out of the benchmark output.
engine = create_engine(DATABASE_URL, echo=False)

## 1. Monolithic Architecture
All user data in a single table.

In [None]:
class UserRole(str, Enum):
    """Role assigned to a user.

    Members are also `str` instances, so each one compares equal to its
    plain string value.
    """

    # Declaration order matters: it is the order seen when iterating the enum.
    GUEST = "GUEST"
    MEMBER = "MEMBER"
    ADMIN = "ADMIN"

class UserMonolith(SQLModel, table=True):
    """Single-table user model: auth, profile and metadata columns side by side."""

    # Primary key; a fresh UUID is produced by `uuid4` when none is supplied.
    uid: UUID = Field(primary_key=True, default_factory=uuid4)

    # --- Auth columns ---
    email: str = Field(unique=True, index=True)
    password_hash: str
    role: UserRole = Field(default=UserRole.MEMBER)
    is_verified: bool = Field(default=False)

    # --- Profile columns ---
    name: str
    phone: Optional[str] = Field(default=None)
    location: Optional[str] = Field(default=None)
    bio: Optional[str] = Field(default=None)
    profile_image: Optional[str] = Field(default=None)

    # --- Meta columns (default to the current UTC time at construction) ---
    created_at: datetime = Field(default_factory=datetime.utcnow)
    last_active: datetime = Field(default_factory=datetime.utcnow)

## 2. Fragmented Architecture
Split into `UserAuth` (Hot) and `UserProfile` (Cold).

In [None]:
class UserAuth(SQLModel, table=True):
    """'Hot' half of the fragmented design: only the authentication columns."""

    # Primary key; a fresh UUID is produced by `uuid4` when none is supplied.
    uid: UUID = Field(primary_key=True, default_factory=uuid4)

    # Lookup key for the login benchmark: indexed and unique.
    email: str = Field(unique=True, index=True)
    password_hash: str
    role: UserRole = Field(default=UserRole.MEMBER)
    is_verified: bool = Field(default=False)

class UserProfile(SQLModel, table=True):
    """'Cold' half of the fragmented design: profile and metadata columns.

    Shares its primary key with `UserAuth` (`uid` is both the primary key
    and a foreign key to `userauth.uid`), giving a one-to-one split of the
    columns that `UserMonolith` keeps in a single table.
    """

    uid: UUID = Field(primary_key=True, foreign_key="userauth.uid")
    name: str

    # Optional columns default to None, exactly as in UserMonolith, so both
    # designs accept the same inputs and are compared like-for-like.
    phone: Optional[str] = Field(default=None)
    location: Optional[str] = Field(default=None)
    bio: Optional[str] = Field(default=None)
    profile_image: Optional[str] = Field(default=None)

    # Meta columns (default to the current UTC time at construction).
    created_at: datetime = Field(default_factory=datetime.utcnow)
    last_active: datetime = Field(default_factory=datetime.utcnow)

## 3. Data Generation
Generating 10,000 users.

In [None]:
ENTRY_COUNT = 10_000
SEED = 42  # fixed seed so every run benchmarks the same dataset

# Seed both random sources, and reset Faker's record of already-issued unique
# values, so re-running this cell regenerates the same users instead of
# drawing fresh ones (or running out of "unused" unique emails).
random.seed(SEED)
fake.seed_instance(SEED)
fake.unique.clear()

roles = list(UserRole)  # hoisted: the enum members do not change per row

# One plain dict per user; the same records feed both architectures.
users_data = [
    {
        "email": fake.unique.email(),
        "password_hash": fake.sha256(),
        "role": random.choice(roles),
        "is_verified": fake.boolean(),
        "name": fake.name(),
        "phone": fake.phone_number(),
        "location": fake.city(),
        "bio": fake.text(),
        "profile_image": fake.image_url(),
        "created_at": fake.date_time_this_year(),
        "last_active": fake.date_time_this_year(),
    }
    for _ in range(ENTRY_COUNT)
]

# (Re)create tables.
# The SQLite file persists between runs, so drop any existing benchmark
# tables first: otherwise a re-run would insert on top of old rows, skewing
# the timings or violating the unique-email constraint.
SQLModel.metadata.drop_all(engine)
SQLModel.metadata.create_all(engine)

## 4. Benchmarks

In [None]:
# Benchmark Monolith Insert
# Times building one UserMonolith per generated record and committing them
# all in a single transaction.
start_time = time.perf_counter()
with Session(engine) as session:
    monolith_rows = [UserMonolith(**record) for record in users_data]
    session.add_all(monolith_rows)
    session.commit()
mono_insert_time = time.perf_counter() - start_time
print(f"Monolith Insert Time: {mono_insert_time:.4f}s")

In [None]:
# Benchmark Fragmented Insert
# Times building one UserAuth + UserProfile pair per generated record and
# committing them all in a single transaction.

# Field names of each model, computed once instead of once per row.
# `model_fields` is used when the model exposes it; `__fields__` is the fallback.
auth_fields = set(getattr(UserAuth, "model_fields", None) or UserAuth.__fields__)
profile_fields = set(getattr(UserProfile, "model_fields", None) or UserProfile.__fields__)

start_time = time.perf_counter()
with Session(engine) as session:
    auth_rows = []
    profile_rows = []
    for data in users_data:
        # Split the flat record into the columns each table owns.
        auth_data = {k: v for k, v in data.items() if k in auth_fields}
        profile_data = {k: v for k, v in data.items() if k in profile_fields}

        # `uid` comes from `default_factory=uuid4`, so it is already set on
        # the new object; no per-row flush is needed to obtain it.
        auth = UserAuth(**auth_data)
        auth_rows.append(auth)
        profile_rows.append(UserProfile(uid=auth.uid, **profile_data))

    # Flush the parent rows once so they are written before the profile rows
    # that reference them, then add the children and commit. One flush
    # instead of one per user keeps the comparison with the monolith insert
    # fair (both are batched).
    session.add_all(auth_rows)
    session.flush()
    session.add_all(profile_rows)
    session.commit()
frag_insert_time = time.perf_counter() - start_time
print(f"Fragmented Insert Time: {frag_insert_time:.4f}s")

In [None]:
# Benchmark Login (Auth Query)
LOGIN_SAMPLE_SIZE = 1000  # number of email lookups timed per architecture


def _time_login_lookups(model, emails):
    """Return the seconds taken to fetch one `model` row per email.

    Opens a single session and runs one `SELECT ... WHERE email = ?` per
    entry in `emails`, taking the first result each time. `model` must be a
    table class with an `email` column (here `UserMonolith` or `UserAuth`).
    """
    started = time.perf_counter()
    with Session(engine) as session:
        for email in emails:
            session.exec(select(model).where(model.email == email)).first()
    return time.perf_counter() - started


# Never ask for more samples than there are users (random.sample would raise).
target_emails = [
    u["email"]
    for u in random.sample(users_data, min(LOGIN_SAMPLE_SIZE, len(users_data)))
]

# Monolith Login
# We only need auth fields theoretically, but monolith fetches the whole row.
mono_login_time = _time_login_lookups(UserMonolith, target_emails)

# Fragmented Login
frag_login_time = _time_login_lookups(UserAuth, target_emails)

# Report the number of lookups actually performed rather than a literal.
print(f"Monolith Login ({len(target_emails)} ops): {mono_login_time:.4f}s")
print(f"Fragmented Login ({len(target_emails)} ops): {frag_login_time:.4f}s")