In [1]:
import sqlite3, json
from datetime import datetime, timedelta, timezone
import pandas as pd

DB_PATH = "/home/drake/scripts/sql_python_files/derp1.db"
conn = sqlite3.connect(DB_PATH)
cur = conn.cursor()

def iso_now(offset_minutes=0):
    return (datetime.now(timezone.utc) + timedelta(minutes=offset_minutes)).isoformat()

cur.executescript("""
PRAGMA foreign_keys = ON;

CREATE TABLE IF NOT EXISTS request_logs (
  ts           TEXT,
  service      TEXT,
  env          TEXT,
  endpoint     TEXT,
  user_id      INTEGER,
  status       INTEGER,
  latency_ms   INTEGER,
  bytes_sent   INTEGER,
  trace_id     TEXT,
  attrs        TEXT
);
CREATE TABLE IF NOT EXISTS app_logs (
  ts           TEXT,
  service      TEXT,
  env          TEXT,
  level        TEXT,
  message      TEXT,
  exception    TEXT,
  trace_id     TEXT,
  attrs        TEXT
);
CREATE TABLE IF NOT EXISTS auth_events (
  ts           TEXT,
  user_id      INTEGER,
  event        TEXT,
  ip           TEXT,
  user_agent   TEXT
);
CREATE TABLE IF NOT EXISTS deployments (
  deployed_at  TEXT,
  service      TEXT,
  env          TEXT,
  version      TEXT
);
CREATE TABLE IF NOT EXISTS users (
  id           INTEGER PRIMARY KEY,
  plan         TEXT,
  created_at   TEXT
);
CREATE INDEX IF NOT EXISTS idx_req_env_service_ts ON request_logs(env, service, ts);
CREATE INDEX IF NOT EXISTS idx_req_status_ts ON request_logs(status, ts);
CREATE INDEX IF NOT EXISTS idx_app_env_service_ts ON app_logs(env, service, ts);
CREATE INDEX IF NOT EXISTS idx_auth_user_ts ON auth_events(user_id, ts);
""")

# Seed data
users = [
    (1, "free", iso_now(-86400)),
    (2, "pro", iso_now(-10080)),
    (3, "enterprise", iso_now(-1440)),
]
cur.executemany("INSERT OR IGNORE INTO users (id, plan, created_at) VALUES (?, ?, ?)", users)

req_rows = [
    (iso_now(-120), "api", "prod", "/v1/orders", 1, 200, 123, 2048, "t-1", json.dumps({"region":"us-west","feature":"f1"})),
    (iso_now(-115), "api", "prod", "/v1/orders", 2, 500, 980, 512, "t-2", json.dumps({"region":"us-east","feature":"f1"})),
    (iso_now(-110), "api", "prod", "/v1/search", 2, 200, 45, 4096, "t-3", json.dumps({"region":"us-west","feature":"f2"})),
    (iso_now(-105), "web", "prod", "/",  None, 200, 15, 1024, "t-4", json.dumps({"region":"us-west"})),
    (iso_now(-30),  "api", "stage","/v1/orders", 3, 503, 1500, 128, "t-5", json.dumps({"region":"eu-central","feature":"f3"})),
]
cur.executemany("""
INSERT INTO request_logs (ts, service, env, endpoint, user_id, status, latency_ms, bytes_sent, trace_id, attrs)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
""", req_rows)

app_rows = [
    (iso_now(-115), "api", "prod", "ERROR", "Order service failed", "Traceback ...", "t-2", json.dumps({"orderId":"ABC123"})),
    (iso_now(-30),  "api", "stage","ERROR", "DB timeout", "TimeoutError ...", "t-5", json.dumps({"retry":True})),
]
cur.executemany("""
INSERT INTO app_logs (ts, service, env, level, message, exception, trace_id, attrs)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", app_rows)

auth_rows = [
    (iso_now(-300), 1, "LOGIN_SUCCESS", "203.0.113.10", "Mozilla/5.0"),
    (iso_now(-295), 2, "LOGIN_FAILURE", "203.0.113.11", "Mozilla/5.0"),
    (iso_now(-290), 2, "LOGIN_SUCCESS", "203.0.113.11", "Mozilla/5.0"),
]
cur.executemany("""
INSERT INTO auth_events (ts, user_id, event, ip, user_agent)
VALUES (?, ?, ?, ?, ?)
""", auth_rows)

dep_rows = [
    (iso_now(-180), "api", "prod", "v1.2.3"),
    (iso_now(-60),  "api", "stage", "v1.2.4-rc1"),
]
cur.executemany("""
INSERT INTO deployments (deployed_at, service, env, version)
VALUES (?, ?, ?, ?)
""", dep_rows)

conn.commit()

# Example query to verify
pd.read_sql_query("""
SELECT endpoint,
       COUNT(*) AS requests,
       ROUND(AVG(latency_ms), 1) AS avg_latency_ms,
       SUM(CASE WHEN status >= 500 THEN 1 ELSE 0 END) AS errors
FROM request_logs
WHERE ts >= datetime('now', '-3 hours')
GROUP BY endpoint
ORDER BY requests DESC;
""", conn)


Unnamed: 0,endpoint,requests,avg_latency_ms,errors
0,/v1/orders,7,858.0,5
1,/v1/search,3,60.0,0
2,/,2,15.0,0
3,/v1/export,1,120.0,0


In [2]:
# --- Seed events that will match the JOIN (requests within 15 minutes after LOGIN_SUCCESS)

# Example 1: user 2 logs in, then makes two requests (one big bytes, one 5xx)
login_ts_u2 = iso_now(-10)  # 10 minutes ago

cur.execute("""
INSERT INTO auth_events (ts, user_id, event, ip, user_agent)
VALUES (?, ?, 'LOGIN_SUCCESS', ?, ?)
""", (login_ts_u2, 2, '198.51.100.23', 'Mozilla/5.0 (X11; Linux x86_64)'))

# request within 5 minutes (large bytes -> looks suspicious)
cur.execute("""
INSERT INTO request_logs (ts, service, env, endpoint, user_id, status, latency_ms, bytes_sent, trace_id, attrs)
VALUES (?, 'api', 'prod', '/v1/export', ?, 200, 120, 250000, 'bf-1', '{"region":"us-west","feature":"bulk-export"}')
""", (iso_now(-5), 2))

# another request within 8 minutes (5xx error)
cur.execute("""
INSERT INTO request_logs (ts, service, env, endpoint, user_id, status, latency_ms, bytes_sent, trace_id, attrs)
VALUES (?, 'api', 'prod', '/v1/orders', ?, 503, 800, 1024, 'bf-2', '{"region":"us-west","feature":"orders"}')
""", (iso_now(-2), 2))

# Example 2: user 3 logs in, then calls search (normal-ish)
login_ts_u3 = iso_now(-12)  # 12 minutes ago

cur.execute("""
INSERT INTO auth_events (ts, user_id, event, ip, user_agent)
VALUES (?, ?, 'LOGIN_SUCCESS', ?, ?)
""", (login_ts_u3, 3, '203.0.113.77', 'Mozilla/5.0 (Macintosh; Intel Mac OS X)'))

cur.execute("""
INSERT INTO request_logs (ts, service, env, endpoint, user_id, status, latency_ms, bytes_sent, trace_id, attrs)
VALUES (?, 'api', 'prod', '/v1/search', ?, 200, 90, 4096, 'norm-1', '{"region":"us-east","feature":"search"}')
""", (iso_now(-3), 3))

conn.commit()
print("Inserted sample login + follow-up requests that match the JOIN window.")


Inserted sample login + follow-up requests that match the JOIN window.


In [3]:
#prints logs where status is less then 500
query1 = pd.read_sql_query(
    "SELECT * FROM request_logs WHERE status>=500;",
    conn
)
print(query1)

                                 ts service    env    endpoint  user_id  \
0  2025-08-31T02:35:30.146284+00:00     api   prod  /v1/orders        2   
1  2025-08-31T18:19:05.040427+00:00     api   prod  /v1/orders        2   
2  2025-08-31T04:00:30.146294+00:00     api  stage  /v1/orders        3   
3  2025-08-31T06:02:06.254300+00:00     api   prod  /v1/orders        2   
4  2025-08-31T19:44:05.040439+00:00     api  stage  /v1/orders        3   
5  2025-08-31T20:12:06.213294+00:00     api   prod  /v1/orders        2   

   status  latency_ms  bytes_sent trace_id  \
0     500         980         512      t-2   
1     500         980         512      t-2   
2     503        1500         128      t-5   
3     503         800        1024     bf-2   
4     503        1500         128      t-5   
5     503         800        1024     bf-2   

                                       attrs  
0     {"region": "us-east", "feature": "f1"}  
1     {"region": "us-east", "feature": "f1"}  
2  {"regio

In [4]:
#prints logs between 400 and 499
query2 = pd.read_sql_query(
    "SELECT * FROM request_logs WHERE status BETWEEN 400 and 499;",
    conn
)
print(query2)

Empty DataFrame
Columns: [ts, service, env, endpoint, user_id, status, latency_ms, bytes_sent, trace_id, attrs]
Index: []


In [5]:
#Q: How would you query for spikes in error rates over the last 24 hours?
query3 = pd.read_sql_query("""
SELECT 
    strftime('%Y-%m-%d %H:00', ts) AS hour_bucket,
    COUNT(*) AS error_count
FROM request_logs
WHERE status >= 500
  AND ts >= datetime('now', '-24 hours')
GROUP BY hour_bucket
ORDER BY hour_bucket;
""", conn)

print(query3)

        hour_bucket  error_count
0  2025-08-31 02:00            1
1  2025-08-31 04:00            1
2  2025-08-31 06:00            1
3  2025-08-31 18:00            1
4  2025-08-31 19:00            1
5  2025-08-31 20:00            1
