# Exploration: *Défi / Quiz du Jour* Results

This notebook helps you: 
1. Inspect which quiz is mapped to a given date in the local SQLite DB.
2. Load your locally recorded attempts & derived leaderboard (using `QuizDB.daily_table`).
3. Fetch the public daily archive page, extract the massive embedded JSON-like ranking payload, and parse it.
4. Locate a specific player's performance (e.g. `BastienZim`).
5. (Optional) Compare the locally stored attempt with the public archive score/duration.

> Run the cells top-to-bottom. Adjust the `TARGET_DATE` and `PLAYER` variables as needed.

In [1]:
# Imports & path setup
import json
import math
import re
import statistics
import sys
import textwrap
from pathlib import Path

# The notebook lives in notebooks/, so the project root is one directory up.
BASE = Path('..').resolve()

# Put the project root at the front of sys.path (only once) so that the
# `src.*` imports below can be resolved.
if sys.path.count(str(BASE)) == 0:
    sys.path.insert(0, str(BASE))

from src.database import QuizDB
from src.scraper import QuizypediaScraper
from src.utils import RateLimiter

print('Sys path ok. Project root:', BASE)

Sys path ok. Project root: /home/bastienzim/Documents/perso/fan2quizz


In [2]:
# Parameters -- edit these values, then re-run the cells below.

# Path to the local SQLite database (adjust if needed).
DB_PATH = '../db/quizypedia.db'

# Date of the daily quiz to inspect, formatted as YYYY-MM-DD.
TARGET_DATE = '2025-10-14'

# Player nickname to locate in the archive.
PLAYER = 'BastienZim'

# Question count to assume when the total is not obvious in an archive entry.
QUIZ_TOTAL_FALLBACK = 20

# Echo the active settings so the recorded output shows what was used.
print(' '.join((DB_PATH, TARGET_DATE, PLAYER)))

../db/quizypedia.db 2025-10-14 BastienZim


In [3]:
# 1. Inspect daily quiz mapping
from pprint import pprint

# Open the local database and look up which quiz id is mapped to TARGET_DATE.
db = QuizDB(DB_PATH)
quiz_id = db.get_daily_quiz(TARGET_DATE)

if quiz_id is not None:
    # A mapping exists: load the quiz record; a falsy record is reported as '(missing)'.
    quiz = db.get_quiz(quiz_id)
    if quiz:
        title = quiz['title']
    else:
        title = '(missing)'
    print(f"Date {TARGET_DATE} -> quiz_id={quiz_id}: {title}")
    if quiz:
        print('Tags:', ', '.join(quiz['tags']))
        print('Questions:', len(quiz['questions']))
else:
    # No mapping for this date: show the CLI command that creates one.
    print(f"No daily quiz set locally for {TARGET_DATE}. You can set one via CLI:")
    print(f"  python -m src.cli set-daily --date {TARGET_DATE} --quiz-id <id>")

No daily quiz set locally for 2025-10-14. You can set one via CLI:
  python -m src.cli set-daily --date 2025-10-14 --quiz-id <id>


In [4]:
# 2. Local leaderboard (derived from attempts)
# Use an explicit None check, consistent with the `quiz_id is None` test in the
# mapping cell, so that a valid but falsy quiz id (e.g. 0) is not treated as
# "no quiz mapped" and silently skipped.
table = db.daily_table(TARGET_DATE) if quiz_id is not None else []
if not table:
    print('No local attempts / leaderboard entries for this date yet.')
else:
    print('Local derived leaderboard:')
    # Each row is unpacked into six fields; the displayed rank is the row's
    # 1-based position in `table`, while `external_rank` is printed as-is.
    for rank, row in enumerate(table, start=1):
        player, best_score, total, attempts, best_duration, external_rank = row
        print(f'{rank:>3} {player:<15} {best_score}/{total} attempts={attempts} dur={best_duration} rank={external_rank}')

No local attempts / leaderboard entries for this date yet.


In [5]:
# 3. Fetch archive page & capture embedded ranking payload
def _slice_balanced_array(text, start):
    """Return the bracket-balanced span of ``text`` that begins at ``start``.

    ``text[start]`` is expected to be the opening '[' of the array. Square
    brackets that occur inside double-quoted strings are ignored (backslash
    escapes are honoured), so a ']' inside a nickname or an HTML snippet
    cannot end the scan early.

    Returns the substring from ``start`` through the matching ']' inclusive,
    or None when no matching ']' is found.
    """
    depth = 0
    in_string = False
    escaped = False
    for i in range(start, len(text)):
        ch = text[i]
        if in_string:
            if escaped:
                # Character following a backslash: consumed verbatim.
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == '[':
            depth += 1
        elif ch == ']':
            depth -= 1
            if depth == 0:
                return text[start:i + 1]
    return None


scraper = QuizypediaScraper(rate_limiter=RateLimiter(0.2))
# TARGET_DATE is YYYY-MM-DD: fixed-position slices give year, month and day.
html = scraper.get_daily_archive_html(int(TARGET_DATE[0:4]), int(TARGET_DATE[5:7]), int(TARGET_DATE[8:10]))
# NOTE(review): if `html` is a str, len() counts characters rather than bytes;
# the label is left unchanged so the message matches earlier recorded runs.
print(f'Archive HTML length: {len(html)} bytes')

# Strategy: the site embeds a long JS array of objects with key "good_responses".
# Locate the first '[' followed by '{"good_responses"' and take everything up
# to the matching closing ']' at the same nesting level.
array_start = html.find('[{"good_responses"')
raw_payload = _slice_balanced_array(html, array_start) if array_start != -1 else None
if raw_payload:
    print('Raw payload length:', len(raw_payload))
else:
    print('No payload array detected via bracket scan. Consider dumping HTML to inspect manually.')

Archive HTML length: 446072 bytes
Raw payload length: 317410


In [6]:
# 4. Clean & parse the payload into Python objects
results = []
if raw_payload:
    import json, html as html_mod
    # Trim surrounding whitespace, then drop one trailing semicolon if present.
    cleaned = raw_payload.strip()
    cleaned = cleaned[:-1] if cleaned.endswith(';') else cleaned
    # The page JSON may contain player names shortened with an ellipsis inside
    # quotes (e.g., "player": "chimereve@hotm..."). They are valid JSON and are
    # kept as-is; matching against them is done by prefix in a later cell.
    # Escaped unicode such as \u00e9 is decoded by json.loads automatically.
    try:
        results = json.loads(cleaned)
    except json.JSONDecodeError as e:
        print('Primary JSON parse failed:', e)
        # Permissive retry: strip JS comment-like fragments ('//' up to the
        # end of the line) and parse again.
        cleaned2 = re.sub(r'//.*?\n', '\n', cleaned)
        try:
            results = json.loads(cleaned2)
        except Exception as e2:
            print('Secondary parse failed:', e2)

# Report the entry count only when something was parsed.
if results:
    print(f'Parsed {len(results)} ranking entries')
results[:2]  # preview

Parsed 1439 ranking entries


[{'good_responses': 20,
  'elapsed_time': 103,
  'user': 'zentak',
  'rank': 1,
  'newScore': False,
  'player': 'zentak',
  'score': '<div class="myFont2  size12"><span class="myFont2Bold">20 / 20</span>  - 103 s</div>'},
 {'good_responses': 20,
  'elapsed_time': 124,
  'user': 'Suplex',
  'rank': 2,
  'newScore': False,
  'player': 'Suplex',
  'score': '<div class="myFont2  size12"><span class="myFont2Bold">20 / 20</span>  - 124 s</div>'}]

In [7]:
# 5. Locate the target player in parsed results
# An exact (case-insensitive) name match always wins. A name truncated with a
# trailing '...' is only used as a fallback when no exact match exists, so an
# earlier truncated entry sharing a prefix cannot shadow the real player.
player_entry = None
prefix_candidate = None
needle = PLAYER.lower()
for obj in results:
    uname = (obj.get('user') or obj.get('player') or '').lower()
    if uname == needle:
        player_entry = obj
        break
    if prefix_candidate is None and uname.endswith('...'):
        stem = uname[:-3]
        # An empty stem (name is just '...') would match every player; skip it.
        if stem and needle.startswith(stem):
            prefix_candidate = obj

if player_entry is None:
    player_entry = prefix_candidate

if player_entry:
    rank = player_entry.get('rank')
    good = player_entry.get('good_responses')
    elapsed = player_entry.get('elapsed_time')
    # The total question count is not read from the entry; the configured
    # fallback is used.
    total = QUIZ_TOTAL_FALLBACK
    print(f'Player {PLAYER} -> rank={rank} score={good}/{total} duration={elapsed}s')
else:
    print(f'Player {PLAYER} not found in parsed archive data.')

Player BastienZim -> rank=998 score=11/20 duration=251s


In [8]:
# 6. Basic distribution stats
import statistics

if not results:
    print('No parsed results to summarize.')
else:
    # Keep only integer values; entries where the key is absent or holds
    # another type are left out of the summary.
    scores = [v for v in (r.get('good_responses') for r in results) if isinstance(v, int)]
    durations = [v for v in (r.get('elapsed_time') for r in results) if isinstance(v, int)]
    # Each summary line is printed only when its list is non-empty.
    if scores:
        print(f'Scores: n={len(scores)} min={min(scores)} max={max(scores)} mean={statistics.mean(scores):.2f}')
    if durations:
        print(f'Durations: n={len(durations)} min={min(durations)} max={max(durations)} median={statistics.median(durations)}')

Scores: n=1439 min=0 max=20 mean=12.48
Durations: n=1439 min=70 max=428 median=233


In [9]:
# Shell escape: add pandas to the project with uv. The next code cell imports pandas.
# NOTE(review): an install step in the middle of an analysis notebook is
# usually better declared in the project's dependencies up front -- confirm.
! uv add pandas


[2mResolved [1m24 packages[0m [2min 0.61ms[0m[0m
[2mAudited [1m22 packages[0m [2min 0.02ms[0m[0m


In [13]:
# 7. Display selected players (pretty print using rich if available, else plain)
# Configure the players you want to inspect (case-insensitive)
SELECTED_PLAYERS = [
    'jutabouret',
    'louish',
    'KylianMbappe',
    'BastienZim',
    'kamaiel',
    'Pascal-Condamine'
]

# pandas is imported here rather than in the setup cell because it is added
# to the project by the install cell that precedes this one.
import pandas as pd
try:
    from rich.console import Console
    from rich.table import Table
    from rich import box
    HAS_RICH = True
except ImportError:
    HAS_RICH = False


def fmt_elapsed(x):
    """Format a duration in seconds as 'M:SS'.

    Returns '' for missing values (None/NaN) and for values that cannot be
    converted to int. Durations are assumed to be non-negative.
    """
    if pd.isna(x):
        return ''
    try:
        x = int(x)
    except (TypeError, ValueError, OverflowError):
        return ''
    minutes, seconds = divmod(x, 60)
    return f"{minutes}:{seconds:02d}"


if not results:
    print("No results parsed yet – run earlier cells first.")
else:
    # Lower-cased lookup set for case-insensitive membership tests.
    wanted = {p.lower() for p in SELECTED_PLAYERS}
    rows = []
    for obj in results:
        uname = (obj.get('user') or obj.get('player') or '')
        if uname.lower() in wanted:
            rows.append({
                'player': uname,
                'rank': obj.get('rank'),
                'good_responses': obj.get('good_responses'),
                'total': QUIZ_TOTAL_FALLBACK,
                'elapsed_time_s': obj.get('elapsed_time'),
                'newScore': obj.get('newScore')
            })
    df = pd.DataFrame(rows)
    if df.empty:
        print("None of the selected players were found in the archive data.")
    else:
        # Coerce to numeric so a missing score becomes NaN instead of raising
        # during the division.
        good_numeric = pd.to_numeric(df['good_responses'], errors='coerce')
        df['pct'] = (good_numeric / df['total'] * 100).round(1)
        df['elapsed_fmt'] = df['elapsed_time_s'].apply(fmt_elapsed)
        df = df.sort_values(['rank','player'], na_position='last')
        display_cols = ['rank','player','good_responses','total','pct','elapsed_fmt']

        if HAS_RICH:
            console = Console()
            # Named `rich_table` so it does not overwrite the `table` list
            # built by the local-leaderboard cell.
            rich_table = Table(title=f"Selected Players ({len(df)})", box=box.MINIMAL_DOUBLE_HEAD, header_style="bold cyan")
            col_styles = {
                'rank': 'bold yellow',
                'player': 'white',
                'good_responses': 'bright_green',
                'total': 'green',
                'pct': 'magenta',
                'elapsed_fmt': 'blue'
            }
            for col in display_cols:
                rich_table.add_column(col, justify='center', style=col_styles.get(col, ''))
            # itertuples keeps each column's own dtype; missing cells (None or
            # NaN) are rendered as empty strings rather than 'None'/'nan'.
            for values in df[display_cols].itertuples(index=False, name=None):
                rich_table.add_row(*['' if pd.isna(v) else str(v) for v in values])
            console.print(rich_table)
        else:
            # Plain fallback
            print("(Install 'rich' for colored table: pip install rich)")
            print(df[display_cols].to_string(index=False))

    # Build the set of matched names once instead of once per selected player.
    found = {row['player'].lower() for row in rows}
    missing = [p for p in SELECTED_PLAYERS if p.lower() not in found]
    if missing:
        print("Missing players:", ", ".join(missing))

Missing players: kamaiel
