In [1]:
import sqlite3, pandas as pd, json
from datetime import datetime
# simple baseline grader (same as before) for this notebook
import re
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import normalize

In [2]:
import os 
os.chdir(os.getcwd().split('\\notebooks')[0])
main = os.getcwd()

DB_PATH = main + "\\data\\temporal\\exams.db"

In [3]:
def init_db(db_path=DB_PATH):
    con = sqlite3.connect(db_path)
    cur = con.cursor()
    cur.executescript("""
    PRAGMA journal_mode=WAL;
    CREATE TABLE IF NOT EXISTS exams(
      exam_id TEXT PRIMARY KEY,
      exam_type TEXT,
      date TEXT,
      year INTEGER, month INTEGER, day INTEGER
    );
    CREATE TABLE IF NOT EXISTS questions(
      exercise_id TEXT PRIMARY KEY,
      exam_id TEXT,
      question TEXT,
      solution TEXT,
      topic_pred TEXT,
      topic_score REAL,
      difficulty INTEGER,
      FOREIGN KEY(exam_id) REFERENCES exams(exam_id)
    );
    CREATE TABLE IF NOT EXISTS users(
      user_id INTEGER PRIMARY KEY AUTOINCREMENT,
      username TEXT UNIQUE
    );
    CREATE TABLE IF NOT EXISTS attempts(
      id INTEGER PRIMARY KEY AUTOINCREMENT,
      user_id INTEGER,
      exercise_id TEXT,
      ts TEXT DEFAULT (datetime('now')),
      score REAL,
      correct INTEGER,
      cosine REAL,
      jaccard REAL,
      missing_keywords TEXT,
      student_answer TEXT,
      FOREIGN KEY(user_id) REFERENCES users(user_id),
      FOREIGN KEY(exercise_id) REFERENCES questions(exercise_id)
    );
    CREATE INDEX IF NOT EXISTS idx_attempts_user ON attempts(user_id);
    CREATE INDEX IF NOT EXISTS idx_attempts_ex ON attempts(exercise_id);
    """)
    con.commit(); con.close()

# 1) Create schema if missing
init_db()

# 2) (Optional) sanity-check tables exist
con = sqlite3.connect(DB_PATH)
tables = [r[0] for r in con.execute("SELECT name FROM sqlite_master WHERE type='table'")]
con.close()
print("Tables:", tables)


Tables: ['exams', 'questions', 'users', 'sqlite_sequence', 'attempts']


In [4]:
# (paste these small helper stubs or import them if you saved to a .py)
def get_user_id(username:str, db_path=DB_PATH) -> int:
    con = sqlite3.connect(db_path); cur = con.cursor()
    cur.execute("INSERT OR IGNORE INTO users(username) VALUES(?)", (username,))
    con.commit()
    cur.execute("SELECT user_id FROM users WHERE username=?", (username,))
    uid = cur.fetchone()[0]; con.close(); return uid

def pick_unseen(username:str, k:int=5, db_path=DB_PATH):
    uid = get_user_id(username, db_path)
    con = sqlite3.connect(db_path); cur = con.cursor()
    cur.execute("""
      SELECT q.exercise_id, q.topic_pred, e.date
      FROM questions q JOIN exams e ON q.exam_id=e.exam_id
      WHERE q.exercise_id NOT IN (SELECT exercise_id FROM attempts WHERE user_id=?)
      ORDER BY e.date ASC LIMIT ?""", (uid, k))
    rows = cur.fetchall(); con.close()
    return rows

def fetch_question(exercise_id:str, db_path=DB_PATH):
    con = sqlite3.connect(db_path); cur = con.cursor()
    cur.execute("""
      SELECT q.exercise_id, q.question, q.solution, q.topic_pred, e.date, e.exam_type
      FROM questions q JOIN exams e ON q.exam_id=e.exam_id
      WHERE q.exercise_id=?""", (exercise_id,))
    row = cur.fetchone(); con.close()
    return dict(zip(["exercise_id","question","solution","topic","date","exam_type"], row))


def _clean(s:str) -> str:
    s = (s or "").lower()
    s = re.sub(r"\(cid:\d+\)", " ", s); s = re.sub(r"[^a-z0-9\-\+\*/\^\=\(\)\[\]\{\}\., ]+"," ",s)
    return re.sub(r"\s+"," ", s).strip()
STOP = set("the a an and or of to for with from in on at is are be was were by as that this these those into over under if then else such".split())
def _keywords(s:str):
    toks = re.findall(r"[a-z0-9\^\+\-\*/=]+", _clean(s))
    return {t for t in toks if len(t)>=2 and t not in STOP}
def grade_answer(solution:str, student:str):
    sol = _clean(solution); ans = _clean(student)
    vec = TfidfVectorizer(analyzer="char_wb", ngram_range=(3,5), min_df=1)
    X = vec.fit_transform([sol, ans]); X = normalize(X)
    cos = float((X[0] @ X[1].T).A[0,0]) if X.shape[1] else 0.0
    Ks, Ka = _keywords(sol), _keywords(ans)
    jac = len(Ks & Ka) / max(1, len(Ks | Ka))
    score = 0.6*cos + 0.4*jac
    return {"score": round(score,4), "correct": score>=0.6, "cosine": round(cos,4), "jaccard": round(jac,4),
            "missing_keywords": list((Ks-Ka))[:8]}

def submit_answer_db(username:str, exercise_id:str, student_answer:str, db_path=DB_PATH):
    uid = get_user_id(username, db_path)
    q = fetch_question(exercise_id, db_path)
    g = grade_answer(q["solution"], student_answer)
    con = sqlite3.connect(db_path); cur = con.cursor()
    cur.execute("""INSERT INTO attempts(user_id, exercise_id, score, correct, cosine, jaccard, missing_keywords, student_answer)
                   VALUES(?,?,?,?,?,?,?,?)""",
                (uid, exercise_id, g["score"], int(g["correct"]), g["cosine"], g["jaccard"],
                 json.dumps(g["missing_keywords"]), student_answer))
    con.commit(); con.close()
    return {"exercise_id": exercise_id, "topic": q["topic"], **g}


In [5]:
# Try it
USERNAME = "student2"

rows = pick_unseen(USERNAME, k=3, db_path=DB_PATH)
print("Unseen candidates:", rows)

ex_id = rows[0][0]
q = fetch_question(ex_id)
print("\nQuestion:", q["exercise_id"], q["topic"], q["date"])
print(q["question"])

# Simulate an answer
res = submit_answer_db(USERNAME, ex_id, "My attempt about contraction mapping and fixed points...")
print("\nResult:", res)


Unseen candidates: [('Exercise 4', 'linear_functionals_and_operators', '2025-08-29'), ('Exercise 5', 'metric_spaces', '2025-08-29'), ('Exercise 6', 'dynamic_optimization', '2025-08-29')]

Question: Exercise 4 linear_functionals_and_operators 2025-08-29
Consider R n endowed with the supnorm .3 We say that an operator f : R n R n is k k ! monotone if and only if 1 x y = f(x) f(y):4 ) Consider two operators f;g : R n R n and assume they are both -contractions with (0;1). ! 2 Denote their unique xed points by x and x . f g 1. Prove that if f g and f is monotone,5 then x x . f g 2. Prove that if g = 1I + 1f,6 then x = x . 2 2 f g

Result: {'exercise_id': 'Exercise 4', 'topic': 'linear_functionals_and_operators', 'score': 0.084, 'correct': False, 'cosine': 0.1293, 'jaccard': 0.0161, 'missing_keywords': ['n+1', 'gn+1', 'o2nfl', 'th', 'follows', 'will', '=x', 'complete']}


In [6]:
# See weak topics summary
def topic_summary(username:str, db_path=DB_PATH):
    uid = get_user_id(username, db_path)
    con = sqlite3.connect(db_path); cur = con.cursor()
    cur.execute("""
      SELECT q.topic_pred as topic, AVG(a.score) as avg_score, COUNT(*) as n
      FROM attempts a JOIN questions q ON a.exercise_id=q.exercise_id
      WHERE a.user_id=?
      GROUP BY q.topic_pred
      ORDER BY avg_score ASC
    """, (uid,))
    rows = cur.fetchall(); con.close()
    return pd.DataFrame(rows, columns=["topic","avg_score","n"])

topic_summary(USERNAME)


Unnamed: 0,topic,avg_score,n
0,brouwer_fixed_point_theorem,0.0185,2
1,linear_functionals_and_operators,0.321,4


# PROBAR

In [1]:
import sqlite3

In [3]:
con = sqlite3.connect(r"D:\ESS\ocr_math_q\data\temporal\exams.db")

In [4]:
cur = con.cursor()

In [9]:
cur.execute('SELECT * from questions')

<sqlite3.Cursor at 0x257105164c0>

In [10]:
results = cur.fetchall()
print(results)

[('Exercise 1', 'General_2025-08-29', 'Consider a functional f : R n R. State and prove the Riesz representation theorem ! (that is, the theorem that provides a characterization for linear functionals).', 'See the lecture notes.', 'linear_functionals_and_operators', None, None), ('Exercise 2', 'General_2025-08-29', 'Consider R n endowed with the d 1 distance.1 Consider a nonempty subset C of X. We say that x in an algebraic interior point of C if and only if for each y R n there exists > 0 such 2 that x+ y C. We denote the set of all algebraic interior points of C by algC. 2 1. Prove that intC algC. 2. Prove that if C is convex and x algC, then x C. 2 2 3. Prove that if C is convex, then intC = algC.', '. 1. If intC = , then trivially we have that intC algC. If intC = , consider x intC. ; 6 ; 2 It follows that there exists " > 0 such that B " (x) C. Consider y 2 R n. Set = k y k " 1 +1 > 0. Note that d (x;x+ y) = x+ y x = y < ", proving that x+ y B (x) C. Since x and y 1 k k1 k k1 2 " 