In [1]:
from TTS.api import TTS
import IPython.display as ipd

import re, yaml, numpy as np, tiktoken
import ollama
import os, json
import os, sys
import psycopg
from fastapi import FastAPI
from pydantic import BaseModel
from pathlib import Path
from datetime import datetime


  from pkg_resources import resource_filename


In [3]:
# Ollama model names: EMBED_MODEL is passed to ollama.embeddings, LLM to ollama.chat.
EMBED_MODEL = "nomic-embed-text"
LLM = "gemma3:4b"

# General-purpose system prompt (not referenced by the visible cells).
SYSTEM_PROMPT = """You are an AI Oral Board Examiner. Answer concisely and clinically. 
Use the provided CONTEXT. If uncertain, say so."""

# System prompt sent with every examiner question / hint / grade request.
SYSTEM_EXAMINER = """You are the EXAMINER in a surgical oral boards simulation.
Follow the rules from the basecase.md file
"""

# Single source of truth for the base-case markdown location, relative to the
# current user's home directory so the notebook is not tied to one account.
BASE_PATH = Path.home() / "OneDrive/Desktop/SoftwareDocuments/baseCase.md"
# Read through BASE_PATH instead of repeating a hard-coded absolute path, so the
# path that is logged at start-up is guaranteed to be the file that was loaded.
BASE_MD = BASE_PATH.read_text(encoding="utf-8")
doc_text = BASE_MD


In [17]:
# Shared tokenizer used for all token counting and for slicing overlap tails.
enc = tiktoken.get_encoding("cl100k_base")


def tokens(s):
    """Return how many tokens the shared encoder `enc` produces for `s`."""
    token_ids = enc.encode(s)
    return len(token_ids)

def split_by_headings(md: str):
    """Split markdown text at level-2 and level-3 headings.

    The heading marker (``##`` / ``###`` plus following whitespace) is removed;
    the heading text stays at the start of its section. Sections are stripped
    and empty ones are dropped.
    """
    sections = []
    for piece in re.split(r"(?m)^##\s+|^###\s+", md):
        cleaned = piece.strip()
        if cleaned:
            sections.append(cleaned)
    return sections

def smart_chunk(md: str, max_tokens=500, overlap=50):
    """Split markdown into retrieval chunks of roughly ``max_tokens`` tokens.

    The text is first split on ``##``/``###`` headings. A section that already
    fits is kept whole. A longer section is packed word by word into windows;
    when a window is full, its last ``overlap`` tokens are carried into the
    next window so context is not lost at the boundary.

    Args:
        md: Markdown source text.
        max_tokens: Target upper bound per chunk. Window sizes are estimated
            by summing per-word token counts, so the bound is approximate, and
            a single word longer than ``max_tokens`` is emitted as its own
            oversized chunk.
        overlap: Number of trailing tokens repeated at the start of the next
            window. ``0`` (or a negative value) disables overlap.

    Returns:
        List of non-empty chunk strings, in document order.
    """
    chunks = []
    for part in split_by_headings(md):
        if tokens(part) <= max_tokens:
            chunks.append(part)
            continue

        cur, cur_tokens = [], 0
        for w in part.split():
            tw = tokens(w + " ")
            # Flush only a non-empty window: an oversized first word must not
            # emit an empty "" chunk.
            if cur and cur_tokens + tw > max_tokens:
                window_text = " ".join(cur)
                chunks.append(window_text)
                if overlap > 0:
                    # Token-level tail of the flushed window. The guard matters
                    # because ids[-0:] would be the whole list, not an empty one.
                    tail = enc.decode(enc.encode(window_text)[-overlap:])
                    cur = tail.split()
                else:
                    cur = []
                cur_tokens = tokens(" ".join(cur)) if cur else 0
            cur.append(w)
            cur_tokens += tw
        if cur:
            chunks.append(" ".join(cur))
    return chunks

# Build the retrieval corpus once from the base-case markdown: chunks target
# 450 tokens, with 60 tokens of overlap between word-window chunks.
chunks = smart_chunk(doc_text, max_tokens=450, overlap=60)

def embed_batch(texts):
    """Embed each text with the Ollama embedding model and L2-normalise the rows.

    One ``ollama.embeddings`` call is made per text. Returns a float32 matrix
    with one row per text. For empty input a ``(0, 1)`` placeholder is returned.
    Rows are unit-length (up to a 1e-9 epsilon), so a dot product between rows
    is their cosine similarity.
    """
    rows = [
        np.array(
            ollama.embeddings(model=EMBED_MODEL, prompt=text)["embedding"],
            dtype=np.float32,
        )
        for text in texts
    ]
    if rows:
        matrix = np.vstack(rows)
    else:
        matrix = np.zeros((0, 1), dtype=np.float32)
    # Epsilon keeps an all-zero embedding from dividing by zero.
    row_norms = np.linalg.norm(matrix, axis=1, keepdims=True) + 1e-9
    matrix /= row_norms
    return matrix

# Embed every chunk up front (one Ollama call per chunk). Rows are
# L2-normalised, so retrieve() can score a query with a plain dot product.
chunk_vecs = embed_batch(chunks)

def retrieve(query: str, k=5):
    """Return the ``k`` chunks most similar to ``query``.

    The query is embedded with the same Ollama model as the corpus, normalised
    to unit length, and scored against the pre-normalised ``chunk_vecs`` with a
    dot product (cosine similarity).

    Args:
        query: Free-text query.
        k: Maximum number of results.

    Returns:
        List of ``(similarity, chunk_text)`` tuples, best match first. Empty
        when no chunks have been indexed.
    """
    # Empty corpus: chunk_vecs is then a (0, 1) placeholder that cannot be
    # multiplied with a real embedding. Callers already treat [] as "no
    # context", so return early and skip the embedding call as well.
    if len(chunk_vecs) == 0:
        return []

    q = np.array(ollama.embeddings(model=EMBED_MODEL, prompt=query)["embedding"], dtype=np.float32)
    q /= (np.linalg.norm(q) + 1e-9)
    sims = chunk_vecs @ q
    # Negate so argsort's ascending order yields the highest similarity first.
    idx = np.argsort(-sims)[:k]
    return [(float(sims[i]), chunks[i]) for i in idx]


def examiner_question(history, k_ctx=4, new_case=False):
    """Generate the examiner's next turn from retrieved context.

    Two modes:

    * Opening (``new_case`` is true, or ``history`` has no candidate turn yet):
      retrieve generic opening context and ask the model for a fresh vignette
      followed by one initial question, without referring to earlier dialogue.
    * Follow-up: retrieve context using the most recent examiner question plus
      the most recent candidate answer, then ask the model for exactly one
      next question.

    Args:
        history: List of ``{"role": ..., "content": ...}`` messages; candidate
            turns use role ``"user"``, examiner turns role ``"assistant"``.
        k_ctx: Number of context chunks to retrieve.
        new_case: Force the opening mode.

    Returns:
        The model's reply with surrounding whitespace stripped.
    """
    no_candidate_turn = all(m["role"] != "user" for m in history)

    if new_case or no_candidate_turn:
        # Fresh start: generic opening context, no reference to prior dialogue.
        hits = retrieve("opening vignette general surgery", k=k_ctx)
        context_block = "\n\n---\n".join(text for _, text in hits) if hits else ""
        prompt = (
            "CONTEXT:\n"
            f"{context_block}\n\n"
            "Start a NEW, self-contained case. Provide an opening vignette "
            "(age/sex, chief complaint, brief vitals, 1–2 key positives/negatives), "
            "then ask exactly ONE initial question. Do NOT say 'continue' or reference prior dialogue."
        )
    else:
        # Most recent candidate answer and most recent examiner question.
        last_user = "acute appendicitis adult"
        last_exam_q = ""
        for m in reversed(history):
            if m["role"] == "user":
                last_user = m["content"]
                break
        for m in reversed(history):
            if m["role"] == "assistant":
                last_exam_q = m["content"]
                break

        # Query with both sides of the last exchange to stay on the same thread.
        retrieval_query = f"{last_exam_q} {last_user}".strip()
        hits = retrieve(retrieval_query, k=k_ctx)
        context_block = "\n\n---\n".join(text for _, text in hits) if hits else ""
        prompt = (
            "CONTEXT:\n"
            f"{context_block}\n\n"
            f"CANDIDATE PREVIOUS: {last_user}\n"
            "Now ask exactly ONE next oral-boards question. Do not reveal answers."
        )

    messages = [
        {"role": "system", "content": SYSTEM_EXAMINER},
        {"role": "user", "content": prompt},
    ]
    reply = ollama.chat(model=LLM, messages=messages)
    return reply["message"]["content"].strip()


def examiner_hint(history):
    """Ask the model for one short hint about the candidate's latest answer.

    Context is retrieved with the most recent ``"user"`` message, or with a
    generic fallback query when there is none. Returns the model's reply with
    surrounding whitespace stripped.
    """
    last_user = ""
    for m in reversed(history):
        if m["role"] == "user":
            last_user = m["content"]
            break

    hits = retrieve(last_user or "acute appendicitis adult", k=3)
    context_block = "\n\n---\n".join(text for _, text in hits) if hits else ""
    prompt = (
        f"CONTEXT:\n{context_block}\n"
        f"Candidate said: {last_user}\nProvide ONE short hint (<=25 words)."
    )
    reply = ollama.chat(
        model=LLM,
        messages=[
            {"role": "system", "content": SYSTEM_EXAMINER},
            {"role": "user", "content": prompt},
        ],
    )
    return reply["message"]["content"].strip()

def examiner_grade(history):
    """Ask the model to grade the candidate's most recent answer.

    Context is retrieved with the most recent ``"user"`` message, or with a
    generic rubric query when there is none. The model is asked for a score,
    strengths, deficits and a pass/fail decision. Returns the reply with
    surrounding whitespace stripped.
    """
    last_user = ""
    for m in reversed(history):
        if m["role"] == "user":
            last_user = m["content"]
            break

    hits = retrieve(last_user or "evaluation rubric", k=3)
    context_block = "\n\n---\n".join(text for _, text in hits) if hits else ""
    prompt = (
        f"CONTEXT:\n{context_block}\n"
        f"Evaluate the candidate's last answer:\n\"\"\"{last_user}\"\"\"\n"
        "Return: score 0–10, 2–3 bullet strengths, 2–3 bullet deficits, pass/fail with rationale tied to case facts."
    )
    reply = ollama.chat(
        model=LLM,
        messages=[
            {"role": "system", "content": SYSTEM_EXAMINER},
            {"role": "user", "content": prompt},
        ],
    )
    return reply["message"]["content"].strip()



In [19]:
def run_sim():
    """Run the interactive oral-boards session on stdin/stdout.

    Commands (case-insensitive, surrounding whitespace ignored):

    * ``/start`` - reset the history and open a new case.
    * ``/next``  - ask another question without recording an answer.
    * ``/hint``  - print a short hint about the last candidate answer.
    * ``/grade`` - print an evaluation of the last candidate answer.
    * ``/end``   - leave the session.

    Any other non-empty input after ``/start`` is recorded as the candidate's
    answer and followed by the next examiner question. Blank input is ignored,
    so an accidental Enter is not graded as an answer and costs no model call.
    Ctrl-C or end-of-input ends the session.
    """
    print("Oral Boards Simulator\n"
          "Commands: /start, /next, /hint, /grade, /end\n"
          "You are the CANDIDATE. The AI is the EXAMINER.\n")
    history = []
    started = False

    def _ask_examiner(turns, new_case=False):
        """Fetch the next examiner question, print it and append it to ``turns``."""
        q = examiner_question(turns, new_case=new_case)
        print(f"\nExaminer: {q}\n")
        turns.append({"role": "assistant", "content": q})

    while True:
        try:
            user_in = input("> ").strip()
            command = user_in.lower()

            if command == "/end":
                print("Session ended.")
                break

            if command == "/start":
                started = True
                history = []
                _ask_examiner(history, new_case=True)
                continue

            if not started:
                print('Type "/start" to begin.')
                continue

            if command == "/next":
                # Advance flow: simply ask next question; retrieval uses last turn
                _ask_examiner(history)
                continue

            if command == "/hint":
                print(f"\nHint: {examiner_hint(history)}\n")
                continue

            if command == "/grade":
                print(f"\nGrading:\n{examiner_grade(history)}\n")
                continue

            if not user_in:
                # A blank line is not an answer: do not record it or call the model.
                print("Empty input ignored. Type an answer or a command.")
                continue

            # Candidate's answer
            history.append({"role": "user", "content": user_in})
            if len(history) > 24:
                history = history[-24:]  # keep context compact

            # Next examiner question
            _ask_examiner(history)

        except (KeyboardInterrupt, EOFError):
            print("\nSession interrupted.")
            break

In [23]:
if __name__ == "__main__":
    # Log a timestamp and the configured base-case location, then start the
    # interactive session.
    stamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[{stamp}] Loading baseCase from: {BASE_PATH}")
    run_sim()

[2025-10-02 17:32:37] Loading baseCase from: C:\Users\jmcam\OneDrive\Desktop\SoftwareDocuments\baseCase.md
Oral Boards Simulator
Commands: /start, /next, /hint, /grade, /end
You are the CANDIDATE. The AI is the EXAMINER.



>  /start



Examiner: This is the beginning of the exam:

You are presented with a 23-year-old male, Mr. David Miller, who presents to the Emergency Department complaining of right lower quadrant pain that began approximately 18 hours prior. He reports the pain is constant, rated 8/10, and has been worsening over the last 6 hours. He denies fever, chills, nausea, or vomiting. Initial vitals are: Temperature 98.6°F (37°C), Heart Rate 110 bpm, Blood Pressure 130/80 mmHg, Respiratory Rate 18 breaths/min, Oxygen Saturation 98% on room air. Abdominal exam reveals mild tenderness to palpation in the right lower quadrant with guarding. Bowel sounds are normoactive. He has no known allergies and takes no regular medications.

Given this presentation, what is the first step in your management?



>  I would prescribe 5g of ibuprofen taken orally every 6 hours and send them home.



Examiner: Okay, let's proceed.

**Examiner:** The patient has now been discharged home on oral antibiotics and follow-up with his primary care physician. Six weeks later, he returns to your clinic complaining of recurrent right lower quadrant pain. He denies fever or nausea. Physical exam reveals tenderness at the McBurney's point. What is the most likely diagnosis and what is the next step in management?



>  /grade



Grading:
Okay, here’s my evaluation of the candidate’s last answer:

**Score: 2/10**

**Bullet Strengths:**

*   **Recognition of Pain:** The candidate acknowledges the patient likely has pain, which is a crucial initial consideration.


**Bullet Deficits:**

*   **Lack of Specificity & Inadequate Analgesia:** Prescribing “5g of ibuprofen” is entirely insufficient. The patient likely has a significant inflammatory response and will require higher doses and potentially other analgesics.  This demonstrates a failure to consider the magnitude of the problem.
*   **Ignoring IV Access & Supportive Care:** The initial instructions (as presented in the case) explicitly state “Establish IV access…” This answer completely disregards the need for intravenous fluids, antiemetics, and potentially antibiotics – all vital components of initial management.
*   **Failure to Address Underlying Cause:** The answer doesn’t address the bacterial infection that is causing the patient’s symptoms. Simply pr

In [None]:
# This is for later use; it was only for testing text-to-speech.


def speak_text(text: str, out_path: str = "answer.wav", speaker: str | None = None):
    kwargs = {}
    if speaker is not None: #multi-speaker models
        kwargs["speaker"]=speaker
    tts.tts_to_file(text=text, file_path=out_path, **kwargs)
    return ipd.Audio(out_path)

def chat_and_speak(prompt: str, out_path="answer.wav", speaker=None):
    """Send ``prompt`` to the chat model, print the reply and speak it.

    The prompt is sent as a single user message with no system prompt. The
    reply text is printed with an ``LLM:`` prefix and handed to ``speak_text``,
    whose return value is returned.
    """
    conversation = [{"role": "user", "content": prompt}]
    text = ollama.chat(model=LLM, messages=conversation)["message"]["content"]
    print("LLM:", text)
    return speak_text(text, out_path, speaker)

# Example usage: ask the LLM a question, print its reply and synthesise it to
# answer.wav. As the cell's last expression, the returned audio object is
# what the notebook displays.
chat_and_speak("Give three key symptoms of diverticulitis. No disclaimers, no special characters, give short sentences.")

In [55]:
###################################################################################
# Note: Run this cell to commit the notebook to the git repository
####################################################################################



import os, sys, subprocess, glob, shutil

# Make `git` callable from this kernel: locate git.exe and, if needed, add its
# directories to PATH for the current process only.

# Candidate install locations (system + per-user); %VAR% references are
# expanded on the next line.
cands = [
    r"C:\Program Files\Git\cmd\git.exe",
    r"C:\Program Files (x86)\Git\cmd\git.exe",
    r"%LOCALAPPDATA%\Programs\Git\cmd\git.exe",
]
cands = [os.path.expandvars(p) for p in cands]

# Also discover git if it is installed in a nonstandard place but already on
# PATH. (The previous glob patterns had no wildcards and only repeated the
# candidates above.)
on_path = shutil.which("git")
if on_path:
    cands.insert(0, on_path)

found = next((p for p in cands if os.path.exists(p)), None)
print("Kernel:", sys.executable)
print("Found git at:", found)

if found:
    git_cmd = os.path.dirname(found)
    # Sibling "bin" directory of the install. Built from path components so
    # only the last component changes, never a "\cmd" earlier in the path.
    git_bin = os.path.join(os.path.dirname(git_cmd), "bin")
    # Append only entries that are missing, so re-running the cell does not
    # keep growing PATH.
    path_entries = os.environ["PATH"].split(os.pathsep)
    missing = []
    for entry in (git_cmd, git_bin):
        if entry not in path_entries and entry not in missing:
            missing.append(entry)
    if missing:
        os.environ["PATH"] = os.pathsep.join(path_entries + missing)
    try:
        print("git --version ->", subprocess.check_output(["git","--version"], text=True).strip())
    except Exception as e:
        # Best effort: report the failure and let the user fix PATH by hand.
        print("Tried to add PATH but still failing:", e)
else:
    print("Could not locate git.exe automatically. If you know the path, run:")
    print(r'import os; os.environ["PATH"] += r";C:\Path\To\Git\cmd;C:\Path\To\Git\bin"')
# Point the "origin" remote at the GitHub repository.
!git remote set-url origin https://github.com/Jonah-Camacho/ENGR-498.git
# Copy the working notebook into the local clone (Windows `copy`; /Y overwrites without prompting).
!copy /Y "C:\Users\jmcam\OralBoardAI.ipynb" "C:\Users\jmcam\Documents\ENGR-498\OralBoardAI.ipynb"
# Change the kernel's working directory to the clone so the git commands below run inside it.
%cd "C:\Users\jmcam\Documents\ENGR-498"
# Stage and commit only the notebook.
!git add -- "OralBoardAI.ipynb"
!git commit -m "Add OralBoardAI notebook"
# NOTE(review): the recorded output of this cell shows the push being rejected because the
# remote had commits missing locally; the remote changes must be fetched and integrated first.
!git push

Kernel: C:\Users\jmcam\.venv-tts\Scripts\python.exe
Found git at: C:\Program Files\Git\cmd\git.exe
git --version -> git version 2.51.0.windows.1
        1 file(s) copied.C:\Users\jmcam\Documents\ENGR-498





[main e36bad1] Add OralBoardAI notebook
 1 file changed, 35 insertions(+), 162 deletions(-)


To https://github.com/Jonah-Camacho/ENGR-498.git
 ! [rejected]        main -> main (fetch first)
error: failed to push some refs to 'https://github.com/Jonah-Camacho/ENGR-498.git'
hint: Updates were rejected because the remote contains work that you do not
hint: have locally. This is usually caused by another repository pushing to
hint: the same ref. If you want to integrate the remote changes, use
hint: 'git pull' before pushing again.
hint: See the 'Note about fast-forwards' in 'git push --help' for details.


In [57]:
# Recover from a rejected push: integrate the remote commits, then push again.
%cd "C:\Users\jmcam\Documents\ENGR-498"
# Let git convert line endings automatically (CRLF in the working tree, LF in the repository).
!git config core.autocrlf true
# Download the remote commits, then replay the local commit(s) on top of origin/main.
!git fetch origin
!git pull --rebase origin main
# Push and set origin/main as the upstream of the local main branch (-u).
!git push -u origin main

C:\Users\jmcam\Documents\ENGR-498


From https://github.com/Jonah-Camacho/ENGR-498
   0e0eee7..5dd02a8  main       -> origin/main
From https://github.com/Jonah-Camacho/ENGR-498
 * branch            main       -> FETCH_HEAD
Rebasing (1/1)
Successfully rebased and updated refs/heads/main.


branch 'main' set up to track 'origin/main'.


To https://github.com/Jonah-Camacho/ENGR-498.git
   5dd02a8..8ad2df9  main -> main
