In [9]:
from TTS.api import TTS
import IPython.display as ipd

import re, yaml, numpy as np, tiktoken
import ollama
import os, json
import os, sys
import psycopg
from fastapi import FastAPI
from pydantic import BaseModel
from pathlib import Path
from datetime import datetime


In [11]:
# Ollama model tags: EMBED_MODEL is used for embeddings, LLM for chat replies.
EMBED_MODEL = "nomic-embed-text"
LLM = "gemma3:4b"

# Generic system prompt (the examiner helpers use SYSTEM_EXAMINER instead).
SYSTEM_PROMPT = """You are an AI Oral Board Examiner. Answer concisely and clinically. 
Use the provided CONTEXT. If uncertain, say so."""

# Rules for the examiner persona. This is the single definition of
# SYSTEM_EXAMINER: a later one-line reassignment used to replace these rules
# with a bare role sentence, so the question/hint/grade instructions below
# never reached the model.
SYSTEM_EXAMINER = """You are the EXAMINER in a surgical oral boards simulation.
Follow these rules strictly:
- Ask EXACTLY ONE concise clinical question per turn (<= 100 words).
- Do not coach unless the candidate types /hint. Do not answer for the candidate.
- Use only facts present in CONTEXT; if information is not present in the base case, say: "Information not provided in the base case."
- Keep a professional, neutral tone. Be deterministic.
- During /grade provide: brief strengths, brief deficits, score 0–10, and pass/fail with rationale tied to case facts.
"""

# Base case document. BASE_PATH is resolved from the current user's home
# directory and is the one place the file location is configured; the text is
# read from it so the path reported at startup is the path actually loaded.
BASE_PATH = Path.home() / "OneDrive/Desktop/SoftwareDocuments/baseCase.md"
BASE_MD = BASE_PATH.read_text(encoding="utf-8")
doc_text = BASE_MD

In [13]:
# Shared tokenizer (cl100k_base encoding) used for every token count in this notebook.
enc = tiktoken.get_encoding("cl100k_base")


def tokens(s):
    """Return how many tokens `s` encodes to with the shared `enc` tokenizer."""
    token_ids = enc.encode(s)
    return len(token_ids)

def split_by_headings(md: str):
    """Split markdown text into sections at `##` and `###` headings.

    The heading marker and the whitespace after it are consumed by the split,
    so each section starts with its heading text. Sections are stripped and
    whitespace-only pieces are dropped.
    """
    sections = []
    for piece in re.split(r"(?m)^##\s+|^###\s+", md):
        trimmed = piece.strip()
        if trimmed:
            sections.append(trimmed)
    return sections

def smart_chunk(md: str, max_tokens=500, overlap=50):
    """Split markdown into retrieval chunks of roughly `max_tokens` tokens.

    The text is first split at headings. A section that fits the budget is
    kept whole; a longer one is packed word by word, and each new chunk starts
    with the last `overlap` tokens of the previous one so context is not lost
    at the boundary.

    Args:
        md: Markdown source text.
        max_tokens: Token budget per chunk. Sizes are estimated by summing
            per-word token counts, so a chunk can differ slightly from the
            budget, and a single word larger than the budget still becomes
            its own chunk.
        overlap: Number of trailing tokens repeated at the start of the next
            chunk. Zero or a negative value disables the overlap.

    Returns:
        A list of non-empty chunk strings in document order.
    """
    chunks = []
    for part in split_by_headings(md):
        if tokens(part) <= max_tokens:
            chunks.append(part)
            continue

        cur, cur_tokens = [], 0
        for w in part.split():
            tw = tokens(w + " ")
            # Flush only when something has been collected: an oversized first
            # word must not produce an empty chunk.
            if cur and cur_tokens + tw > max_tokens:
                joined = " ".join(cur)
                chunks.append(joined)
                # `ids[-0:]` is the whole list, so overlap <= 0 has to be
                # handled explicitly instead of relying on the slice.
                tail_ids = enc.encode(joined)[-overlap:] if overlap > 0 else []
                back = enc.decode(tail_ids) if tail_ids else ""
                cur = back.split()
                cur_tokens = tokens(" ".join(cur))
            cur.append(w)
            cur_tokens += tw
        if cur:
            chunks.append(" ".join(cur))
    return chunks

# Chunk the base case text once: 450-token budget per chunk, 60-token overlap.
chunks = smart_chunk(doc_text, max_tokens=450, overlap=60)

def embed_batch(texts):
    """Embed every text with the Ollama embedding model and L2-normalise the rows.

    One embedding request is made per text. Returns a float32 array with one
    row per text; for an empty input the result is an empty (0, 1) array.
    """
    rows = [
        np.array(ollama.embeddings(model=EMBED_MODEL, prompt=text)["embedding"], dtype=np.float32)
        for text in texts
    ]
    if rows:
        matrix = np.vstack(rows)
    else:
        matrix = np.zeros((0, 1), dtype=np.float32)
    # Unit-length rows let a plain dot product serve as cosine similarity;
    # the epsilon avoids dividing by zero for an all-zero embedding.
    row_norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    matrix /= (row_norms + 1e-9)
    return matrix

# Precompute normalised embeddings for all chunks (one embedding request per chunk).
chunk_vecs = embed_batch(chunks)

def retrieve(query: str, k=5):
    """Return the `k` chunks most similar to `query`.

    The query is embedded with the same model as the chunks and normalised,
    so the dot product against the pre-normalised `chunk_vecs` is the cosine
    similarity.

    Args:
        query: Free-text search string.
        k: Maximum number of results.

    Returns:
        A list of `(similarity, chunk_text)` tuples, best match first. The
        list is empty when no chunks have been indexed.
    """
    # An empty index has shape (0, 1), which cannot be multiplied with a real
    # embedding; return early (and skip the embedding request) instead of
    # raising a shape error.
    if not chunks:
        return []
    q = np.array(ollama.embeddings(model=EMBED_MODEL, prompt=query)["embedding"], dtype=np.float32)
    q /= (np.linalg.norm(q) + 1e-9)
    sims = chunk_vecs @ q
    # Negate so argsort's ascending order yields the highest similarity first.
    idx = np.argsort(-sims)[:k]
    return [(float(sims[i]), chunks[i]) for i in idx]


# Ask the next question. Context is retrieved from the candidate's most recent
# utterance plus, when there is one, the previous examiner question, so the
# conversation stays on the same thread.
def examiner_question(history, k_ctx=4):
    """Generate the examiner's next question.

    Args:
        history: List of `{"role", "content"}` message dicts, oldest first.
        k_ctx: Number of context chunks to retrieve.

    Returns:
        The model's reply with surrounding whitespace removed.
    """

    def latest(role, default):
        # Most recent message content for `role`, or `default` if there is none.
        for msg in reversed(history):
            if msg["role"] == role:
                return msg["content"]
        return default

    last_user = latest("user", "acute appendicitis adult")
    last_exam_q = latest("assistant", "")
    retrieval_query = " ".join([last_exam_q, last_user]).strip()

    hits = retrieve(retrieval_query, k=k_ctx)
    context_block = "\n\n---\n".join(text for _score, text in hits) if hits else ""

    prompt = (
        f"CONTEXT:\n{context_block}\n\n"
        f"CANDIDATE PREVIOUS: {last_user}\n"
        "Now ask exactly ONE next oral-boards question. Do not reveal answers."
    )
    conversation = [
        {"role": "system", "content": SYSTEM_EXAMINER},
        {"role": "user", "content": prompt},
    ]

    reply = ollama.chat(model=LLM, messages=conversation)
    return reply["message"]["content"].strip()


def examiner_hint(history):
    """Ask the model for one short hint based on the candidate's latest answer.

    Context is retrieved with the latest candidate message, or with a default
    topic string when the candidate has not said anything yet. Returns the
    model's reply with surrounding whitespace removed.
    """
    last_user = ""
    for turn in reversed(history):
        if turn["role"] == "user":
            last_user = turn["content"]
            break

    query = last_user if last_user else "acute appendicitis adult"
    hits = retrieve(query, k=3)
    context_block = "\n\n---\n".join(text for _score, text in hits) if hits else ""

    prompt = (
        f"CONTEXT:\n{context_block}\n"
        f"Candidate said: {last_user}\nProvide ONE short hint (<=25 words)."
    )
    conversation = [
        {"role": "system", "content": SYSTEM_EXAMINER},
        {"role": "user", "content": prompt},
    ]
    reply = ollama.chat(model=LLM, messages=conversation)
    return reply["message"]["content"].strip()

def examiner_grade(history):
    """Ask the model to evaluate the candidate's most recent answer.

    Context is retrieved with the latest candidate message, or with a default
    rubric query when the candidate has not answered yet. Returns the model's
    reply with surrounding whitespace removed.
    """
    last_user = ""
    for turn in reversed(history):
        if turn["role"] == "user":
            last_user = turn["content"]
            break

    query = last_user if last_user else "evaluation rubric"
    hits = retrieve(query, k=3)
    context_block = "\n\n---\n".join(text for _score, text in hits) if hits else ""

    prompt = (
        f"CONTEXT:\n{context_block}\n"
        f"Evaluate the candidate's last answer:\n\"\"\"{last_user}\"\"\"\n"
        "Return: score 0–10, 2–3 bullet strengths, 2–3 bullet deficits, pass/fail with rationale tied to case facts."
    )
    conversation = [
        {"role": "system", "content": SYSTEM_EXAMINER},
        {"role": "user", "content": prompt},
    ]
    reply = ollama.chat(model=LLM, messages=conversation)
    return reply["message"]["content"].strip()



In [14]:
def run_sim():
    """Run the interactive oral-boards session on stdin/stdout.

    Commands (case-insensitive):
      /start  reset the history and ask the first question
      /next   ask another question without recording an answer
      /hint   print a hint for the candidate's last answer
      /grade  print an evaluation of the candidate's last answer
      /end    leave the session

    Any other non-empty input is recorded as the candidate's answer and is
    followed by the next examiner question. Until /start has been entered,
    everything except /start and /end only prints a reminder. Ctrl-C or
    end-of-input also ends the session. Returns None.
    """
    print("Oral Boards Simulator\n"
          "Commands: /start, /next, /hint, /grade, /end\n"
          "You are the CANDIDATE. The AI is the EXAMINER.\n")
    history = []
    started = False

    def ask_next_question():
        # Ask, show, and record one examiner question.
        q = examiner_question(history)
        print(f"\nExaminer: {q}\n")
        history.append({"role": "assistant", "content": q})

    while True:
        try:
            user_in = input("> ").strip()
            command = user_in.lower()

            if command == "/end":
                print("Session ended.")
                break

            if command == "/start":
                # Mark the session as running; leaving this False kept every
                # later input stuck on the 'Type "/start"' reminder.
                started = True
                history.clear()
                ask_next_question()
                continue

            if not started:
                print('Type "/start" to begin.')
                continue

            if command == "/next":
                # Advance flow: simply ask next question; retrieval uses last turn
                ask_next_question()
                continue

            if command == "/hint":
                h = examiner_hint(history)
                print(f"\nHint: {h}\n")
                continue

            if command == "/grade":
                g = examiner_grade(history)
                print(f"\nGrading:\n{g}\n")
                continue

            if not user_in:
                # A blank line is not an answer; prompt again.
                continue

            # Candidate's answer
            history.append({"role": "user", "content": user_in})
            if len(history) > 24:
                del history[:-24]  # keep context compact

            # Next examiner question
            ask_next_question()

        except (KeyboardInterrupt, EOFError):
            print("\nSession interrupted.")
            break

In [None]:
if __name__ == "__main__":
    # Print a timestamped banner naming the configured base-case path, then
    # hand control to the interactive session.
    _banner_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
    print(f"[{_banner_time}] Loading baseCase from: {BASE_PATH}")
    run_sim()

In [None]:
# This is for later use and was only for testing text-to-speech.


def speak_text(text: str, out_path: str = "answer.wav", speaker: str | None = None):
    kwargs = {}
    if speaker is not None: #multi-speaker models
        kwargs["speaker"]=speaker
    tts.tts_to_file(text=text, file_path=out_path, **kwargs)
    return ipd.Audio(out_path)

def chat_and_speak(prompt: str, out_path="answer.wav", speaker=None):
    """Send `prompt` to the LLM as a single user message, print the reply, and speak it.

    Returns whatever `speak_text` returns for the reply text.
    """
    conversation = [{"role": "user", "content": prompt}]
    reply = ollama.chat(model=LLM, messages=conversation)
    answer = reply["message"]["content"]
    print("LLM:", answer)
    return speak_text(answer, out_path, speaker)

# Example use: ask the LLM a short clinical question and speak the reply.
# Note: this sends an LLM request and writes answer.wav every time the cell runs.
chat_and_speak("Give three key symptoms of diverticulitis. No disclaimers, no special characters, give short sentences.")

In [19]:
import os, sys, subprocess, glob

# Make git usable from this kernel: locate git.exe, then put its `cmd` and
# `bin` directories on PATH for this process only (os.environ).

# Candidate install locations (system + per-user), checked in this order.
cands = [
    r"C:\Program Files\Git\cmd\git.exe",
    r"C:\Program Files (x86)\Git\cmd\git.exe",
    r"%LOCALAPPDATA%\Programs\Git\cmd\git.exe",
]
cands = [os.path.expandvars(p) for p in cands]

# Wildcard fallback for installs in a renamed folder. A glob pattern without
# a wildcard can only return the literal paths already listed above.
for _pattern in (
    r"C:\Program Files*\Git*\cmd\git.exe",
    os.path.expandvars(r"%LOCALAPPDATA%\Programs\Git*\cmd\git.exe"),
):
    cands += [p for p in glob.glob(_pattern) if p not in cands]

found = next((p for p in cands if os.path.exists(p)), None)
print("Kernel:", sys.executable)
print("Found git at:", found)

if found:
    git_cmd = os.path.dirname(found)
    # Sibling `bin` folder, built from the parent directory so that a "\cmd"
    # substring elsewhere in the path is left untouched.
    git_bin = os.path.join(os.path.dirname(git_cmd), "bin")
    # Append only the directories that are missing, so re-running this cell
    # does not keep growing PATH.
    _known = {os.path.normcase(e) for e in os.environ.get("PATH", "").split(os.pathsep)}
    _missing = [d for d in (git_cmd, git_bin) if os.path.normcase(d) not in _known]
    if _missing:
        os.environ["PATH"] = os.pathsep.join([os.environ.get("PATH", "")] + _missing)
    try:
        print("git --version ->", subprocess.check_output(["git","--version"], text=True).strip())
    except Exception as e:
        # Best effort: report the failure and let the notebook continue.
        print("Tried to add PATH but still failing:", e)
else:
    print("Could not locate git.exe automatically. If you know the path, run:")
    print(r'import os; os.environ["PATH"] += r";C:\Path\To\Git\cmd;C:\Path\To\Git\bin"')

Kernel: C:\Users\jmcam\.venv-tts\Scripts\python.exe
Found git at: C:\Program Files\Git\cmd\git.exe
git --version -> git version 2.51.0.windows.1


In [25]:
# Point the `origin` remote of the repository containing the current directory at the ENGR-498 GitHub repo.
!git remote set-url origin https://github.com/Jonah-Camacho/ENGR-498.git

In [29]:
# Print the top-level directory of the git working tree that contains the current directory.
!git rev-parse --show-toplevel

C:/Users/jmcam


In [33]:
# Re-clone ENGR-498 from scratch under Documents and confirm the kernel ends up inside the new clone.

# go somewhere you want the clone to live
%cd "C:\Users\jmcam\Documents"

# (optional) if a stale clone exists, delete it
# WARNING: `rmdir /S /Q` deletes the ENGR-498 folder and everything in it without
# asking; `2>nul` hides the error message when the folder does not exist.
!rmdir /S /Q ENGR-498 2>nul

# fresh clone and cd into it
!git clone https://github.com/Jonah-Camacho/ENGR-498.git
%cd ENGR-498

# sanity check you’re in the right repo
!git rev-parse --show-toplevel
!git status

C:\Users\jmcam\Documents


  self.shell.db['dhist'] = compress_dhist(dhist)[-100:]


C:\Users\jmcam\Documents\ENGR-498


Cloning into 'ENGR-498'...


C:/Users/jmcam/Documents/ENGR-498
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


In [47]:
# Copy the notebook from the home directory into the ENGR-498 clone.
!copy "C:\Users\jmcam\OralBoardAI.ipynb" "C:\Users\jmcam\Documents\ENGR-498\OralBoardAI.ipynb"

        1 file(s) copied.


In [49]:
# From inside the clone: stage only the notebook, commit it, and push.
%cd "C:\Users\jmcam\Documents\ENGR-498"
!git add -- "OralBoardAI.ipynb"
!git commit -m "Add OralBoardAI notebook"
!git push

C:\Users\jmcam\Documents\ENGR-498
On branch main
Your branch is up to date with 'origin/main'.

nothing to commit, working tree clean


Everything up-to-date


In [39]:
# Search C:\Users\jmcam recursively for every copy of OralBoardAI.ipynb.
!where /r "C:\Users\jmcam" OralBoardAI.ipynb

C:\Users\jmcam\OralBoardAI.ipynb
C:\Users\jmcam\ENGR-498-reset\ENGR-498\OralBoardAI.ipynb
