In [32]:
# Install the packages this notebook uses into the kernel's environment (-q = quiet output)
%pip install openai tqdm numpy tiktoken pinecone fastapi uvicorn pyngrok pydantic -q

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip available: 22.3.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


#### **IMPORTS**

In [None]:
import os, textwrap, math, openai, uvicorn
from tqdm.auto import tqdm
import numpy as np
from pinecone import Pinecone, ServerlessSpec
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse
from pydantic import BaseModel
from pyngrok import conf, ngrok
from pathlib import Path
import multiprocessing as mp
from google.colab import userdata

#### **API KEYS**

In [None]:
# Connect to Pinecone (v3 client) with the API key kept in Colab's secret store
pc = Pinecone(api_key=userdata.get("PINECONE_API_KEY"))
index_name = "java-embeddings"

# Create the index on first use; later runs find it by name and skip this step
if not (index_name in pc.list_indexes().names()):
    pc.create_index(
        name=index_name,
        metric="cosine",
        dimension=1536,  # vector size for text-embedding-3-small
        spec=ServerlessSpec(region="us-east-1", cloud="aws"),
    )

index = pc.Index(index_name)

# Model identifiers used by the helper functions
CHAT_MODEL, EMBED_MODEL = "gpt-4o", "text-embedding-3-small"

#### **HELPER FUNCTIONS**

In [35]:
def top_k_chunks(query, k=3):
    """Embed `query` and look up its `k` nearest chunks in the vector index.

    Returns a list of ``(text, score)`` tuples in the order the index reports
    its matches; ``text`` is read from each match's ``metadata["text"]``.
    """
    embedding_response = openai.embeddings.create(model=EMBED_MODEL, input=query)
    query_vector = embedding_response.data[0].embedding

    search_result = index.query(vector=query_vector, top_k=k, include_metadata=True)

    pairs = []
    for hit in search_result.matches:
        pairs.append((hit.metadata["text"], hit.score))
    return pairs

def chat(messages, temp=0.0):
    """Send `messages` to the chat model and return its reply with token counts.

    Args:
        messages: Chat-completions message list (dicts with "role" and "content").
        temp: Sampling temperature passed through to the API.

    Returns:
        Tuple ``(text, prompt_tokens, completion_tokens)``. ``text`` is the first
        choice's content with surrounding whitespace stripped, or ``""`` when the
        response carries no text content.
    """
    resp = openai.chat.completions.create(
        model=CHAT_MODEL, messages=messages, temperature=temp
    )
    # The API declares `content` as nullable, so guard against None rather
    # than crashing on `.strip()`.
    content = resp.choices[0].message.content
    msg = (content or "").strip()
    usage = resp.usage
    return msg, usage.prompt_tokens, usage.completion_tokens

def answer_kb_only(question, k=5):
    """Return the raw retrieved snippets for `question`, without calling the chat model.

    Each snippet is rendered as a ``[Score x.xxx]`` header followed by its text,
    and the snippets are separated by a dashed rule. The result is a tuple
    ``(text, 0, 0)``.
    """
    separator = "\n\n-----------------\n"
    sections = []
    for snippet, similarity in top_k_chunks(question, k):
        sections.append(f"[Score {similarity:.3f}]\n{snippet}")
    # No chat completion happens here, so both token counts are reported as zero.
    return separator.join(sections), 0, 0

def answer_rag(question, k=5):
    """Answer `question` with the chat model, grounded on the top-`k` retrieved chunks.

    Each retrieved chunk is flattened to a single line and listed as a bullet in
    the system prompt; the question itself is sent as the user message.
    Returns whatever `chat` returns for that conversation.
    """
    bullets = []
    for snippet, _score in top_k_chunks(question, k):
        flattened = snippet.replace("\n", " ")
        bullets.append("• " + flattened)
    facts = "\n".join(bullets)

    instructions = "You are an expert assistant. Use ONLY the facts below (plus your own language knowledge) to answer:\n\n"
    conversation = [
        dict(role="system", content=instructions + facts),
        dict(role="user", content=question),
    ]
    return chat(conversation)

def answer_no_context(question):
    """Send `question` to the chat model as the only message, with no retrieved context."""
    user_turn = {"role": "user", "content": question}
    return chat([user_turn])

# Answering strategies, keyed by the menu choice as a string:
# key -> (human-readable title, handler function)
MODES = {}
MODES["1"] = ("KB-only (retrieved snippets)", answer_kb_only)
MODES["2"] = ("RAG (top-k chunks only)", answer_rag)
MODES["3"] = ("No KB (just question to model)", answer_no_context)

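# Example of interactive use (kept for reference, not executed):
#mode = input("Choose mode (1-3): ").strip()
#title, fn = MODES[mode]   # MODES keys are strings ("1"-"3"), so index with the input string
#print(f"\n--- {title} ---\n")
#reply, in_tok, out_tok = fn(question)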

#### **SETUP SERVER**

In [38]:
# The web application; files in the local "static" directory are served under /static
app = FastAPI(title="AP CS Test Teacher")

static_assets = StaticFiles(directory="static")
app.mount("/static", static_assets, name="static")

# Site root: respond with the front-end page stored at static/index.html
@app.get("/")
def root():
    index_page = FileResponse("static/index.html")
    return index_page

# Enable CORS for all origins, methods and headers so the page can call the API
# from whatever public URL it is served on.
# Credentials stay disabled: wildcard origins together with credentialed requests
# would let any website send cookie-authenticated calls, and no handler in this
# app reads cookies or auth headers.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allow all
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Read the OpenAI key from Colab's stored secrets and register it with the SDK
openai.api_key = OPENAI_API_KEY = userdata.get("OPENAI_API_KEY")

# Pydantic model for the request body of POST /ask:
# a JSON object with a single string field.
class PromptRequest(BaseModel):
    prompt: str  # the user's question; the /ask handler passes it to answer_rag unchanged

@app.post("/ask")
async def ask_openai(request: PromptRequest):
    """Answer the submitted prompt using retrieval-augmented generation.

    Returns a JSON object of the form ``{"response": <answer text>}``.
    """
    import asyncio  # local import: only this handler needs it

    print(request.prompt)
    # answer_rag performs blocking network calls (embedding, vector query, chat
    # completion). Running it in a worker thread keeps the event loop free to
    # serve other requests in the meantime.
    answer, _, _ = await asyncio.to_thread(answer_rag, request.prompt)
    print(answer)
    return {"response": answer}

def start_api(host="0.0.0.0", port=8000):
    """Run the FastAPI app under uvicorn.

    Args:
        host: Address to bind the server to. Defaults to "0.0.0.0".
        port: TCP port to listen on. Defaults to 8000, the port the ngrok
            tunnel in the launch code points at; change both together.
    """
    uvicorn.run(app, host=host, port=port)

# Launch with ngrok: open a public tunnel to port 8000, run the API in a child
# process, and always tear both down afterwards.
if __name__ == "__main__":
    NGROK_AUTH_TOKEN = userdata.get("NGROK_AUTH_TOKEN")
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    ngrok.kill()  # Clean any previous tunnels

    api_proc = None
    try:
        public_url = ngrok.connect(addr="8000", proto="http", bind_tls=True)
        print("Public URL:", public_url.public_url)

        # Start API in a separate process and wait for it to finish
        api_proc = mp.Process(target=start_api, daemon=True)
        api_proc.start()
        api_proc.join()
    except KeyboardInterrupt:
        print("\nShutting down…")
    finally:
        # Runs on every exit path (interrupt, failed connect, server exit) so
        # this cell does not leave its ngrok agent or API process running.
        if api_proc is not None and api_proc.is_alive():
            api_proc.terminate()
            api_proc.join(timeout=5)
        ngrok.kill()

t=2025-06-12T16:24:46-0400 lvl=eror msg="failed to reconnect session" obj=tunnels.session err="authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/agents\r\n\r\nERR_NGROK_108\r\n"
t=2025-06-12T16:24:46-0400 lvl=eror msg="session closing" obj=tunnels.session err="authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration fil

PyngrokNgrokError: The ngrok process errored on start: authentication failed: Your account is limited to 1 simultaneous ngrok agent sessions.\nYou can run multiple simultaneous tunnels from a single agent session by defining the tunnels in your agent configuration file and starting them with the command `ngrok start --all`.\nRead more about the agent configuration file: https://ngrok.com/docs/secure-tunnels/ngrok-agent/reference/config\nYou can view your current agent sessions in the dashboard:\nhttps://dashboard.ngrok.com/agents\r\n\r\nERR_NGROK_108\r\n.