# Backend: FastAPI + OpenAI (GPT) minimal chat API

1. Reads API keys & config from environment variables / .env
2. Supports simple single-turn conversation

In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
from openai import OpenAI
import os, uvicorn, threading
from dotenv import load_dotenv

# Load .env file if present. This has to run before os.getenv() below so that
# a key stored in .env is visible in the process environment.
load_dotenv()
# One OpenAI client shared by every request. os.getenv() returns None when
# OPENAI_API_KEY is unset, so no explicit key is supplied in that case.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Application object that the /chat route below is registered on.
app = FastAPI()

# Request body accepted by POST /chat.
class ChatRequest(BaseModel):
    message: str  # the user's text, sent to the model as a single "user" message

@app.post("/chat")
def chat_endpoint(req: ChatRequest):
    # Single-turn chat: forward the request's message to the model as the only
    # message of the conversation and hand back the first choice's text.
    #
    # req: parsed request body; only `req.message` is read.
    # Returns a dict of the form {"response": <model reply>}.
    user_turn = {"role": "user", "content": req.message}
    result = client.chat.completions.create(model="gpt-4o-mini", messages=[user_turn])
    first_choice = result.choices[0]
    return {"response": first_choice.message.content}

def run_backend():
    """Serve the module-level ``app`` with uvicorn on 0.0.0.0:8001.

    The call does not return while the server is running, which is why it is
    handed to a background thread instead of being called directly.
    """
    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8001,
    )

# Run the server in a daemon thread so the notebook cell returns immediately.
# Re-running the cell must not launch a second server: port 8001 would already
# be taken by the first one, so check for a live thread first (the same guard
# the later cells of this notebook use).
try:
    already_running = thread.is_alive()
except NameError:
    # First execution in this kernel: `thread` has not been defined yet.
    already_running = False

if already_running:
    # Be explicit that nothing new was started instead of claiming success.
    print("⚠️ A backend thread from an earlier run is still alive on port 8001; "
          "this cell's app was NOT started. Restart the kernel to serve it.")
else:
    thread = threading.Thread(target=run_backend, daemon=True)
    thread.start()
    print("✅ GPT Backend running at http://localhost:8001/chat")


✅ GPT Backend running at http://localhost:8001/chat


INFO:     Started server process [61758]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8001 (Press CTRL+C to quit)


# Extended GPT backend (Multi-turn conversation)

1. Reads API keys & config from environment variables / .env
2. Supports simple **multi-turn conversation**
3. Supports setting of **temperature & max_tokens**

In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
from openai import OpenAI
import os, uvicorn, threading
from dotenv import load_dotenv

# Load environment variables from a .env file if present. This has to run
# before os.getenv() below so that a key stored in .env is visible.
load_dotenv()
# One OpenAI client shared by every request. os.getenv() returns None when
# OPENAI_API_KEY is unset, so no explicit key is supplied in that case.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Application object that the /chat route below is registered on.
app = FastAPI()

# Request body accepted by POST /chat, with optional sampling controls.
class ChatRequest(BaseModel):
    message: str  # the user's text for this turn
    # NOTE: the `| None` annotations mean a client may omit either field (the
    # default applies) or send an explicit null (the value is then None).
    temperature: float | None = 0.7 # [NEW]
    max_tokens: int | None = 200 # [NEW]

# [NEW] global history for a single conversation.
# Module-level, so every caller of /chat shares this one list. chat_endpoint
# appends to it on each request and nothing in this cell trims or resets it.
HISTORY: list[dict[str, str]] = []

@app.post("/chat")
def chat_endpoint(req: ChatRequest):
    # Multi-turn chat over the single shared HISTORY list.
    #
    # req: parsed request body. `message` is the new user turn; `temperature`
    #      and `max_tokens` are optional and may be None (explicit null).
    # Returns a dict of the form {"response": <model reply>}.
    # Any exception raised by the OpenAI client propagates to the caller;
    # HISTORY is left untouched in that case.

    # [NEW] Fall back to the defaults when the client sent an explicit null.
    # (hasattr() is always true for a declared model field, so it cannot be
    # used to detect a missing value.)
    temperature = 0.7 if req.temperature is None else float(req.temperature)
    max_tokens = 200 if req.max_tokens is None else int(req.max_tokens)

    user_turn = {"role": "user", "content": req.message}

    # Call GPT with the accumulated history plus the new turn. The turn is not
    # stored yet: if the call fails, HISTORY must not end up with a user
    # message that never received a reply.
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[*HISTORY, user_turn],
        temperature=temperature,  # [NEW]
        max_tokens=max_tokens,  # [NEW]
    )
    # The SDK types `content` as optional; normalise a missing reply to "" so
    # HISTORY keeps holding str -> str entries.
    reply = completion.choices[0].message.content or ""

    # Commit both sides of the exchange only after a successful call.
    HISTORY.append(user_turn)
    HISTORY.append({"role": "assistant", "content": reply})

    return {"response": reply}

def run_backend():
    """Serve the module-level ``app`` with uvicorn on 0.0.0.0:8001.

    The call does not return while the server is running, which is why it is
    handed to a background thread instead of being called directly.
    """
    bind_options = {"host": "0.0.0.0", "port": 8001}
    uvicorn.run(app, **bind_options)

# Avoid duplicate threads in Jupyter: only one server can own port 8001.
# `thread` survives in the kernel between cell runs, so a NameError here means
# no backend has been started in this kernel yet.
try:
    already_running = thread.is_alive()
except NameError:
    already_running = False

if already_running:
    # The live thread keeps serving whatever app it was started with, so do
    # not claim that this cell's backend is the one running.
    print("⚠️ A backend thread from an earlier run is still alive on port 8001; "
          "this cell's app was NOT started. Restart the kernel to serve it.")
else:
    thread = threading.Thread(target=run_backend, daemon=True)
    thread.start()
    print("✅ Minimal Multi-Turn GPT Backend running at http://localhost:8001/chat")


# Extended GPT backend (Multi-turn conversation + Sessions)

1. Reads API keys & config from environment variables / .env
2. Provides a single /chat endpoint
3. Supports simple **multi-turn context** via an in-memory session store

In [None]:
from fastapi import FastAPI
from pydantic import BaseModel
from openai import OpenAI
import os, uvicorn, threading
from dotenv import load_dotenv

# Load .env file if present. This has to run before os.getenv() below so that
# a key stored in .env is visible in the process environment.
load_dotenv()
# One OpenAI client shared by every request. os.getenv() returns None when
# OPENAI_API_KEY is unset, so no explicit key is supplied in that case.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Application object that the /chat route below is registered on.
app = FastAPI()

# Request body accepted by POST /chat.
class ChatRequest(BaseModel):
    message: str  # the user's text for this turn
    # Optional: requests without an id (or with an empty one) all share the
    # "default" session in chat_endpoint.
    session_id: str | None = None   # [NEW] allow session_id from frontend

# [NEW] in-memory session store: maps a session id to that session's list of
# {"role": ..., "content": ...} messages. It lives only in this process and
# nothing in this cell removes old sessions.
SESSIONS: dict[str, list[dict[str, str]]] = {}

@app.post("/chat")
def chat_endpoint(req: ChatRequest):
    # Multi-turn chat with one history list per session id.
    #
    # req: parsed request body. `message` is the new user turn; `session_id`
    #      selects the conversation and falls back to "default" when missing
    #      or empty.
    # Returns {"response": <model reply>, "session_id": <id that was used>}.
    # Any exception raised by the OpenAI client propagates to the caller; the
    # session history is left untouched in that case.

    # [NEW] resolve session_id and fetch (or create) its history
    sid = req.session_id or "default"
    history = SESSIONS.setdefault(sid, [])

    user_turn = {"role": "user", "content": req.message}

    # [CHANGED] send the session history plus the new turn. The turn is not
    # stored yet: if the call fails, the session must not be left with a user
    # message that never received a reply.
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[*history, user_turn],
    )
    # The SDK types `content` as optional; normalise a missing reply to "" so
    # the store keeps holding str -> str entries.
    reply = completion.choices[0].message.content or ""

    # [NEW] commit both sides of the exchange only after a successful call
    history.append(user_turn)
    history.append({"role": "assistant", "content": reply})

    return {"response": reply, "session_id": sid}  # [CHANGED] return session_id too

def run_backend():
    """Serve the module-level ``app`` with uvicorn on 0.0.0.0:8001.

    The call does not return while the server is running, which is why it is
    handed to a background thread instead of being called directly.
    """
    listen_host, listen_port = "0.0.0.0", 8001
    uvicorn.run(app, host=listen_host, port=listen_port)

# Avoid re-running multiple uvicorn servers in Jupyter: only one server can
# own port 8001. `thread` survives in the kernel between cell runs, so a
# NameError here means no backend has been started in this kernel yet.
try:
    already_running = thread.is_alive()
except NameError:
    already_running = False

if already_running:
    # The live thread keeps serving whatever app it was started with, so do
    # not claim that this cell's backend is the one running.
    print("⚠️ A backend thread from an earlier run is still alive on port 8001; "
          "this cell's app was NOT started. Restart the kernel to serve it.")
else:
    thread = threading.Thread(target=run_backend, daemon=True)
    thread.start()
    print("✅ GPT Backend with Multi-Turn running at http://localhost:8001/chat")


# Extended GPT backend (Multi-turn conversation + CORS)

1. Reads API keys & config from environment variables / .env
2. Provides a single /chat endpoint
3. Supports simple multi-turn context via an in-memory session store
4. **Safe error handling and CORS for local dev**

In [None]:
import os
import threading
from typing import Dict, List
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn

from dotenv import load_dotenv
load_dotenv()  # load .env if present

# --- OpenAI new SDK (>= 1.0) ---
from openai import OpenAI

# --- Configuration, read once when the cell runs ---
# Fail fast with a clear message instead of letting the first request fail.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise RuntimeError("OPENAI_API_KEY is missing. Put it in your environment or a .env file.")

# Both settings can be overridden through the environment; the second
# argument of os.getenv() is the fallback used when the variable is unset.
OPENAI_BASE_URL = os.getenv("OPENAI_BASE_URL", "https://api.openai.com/v1")
OPENAI_MODEL = os.getenv("OPENAI_MODEL", "gpt-4o-mini")

# One OpenAI client shared by every request.
client = OpenAI(
    api_key=OPENAI_API_KEY,
    base_url=OPENAI_BASE_URL
)

# ------------- FastAPI app -------------
app = FastAPI(title="CS3249 GPT Backend")

# Allow local frontends to call this API
# NOTE(review): wildcard origins together with allow_credentials=True is a
# combination the FastAPI CORS documentation warns about; the visible endpoint
# reads no cookies or auth headers, so credentials may not be needed — confirm
# before reusing this configuration outside a demo.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],    # for teaching/demo; restrict in production
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Simple in-memory session store: { session_id: [{"role": "...", "content": "..."}] }
# It lives only in this process and nothing in this cell removes old sessions.
SESSIONS: Dict[str, List[Dict[str, str]]] = {}

# Request body accepted by POST /chat.
class ChatRequest(BaseModel):
    message: str  # the user's text for this turn
    session_id: str | None = None  # optional: supply a session id to keep context

# Response body of POST /chat (declared as the route's response_model).
class ChatResponse(BaseModel):
    response: str  # the model's reply, or a "[Backend error: ...]" string on failure
    session_id: str  # session the exchange belongs to; send it back to keep context

def get_or_create_session(session_id: str | None) -> str:
    """Return a session_id and ensure it exists in memory.

    Args:
        session_id: Id supplied by the client, or None/"" to request a new
            session.

    Returns:
        The supplied id, or a freshly generated ``"sess-<hex>"`` id when none
        was supplied. In both cases ``SESSIONS`` has an entry for the returned
        id afterwards (an empty history if it did not exist before).
    """
    # Local import so this function is self-contained.
    import uuid

    if not session_id:
        # A counter derived from len(SESSIONS) can collide with an id chosen
        # by a client (e.g. "sess-2") or between two simultaneous requests,
        # which would silently merge two conversations; a random UUID cannot.
        session_id = f"sess-{uuid.uuid4().hex}"
    SESSIONS.setdefault(session_id, [])
    return session_id

@app.post("/chat", response_model=ChatResponse)
def chat_endpoint(req: ChatRequest):
    # Multi-turn chat with per-session history and a stable JSON error shape.
    #
    # req: parsed request body. `message` is the new user turn; `session_id`
    #      is optional, and a new session is created when it is missing.
    # Returns a ChatResponse. On failure `response` carries a
    # "[Backend error: ...]" string instead of raising, and the session
    # history is left unchanged.

    # 1) resolve the session first, outside the try block, so that even an
    #    error response reports the real session id — including one that was
    #    just generated — rather than a placeholder the client cannot reuse.
    sid = get_or_create_session(req.session_id)
    history = SESSIONS[sid]

    user_turn = {"role": "user", "content": req.message}

    # Build messages for the Chat Completions API: fixed system prompt for a
    # consistent persona, then the stored history, then the new user turn.
    messages = [
        {"role": "system", "content": "You are a helpful, concise teaching assistant."},
        *history,
        user_turn,
    ]

    try:
        # 2) call OpenAI Chat Completions
        completion = client.chat.completions.create(
            model=OPENAI_MODEL,
            messages=messages,
            temperature=0.7,
        )
        # The SDK types `content` as optional; normalise a missing reply to ""
        # because ChatResponse.response must be a str.
        reply = completion.choices[0].message.content or ""
    except Exception as e:
        # Surface error to the caller but keep JSON shape stable
        return ChatResponse(response=f"[Backend error: {e}]", session_id=sid)

    # 3) update the session only after a successful call
    history.append(user_turn)
    history.append({"role": "assistant", "content": reply})

    return ChatResponse(response=reply, session_id=sid)

def run_backend():
    """Serve the module-level ``app`` with uvicorn on 0.0.0.0:8001.

    The call does not return while the server is running, which is why it is
    handed to a background thread instead of being called directly.
    """
    # Port 8001 rather than 8000 so this does not clash with other demos.
    server_options = {"host": "0.0.0.0", "port": 8001, "log_level": "info"}
    uvicorn.run(app, **server_options)

# Start once per Notebook execution: only one server can own port 8001.
# `thread` survives in the kernel between cell runs, so a NameError here means
# no backend has been started in this kernel yet.
try:
    already_running = thread.is_alive()
except NameError:
    already_running = False

if already_running:
    # The live thread keeps serving whatever app it was started with, so do
    # not claim that this cell's backend (or its settings) is the one running.
    print("⚠️ A backend thread from an earlier run is still alive on port 8001; "
          "this cell's app was NOT started. Restart the kernel to serve it.")
else:
    thread = threading.Thread(target=run_backend, daemon=True)
    thread.start()
    print("✅ GPT Backend running at http://localhost:8001/chat")
    print(f"   Model: {OPENAI_MODEL} | Base URL: {OPENAI_BASE_URL}")
