**Goal**

Build a simple question-answering system that can answer natural-language questions about member data provided by our public API. Example questions include:

“When is Layla planning her trip to London?”

“How many cars does Vikram Desai have?”

“What are Amira’s favorite restaurants?”

Your system should expose a simple API endpoint (for example /ask) that we can call with a question and receive an answer.

**API**

Use the GET /messages endpoint described in Swagger: 👉 https://november7-730026606190.europe-west1.run.app/docs#/default/get_messages_messages__get

In [1]:
import requests
import pandas as pd

BASE_URL = "https://november7-730026606190.europe-west1.run.app/messages/"

def fetch_messages(skip=0, limit=100, timeout=10):
    """Fetch one page of member messages from the public API.

    Args:
        skip: Value sent as the ``skip`` query parameter (page offset).
        limit: Value sent as the ``limit`` query parameter (page size).
        timeout: Seconds to wait for the server. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the server does not answer within ``timeout``.
    """
    resp = requests.get(
        BASE_URL,
        params={"skip": skip, "limit": limit},
        timeout=timeout,
    )
    resp.raise_for_status()
    return resp.json()

# Fetch a single page of messages (default skip/limit) and load the
# response's "items" list into a DataFrame for inspection.
data = fetch_messages()
df = pd.DataFrame(data["items"])
df.head()  # notebook display of the first rows


Unnamed: 0,id,user_id,user_name,timestamp,message
0,b1e9bb83-18be-4b90-bbb8-83b7428e8e21,cd3a350e-dbd2-408f-afa0-16a072f56d23,Sophia Al-Farsi,2025-05-05T07:47:20.159073+00:00,Please book a private jet to Paris for this Fr...
1,609ba052-c9e7-49e6-8b62-061eb8785b63,e35ed60a-5190-4a5f-b3cd-74ced7519b4a,Fatima El-Tahir,2024-11-14T20:03:44.159235+00:00,Can you confirm my dinner reservation at The F...
2,44be0607-a918-40fa-a122-b2435fe54f3e,23103ae5-38a8-4d82-af82-e9942aa4aefb,Armand Dupont,2025-03-09T02:25:23.159256+00:00,I need two tickets to the opera in Milan this ...
3,a1579c1b-7f25-4d92-b421-0982f8fbf566,5b2e7346-eef5-445d-a063-6c5267f04bf8,Hans Müller,2025-08-02T05:20:44.159269+00:00,Could you check why my recent payment hasn't b...
4,43d8a12e-4fdb-4c82-8a78-f7dfff583b9f,fc15e14c-f56f-4137-a7cd-797f90b61c93,Layla Kawaguchi,2025-04-10T06:52:16.159280+00:00,Please remember I prefer aisle seats during my...


In [2]:
!pip install sentence-transformers fastapi uvicorn nest_asyncio pyngrok


Collecting pyngrok
  Downloading pyngrok-7.4.1-py3-none-any.whl.metadata (8.1 kB)
Downloading pyngrok-7.4.1-py3-none-any.whl (25 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.4.1


In [3]:
from sentence_transformers import SentenceTransformer
import numpy as np

# Load the sentence-embedding model once at module level.
model = SentenceTransformer("all-MiniLM-L6-v2")

# Embed the text of every message currently held in `df`; the result is
# requested as a NumPy array (convert_to_numpy=True).
messages = df["message"].tolist()
embeddings = model.encode(messages, convert_to_numpy=True)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [4]:
from fastapi import FastAPI
from pydantic import BaseModel
import numpy as np

# FastAPI application instance that routes are registered on.
app = FastAPI()

# Request-body model: a JSON object with a single string field.
class Query(BaseModel):
    question: str  # the natural-language question to answer

@app.post("/ask")
def ask(payload: Query):
    # Embed the question with the same model used for the message corpus.
    query_vec = model.encode([payload.question])[0]

    # Cosine similarity: dot product divided by the product of the norms,
    # computed for every stored message at once.
    dot_products = embeddings @ query_vec
    norm_products = np.linalg.norm(embeddings, axis=1) * np.linalg.norm(query_vec)
    similarities = dot_products / norm_products

    # The most similar message is returned verbatim as the answer.
    return {"answer": messages[np.argmax(similarities)]}


In [5]:
import os
import re
from typing import Any, Dict, List, Optional

import httpx
from fastapi import FastAPI, HTTPException, Query

# Base URL for the public messages API.
# Override via env var if needed. A trailing slash on the override is
# stripped so the endpoint below never contains a double slash.
MEMBER_API_BASE = os.getenv(
    "MEMBER_API_BASE",
    "https://november7-730026606190.europe-west1.run.app",
).rstrip("/")

# Full URL of the messages collection, built from the normalized base.
MESSAGES_ENDPOINT = f"{MEMBER_API_BASE}/messages"

# Very small stopword list for scoring.
# Holds lowercase articles, prepositions, auxiliaries and question words;
# tokenize() filters these out of its result.
STOPWORDS = {
    "the", "a", "an", "of", "and", "or", "to", "in", "on",
    "for", "with", "at", "is", "are", "was", "were", "do",
    "does", "did", "be", "have", "has", "had", "how", "what",
    "when", "where", "who", "why", "which",
}

# FastAPI application object for this cell's service; title, description
# and version are passed to FastAPI as application metadata.
app = FastAPI(
    title="Member QA Service",
    description="Simple question-answering over member messages.",
    version="0.1.0",
)


def tokenize(text: str) -> List[str]:
    """Return the lowercase word tokens of *text* that are not stopwords.

    Tokens are maximal runs of word characters, so punctuation and
    whitespace act as separators. Order and duplicates are preserved.
    """
    lowered = text.lower()
    return [word for word in re.findall(r"\w+", lowered) if word not in STOPWORDS]


def extract_message_text(msg: Dict[str, Any]) -> str:
    """
    Pull the human-readable text out of a message record.

    The fields "text", "message", "content" and "body" are checked in that
    order and the first one holding a string is returned. When none of them
    does, the whole record is stringified as a last resort.
    """
    candidates = (msg.get(field) for field in ("text", "message", "content", "body"))
    first_text = next((value for value in candidates if isinstance(value, str)), None)
    # Compare against None explicitly: an empty string is still a valid text.
    return str(msg) if first_text is None else first_text


def normalize_messages_payload(raw: Any) -> List[Dict[str, Any]]:
    """
    Extract the list of message records from a /messages response body.

    Handles either:
      - {"messages": [...]}
      - {"items": [...]}   (paginated envelope)
      - [...]

    When a dict carries both keys, "messages" takes precedence. Entries of
    the list that are not dicts are dropped.

    Raises:
        ValueError: if no list of messages can be located in ``raw``.
    """
    messages = raw
    if isinstance(raw, dict):
        for key in ("messages", "items"):
            if key in raw:
                messages = raw[key]
                break

    if not isinstance(messages, list):
        raise ValueError("Unexpected /messages API format: not a list")

    return [m for m in messages if isinstance(m, dict)]


def score_question_vs_message(question_tokens: List[str], msg_tokens: List[str]) -> float:
    """
    Jaccard-style relevance: distinct shared tokens over distinct total tokens.

    Returns 0.0 when the message has no tokens or when there is nothing to
    compare; otherwise a value between 0.0 (no overlap) and 1.0 (same
    vocabulary).
    """
    if not msg_tokens:
        return 0.0

    question_vocab = set(question_tokens)
    message_vocab = set(msg_tokens)
    combined_size = len(question_vocab | message_vocab)
    shared_size = len(question_vocab & message_vocab)
    return shared_size / combined_size if combined_size else 0.0


def find_best_message(question: str, messages: List[Dict[str, Any]]) -> Optional[Dict[str, Any]]:
    """
    Pick the message whose text overlaps most with the question.

    Returns None when the question has no usable tokens, when there are no
    messages, or when the best overlap score is below the 0.05 cut-off.
    On equal scores the earliest message wins.

    NOTE: only the text from extract_message_text() is scored; other fields
    of the record are not consulted.
    """
    question_tokens = tokenize(question)
    if not question_tokens:
        return None

    scored = [
        (
            score_question_vs_message(
                question_tokens, tokenize(extract_message_text(candidate))
            ),
            candidate,
        )
        for candidate in messages
    ]
    if not scored:
        return None

    # max() keeps the first of several equally scored candidates.
    top_score, top_msg = max(scored, key=lambda pair: pair[0])

    # You can tune this threshold; below it we say "I don't know"
    return top_msg if top_score >= 0.05 else None


@app.get("/ask")
async def ask(question: str = Query(..., min_length=3)) -> Dict[str, str]:
    """
    Answer a question with the best-matching member message.

    Example:
      GET /ask?question=When%20is%20Layla%20planning%20her%20trip%20to%20London%3F

    Response:
      { "answer": "<text of the best-matching message>" }

    The answer is the matched message returned verbatim, or a fixed
    "could not find an answer" sentence when nothing matches well enough.

    Errors:
      502 if the member API cannot be reached or answers with an error status.
      500 if its response cannot be parsed.
    """
    # httpx does not follow redirects by default; enable it so that a
    # redirect between /messages and /messages/ is not reported as a 502.
    async with httpx.AsyncClient(timeout=10.0, follow_redirects=True) as client:
        try:
            resp = await client.get(MESSAGES_ENDPOINT)
            resp.raise_for_status()
        except httpx.HTTPError as e:
            raise HTTPException(
                status_code=502,
                detail=f"Error calling member API: {e}",
            ) from e

    try:
        raw = resp.json()
        messages = normalize_messages_payload(raw)
    except Exception as e:  # noqa: BLE001
        raise HTTPException(
            status_code=500,
            detail=f"Failed to parse /messages response: {e}",
        ) from e

    best_msg = find_best_message(question, messages)

    # Test for None explicitly so "no match" is not confused with a falsy record.
    if best_msg is None:
        return {"answer": "I could not find an answer to that question in the member messages."}

    answer_text = extract_message_text(best_msg)
    return {"answer": answer_text}


In [8]:
from fastapi import FastAPI
import requests
import re
import spacy

# FastAPI application instance for this cell's /ask route.
app = FastAPI()
# spaCy pipeline loaded once at import time; extract_name() reads its
# PERSON entities.
nlp = spacy.load("en_core_web_sm")

# Messages endpoint of the member API; queried with skip/limit parameters.
API_URL = "https://november7-730026606190.europe-west1.run.app/messages"

# Extract person names from question
def extract_name(question):
    """Return the text of the first PERSON entity found in *question*, or None."""
    person_mentions = (
        entity.text for entity in nlp(question).ents if entity.label_ == "PERSON"
    )
    return next(person_mentions, None)

# Detect intent of the question
def detect_intent(question):
    """Classify *question* as "date", "count", "favorite", "travel" or "general".

    Matching is a case-insensitive substring test. Rules are tried in the
    order listed below and the first rule with a keyword present wins.
    """
    lowered = question.lower()
    rules = (
        (("when",), "date"),
        (("how many", "number of"), "count"),
        (("favorite", "favourite"), "favorite"),
        (("trip", "travel"), "travel"),
    )
    for keywords, intent in rules:
        if any(keyword in lowered for keyword in keywords):
            return intent
    return "general"

# Fetch all messages from API
def fetch_all_messages(timeout=10):
    """Download every message by paging through the API 100 records at a time.

    Paging stops at the first page that holds fewer records than requested.

    Args:
        timeout: Seconds to wait for each request. Without a timeout
            ``requests`` can block indefinitely on a stalled connection.

    Returns:
        A list with the "items" of all pages, in the order received.

    Raises:
        requests.HTTPError: If a page request answers with an error status.
        requests.Timeout: If a page request exceeds ``timeout``.
    """
    messages = []
    skip = 0
    limit = 100
    while True:
        resp = requests.get(
            API_URL,
            params={"skip": skip, "limit": limit},
            timeout=timeout,
        )
        # Fail loudly on an error status instead of treating the error body
        # as an empty final page and returning a silently truncated list.
        resp.raise_for_status()
        items = resp.json().get("items", [])
        messages.extend(items)
        if len(items) < limit:
            break
        skip += limit
    return messages

# Main API endpoint
@app.get("/ask")
def ask(question: str):
    """Answer a question about a member using name and intent heuristics.

    Steps:
      1. Extract a person name from the question; without one a fixed
         "could not identify" answer is returned.
      2. Collect the messages whose user name starts with that name
         (case-insensitive) and join them into one text.
      3. Depending on the detected intent, return the first date-like,
         numeric, "favorite" or travel-related fragment found in that text.
      4. If nothing matches, return the first 300 characters of the text.

    Always returns a dict of the form {"answer": <str>}.
    """
    name = extract_name(question)
    if not name:
        return {"answer": "I could not identify the member name in the question."}

    intent = detect_intent(question)
    messages = fetch_all_messages()

    # Filter messages by user name. Records lacking a string user_name or
    # message are skipped instead of raising KeyError/AttributeError.
    wanted = name.lower()
    user_msgs = [
        m["message"]
        for m in messages
        if isinstance(m.get("user_name"), str)
        and isinstance(m.get("message"), str)
        and m["user_name"].lower().startswith(wanted)
    ]

    if not user_msgs:
        return {"answer": f"No messages found for {name}."}

    text = " ".join(user_msgs)

    # Extract answer based on intent
    if intent == "date":
        # NOTE(review): the first alternative matches any word followed by a
        # 1-2 digit number, not only month names.
        date = re.search(r"(\b\w+ \d{1,2}\b|\d{1,2}/\d{1,2}/\d{2,4})", text)
        if date:
            return {"answer": f"{name} mentioned: {date.group(0)}"}

    if intent == "count":
        count = re.search(r"\b\d+\b", text)
        if count:
            return {"answer": f"{name} has {count.group(0)}."}

    if intent == "favorite":
        m = re.search(r"(favorite.*?\.|favourite.*?\.|I love .*?\.|my favorite.*?\.)", text, re.I)
        if m:
            return {"answer": m.group(0)}

    if intent == "travel":
        # "going to ..." must end at a period followed by a space OR by the
        # end of the text, so the last sentence of the last message can
        # match too; the separator space is stripped from the answer.
        m = re.search(r"(trip.*?\.|travel.*?\.|going to .*?\.(?: |$))", text, re.I)
        if m:
            return {"answer": m.group(0).strip()}

    # Fallback: leading excerpt; the ellipsis is added only when text was cut.
    excerpt = text[:300]
    if len(text) > 300:
        excerpt += "..."
    return {"answer": excerpt}
