In [1]:
# ==============================================================================
# Imports
# ==============================================================================
import pickle
import numpy as np
import os
from typing import List, Dict, Any, Tuple
from pathlib import Path

# Library for Local Embeddings (Qwen)
from sentence_transformers import SentenceTransformer

# Library for Gemini Chat (Generation only)
from google import genai
from google.genai import types

# File paths
# Pickle file holding the customer records; it is opened with a relative path,
# so it is resolved against the current working directory.
DATA_FILE = "transcript_embeddings_localQwen.pkl"

# Model Selection
# We only define the Chat model here. 
# The embedding model is loaded locally in a later step.
# NOTE(review): the "-latest" suffix looks like a moving alias, so answers may
# change between runs as the backing model changes — confirm, and pin a version
# if reproducibility matters.
CHAT_MODEL = "gemini-flash-latest"

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# ==============================================================================
# API Key Setup
# ==============================================================================
def read_key_from_env_file(filename: str = "key.env", var_name: str = "GEMINI_API_KEY") -> str:
    """
    Read a single variable from a simple KEY=VALUE file in the current directory.

    Blank lines and lines starting with '#' are skipped. A leading 'export '
    (shell style) is accepted, whitespace around the name and value is ignored,
    and surrounding single/double quotes are removed from the value. The first
    line defining `var_name` wins.

    Args:
        filename: File name, resolved relative to the current working directory.
        var_name: Name of the variable to look up.

    Returns:
        The value of `var_name` without surrounding quotes.

    Raises:
        FileNotFoundError: `filename` does not exist in the working directory.
        RuntimeError: `var_name` is not defined in the file, or is defined
            with an empty value.
    """
    p = Path.cwd() / filename
    if not p.exists():
        raise FileNotFoundError(f"'{filename}' not found in {Path.cwd()}")

    # "utf-8-sig" reads plain UTF-8 and also drops a leading byte-order mark;
    # with plain "utf-8" a BOM sticks to the first variable name and hides it.
    for raw_line in p.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue

        # Accept shell-style "export NAME=value" lines as well.
        if line.startswith(("export ", "export\t")):
            line = line[len("export"):].lstrip()

        # Split on the first '=' only, so values may themselves contain '='.
        name, sep, value = line.partition("=")
        if not sep or name.strip() != var_name:
            continue

        value = value.strip().strip('"').strip("'")
        if not value:
            # An empty key would only surface later as an opaque auth failure.
            raise RuntimeError(f"{var_name} is defined in {filename} but its value is empty")
        return value

    raise RuntimeError(f"{var_name} not found in {filename}")

# Load the key
# Runs when the cell executes: looks up GEMINI_API_KEY in key.env in the current
# working directory and raises if the file or the variable is missing.
# Only a confirmation is printed; the key itself is never echoed to the output.
API_KEY = read_key_from_env_file()
print("API Key loaded successfully.")

API Key loaded successfully.


In [3]:
# ==============================================================================
# Load Data
# ==============================================================================
# Fail fast, with a pointer to the fix, when the embeddings file is missing.
if not os.path.exists(DATA_FILE):
    raise FileNotFoundError(f"Could not find '{DATA_FILE}'. Please run the previous notebook first.")

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load a DATA_FILE that you produced yourself; never one from an untrusted source.
with open(DATA_FILE, "rb") as f:
    records = pickle.load(f)

# Later cells read rec["customer_id"], rec["transcripts"] and rec["embeddings"]
# from each entry of `records`.
print(f"Loaded {len(records)} customer records from '{DATA_FILE}'.")

Loaded 50 customer records from 'transcript_embeddings_localQwen.pkl'.


In [4]:
# ==============================================================================
# Initialize Local Embedding Model
# ==============================================================================
print("Loading local Qwen model... (this uses your RAM/CPU)")

# We load the model exactly as we did in the data preparation step.
# NOTE(review): query vectors are only comparable with the stored transcript
# embeddings if the model name and settings match that step — confirm against
# the data preparation notebook.
# We trust remote code because Qwen is a custom architecture.
# NOTE(review): trust_remote_code=True permits Python code shipped with the model
# repository to run on this machine — confirm it is still required for this model.
embedding_model = SentenceTransformer(
    "Qwen/Qwen3-Embedding-0.6B",
    trust_remote_code=True,
    # Passed through to the tokenizer: pad on the left side.
    tokenizer_kwargs={"padding_side": "left"}
)

print("Local model loaded successfully.")

Loading local Qwen model... (this uses your RAM/CPU)
Local model loaded successfully.


In [5]:
# ==============================================================================
# Helper Functions: Selection & Math
# ==============================================================================

def select_customer(records: List[Dict[str, Any]], selector: int) -> Dict[str, Any]:
    """
    Resolve `selector` to one customer record.

    The selector is first treated as a customer_id (first matching record wins).
    When no record carries that ID, it is treated as a 1-based row number.
    An out-of-range row number trips an assertion.
    """
    # Pass 1: exact customer_id match, scanning in list order.
    not_found = object()
    by_id = next(
        (entry for entry in records if entry["customer_id"] == selector),
        not_found,
    )
    if by_id is not not_found:
        return by_id

    # Pass 2: interpret the selector as a 1-based position in the list.
    idx = selector - 1
    assert 0 <= idx < len(records), f"Client index {selector} is out of range."
    return records[idx]

def l2_normalize(vecs: np.ndarray) -> np.ndarray:
    """
    Math helper: scales vectors to unit L2 length.
    After this, a dot product between two vectors equals their cosine similarity.

    Accepts a single vector (1D), a matrix with one vector per row (2D), or any
    higher-rank batch; normalization is always applied along the LAST axis, so
    each individual vector ends up with length 1.

    Args:
        vecs: Array-like of numbers with at least one dimension.

    Returns:
        A float32 array with the same shape as the input. All-zero vectors
        stay all-zero instead of producing NaN/inf.
    """
    vecs = np.asarray(vecs, dtype=np.float32)
    # keepdims=True keeps the norm broadcastable against `vecs` for every rank,
    # which lets one code path serve 1D, 2D and batched inputs.
    # The small epsilon prevents division by zero for all-zero vectors.
    denom = np.linalg.norm(vecs, axis=-1, keepdims=True) + 1e-12
    return vecs / denom

In [6]:
# ==============================================================================
# Helper Functions: Embedding & Ranking
# ==============================================================================

def embed_prompt(prompt: str) -> np.ndarray:
    """
    Turns the user's question into a unit-length vector using the LOCAL Qwen model.
    """
    # The "query" prompt preset is what the Qwen model instructions recommend
    # for questions (as opposed to documents) to improve accuracy.
    raw_embedding = embedding_model.encode(prompt, prompt_name="query")

    # Unit length, so later dot products can be read as cosine similarity.
    unit_embedding = l2_normalize(raw_embedding)
    return unit_embedding


def rank_transcripts_by_cosine(query_vec: np.ndarray,
                               doc_vecs: np.ndarray) -> List[Tuple[int, float]]:
    """
    Ranks transcript vectors by cosine similarity to the question vector.

    Args:
        query_vec: The question embedding; flattened to a single vector.
        doc_vecs: One embedding per row (any number of rows). A single flat
            vector is treated as one transcript.

    Returns:
        A list of (row_index, similarity) pairs, best match first. Ties keep
        their original row order. Empty `doc_vecs` gives an empty list.

    Raises:
        ValueError: `doc_vecs` is not a 1D/2D array, or its vector length
            differs from the query's.
    """
    # Flatten so a (1, dim) query behaves like a plain (dim,) vector.
    q = l2_normalize(np.asarray(query_vec, dtype=np.float32).reshape(-1))

    D = np.asarray(doc_vecs, dtype=np.float32)
    if D.size == 0:
        return []
    if D.ndim == 1:
        # A lone transcript vector becomes a one-row matrix.
        D = D.reshape(1, -1)
    if D.ndim != 2 or D.shape[1] != q.shape[0]:
        raise ValueError(
            f"Shape mismatch: transcript embeddings have shape {D.shape}, "
            f"but the query vector has length {q.shape[0]}."
        )

    # Normalize both sides to be safe, so the dot product is the cosine similarity
    D = l2_normalize(D)

    # Calculate scores (Dot product)
    scores = D @ q

    # Sort them (Highest score first); a stable sort makes tie order deterministic
    order = np.argsort(-scores, kind="stable")
    return [(int(i), float(scores[i])) for i in order]

In [7]:
# ==============================================================================
# User Inputs
# ==============================================================================

# 1. The question you want to ask about the customer
#    (it is embedded for retrieval and also passed verbatim to Gemini)
USER_PROMPT = "What home service does the customer require?"

# 2. Which customer to look at (ID or Row number)
#    select_customer() tries this as a customer_id first and only then as a
#    1-based row number.
CLIENT_SELECTOR = 19

In [8]:
# ==============================================================================
# Step 1: Retrieval (Find the best transcript)
# ==============================================================================

# 1. Get the customer record
rec = select_customer(records, CLIENT_SELECTOR)
doc_texts = rec["transcripts"]
# Load the embeddings we saved in the previous notebook
doc_vecs = np.array(rec["embeddings"], dtype=np.float32)

# Ranking indices are used below to look up doc_texts, so there must be exactly
# one embedding row per transcript. Check it here instead of failing later with
# a bare IndexError (or silently ignoring transcripts).
if doc_vecs.ndim != 2 or doc_vecs.shape[0] != len(doc_texts):
    raise ValueError(
        f"Customer {rec['customer_id']}: expected one embedding row per transcript, "
        f"but got embeddings of shape {doc_vecs.shape} for {len(doc_texts)} transcripts."
    )

print(f"Selected Customer ID: {rec['customer_id']}")

# 2. Embed the user prompt locally
q_vec = embed_prompt(USER_PROMPT)

# 3. Rank the transcripts
ranking = rank_transcripts_by_cosine(q_vec, doc_vecs)

# 4. Print the results
print("\n" + "=" * 80)
# Report the real number of ranked transcripts instead of assuming four.
print(f"Top {len(ranking)} transcripts ranked by Local Qwen Model")
print("=" * 80)

for rank, (idx, score) in enumerate(ranking, start=1):
    print(f"\n[{rank}] Transcript #{idx+1} — Similarity Score: {score:.4f}")
    # Print first 200 chars as preview
    preview = doc_texts[idx][:200].replace("\n", " ")
    print(f"Content: {preview}...")

Selected Customer ID: 19

Top 4 transcripts ranked by Local Qwen Model

[1] Transcript #2 — Similarity Score: 0.2746
Content: Hello? Yes, how can I help you? Yes, hi, is this the glass docr you over for [ORGANIZATION] in [LOCATION]? Yes, ma'am. Okay. Just wanted to make sure I had the right phone number because I just did a ...

[2] Transcript #1 — Similarity Score: 0.2618
Content: We are trying to contact your [OCCUPATION] [OCCUPATION] from [ORGANIZATION]. Hello, this is [PERSON_NAME] of [ORGANIZATION] [ORGANIZATION]. How can I help you? Hi, I was wondering if I have a notifica...

[3] Transcript #3 — Similarity Score: 0.2343
Content: SA Hello? Yeah, miss [PERSON_NAME]. Yes, dear. This is not a new type of insurance and you don't have to pay any kind of additional cost for this. These are some additional Medicare benefits which are...

[4] Transcript #4 — Similarity Score: 0.1612
Content: We will discuss. Hello? Hello? Yes, hello. Yes, hello, this is [PERSON_NAME] again. Am I speaking

In [9]:
# ==============================================================================
# Step 2: Generation (Ask Gemini to answer)
# ==============================================================================

# Identify the best transcript
# `ranking` is sorted best-first by the retrieval step; without any entry there
# is nothing to send to Gemini, so stop with a clear message.
if not ranking:
    raise ValueError("No ranked transcripts available for the selected customer.")
best_idx = ranking[0][0]
best_transcript = doc_texts[best_idx]

# Create the prompt for Gemini
chat_input = f"""
You are assisting an internal call-center analysis workflow.

User prompt:
{USER_PROMPT}

Relevant transcript (Selected by local Qwen model as best match):
{best_transcript}

Guidelines:
- Rely primarily on the transcript content to answer the user prompt.
- If the transcript is missing details the prompt asks for, state what is missing.
- Keep the answer crisp and actionable.
"""

# Initialize the Gemini Client
client = genai.Client(api_key=API_KEY)

# Send to Gemini
print("\nSending best transcript to Gemini for analysis...\n")
response = client.models.generate_content(
    model=CHAT_MODEL,
    contents=chat_input
)

print("="*80)
print("Gemini Answer:")
print("="*80)
# response.text can be empty/None when the model returns no text part
# (for example a blocked or empty response); say so instead of printing "None".
answer_text = response.text
if answer_text:
    print(answer_text)
else:
    print("[Gemini returned no text for this request.]")


Sending best transcript to Gemini for analysis...

Gemini Answer:
The customer requires **Automotive Glass Repair/Adjustment**, specifically fixing a loose bracket around the rearview mirror following a recent windshield replacement.

The service is scheduled as an **in-shop visit** at the organization's shop, not a home service.
