# RETRIEVAL CODE

### Retrieve

In [107]:
import os
import re
import httpx
from pinecone import Pinecone
from dotenv import load_dotenv

# Load variables from a .env file into the process environment so the
# os.getenv() calls in the configuration cell can read them.
load_dotenv()


True

In [108]:
# --- Configuration ---
# Every setting is read from the environment. Settings without a fallback
# (the API keys and the namespace) are None when the variable is unset.

# Pinecone
PINECONE_API_KEY    = os.getenv("PINECONE_API_KEY")
NAMESPACE           = os.getenv("PINECONE_NAMESPACE", None)
INDEX_NAME          = os.getenv("PINECONE_INDEX_NAME", "rag-about-me")
# Minimum similarity score a match must reach to be kept as context.
SIM_THRESHOLD       = float(os.getenv("SIM_THRESHOLD", "0.4"))
# Number of matches requested per query. The fallback matters: without it an
# unset RETRIEVE_K makes os.getenv return None and int(None) raises TypeError.
# The value 5 is only a fallback; set RETRIEVE_K in the environment to tune it.
RETRIEVE_K          = int(os.getenv("RETRIEVE_K", "5"))

# DeepInfra chat-completion endpoint
DEEPINFRA_API_KEY   = os.getenv("DEEPINFRA_API_KEY")
DEEPINFRA_LLM_MODEL = os.getenv("DEEPINFRA_LLM_MODEL", "meta-llama/Meta-Llama-3-8B-Instruct")
DEEPINFRA_CHAT_URL  = os.getenv("DEEPINFRA_CHAT_URL", "https://api.deepinfra.com/v1/openai/chat/completions")

# DeepInfra embedding endpoint
DEEPINFRA_EMBEDDING_MODEL = os.getenv("DEEPINFRA_EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2")
DEEPINFRA_EMBEDDING_URL   = os.getenv("DEEPINFRA_EMBEDDING_URL", "https://api.deepinfra.com/v1/openai/embeddings")


In [109]:
# --- Pinecone client ---
# Create the client with the API key from the configuration cell, then get a
# handle to the index named INDEX_NAME. `index` is what the retrieval cell
# further down calls .query() on.
pc    = Pinecone(api_key=PINECONE_API_KEY)
index = pc.Index(INDEX_NAME)


In [110]:
# Sample user question: it is embedded for retrieval and later sent to the LLM as the user turn.
query = 'Where can i try your voice time capsule?'

In [112]:
import asyncio

async def embed_query(query):
    """Return the embedding vector for ``query`` from the DeepInfra embeddings endpoint.

    Args:
        query: Text to embed; sent as the ``input`` field of the request body.

    Returns:
        The ``embedding`` value of the first item in the response's ``data`` list.

    Raises:
        httpx.HTTPStatusError: If the endpoint responds with a 4xx/5xx status.
    """
    # 1.) payload
    payload = {
        "input": query,
        "model": DEEPINFRA_EMBEDDING_MODEL,
        "encoding_format": "float"
    }
    # 2.) headers
    headers = {
        "Authorization": f"Bearer {DEEPINFRA_API_KEY}",
        "Content-Type": "application/json"
    }
    # 3.) make request
    async with httpx.AsyncClient(timeout=30) as client:
        resp = await client.post(DEEPINFRA_EMBEDDING_URL,
                                 json=payload,
                                 headers=headers)
        # Fail with the real HTTP error (bad key, quota, server fault) rather
        # than a confusing KeyError on "data" when the body is an error object.
        resp.raise_for_status()
        return resp.json()["data"][0]["embedding"]


In [113]:
# payload to DeepInfra embedding API
payload = {
    "input": query,
    "model": DEEPINFRA_EMBEDDING_MODEL,
    "encoding_format": "float"
}

headers = {
    "Authorization": f"Bearer {DEEPINFRA_API_KEY}",
    "Content-Type": "application/json"
}

import asyncio

async def get_embedding():
    async with httpx.AsyncClient(timeout=30) as client:
        resp = await client.post(DEEPINFRA_EMBEDDING_URL,
                                 json=payload,
                                 headers=headers)
        return resp.json()["data"][0]["embedding"]

vec = await get_embedding()
vec[:5]  # just to peek at first few values


[-0.09176679700613022,
 -0.08257985860109329,
 0.010446778498589993,
 -0.034100234508514404,
 -0.028550829738378525]

In [114]:
# Any falsy namespace (e.g. an empty string from the environment) becomes None.
ns = NAMESPACE or None

# Pinecone index
# Ask the index for the RETRIEVE_K nearest neighbours of the query embedding.
# include_metadata=True returns each match's stored metadata, which the later
# cells read the "question" and "answer" fields from.
resp = index.query(
    vector=vec,
    top_k=RETRIEVE_K,
    include_metadata=True,
    namespace=ns
)

# Keep only the list of matches from the query response.
matches = resp.matches
matches


[{'id': '58ef9a0e-2c72-4e49-8323-ce572febef1e',
  'metadata': {'answer': 'heres the link: '
                         'https://voice-time-capsule-production.up.railway.app/ ',
               'question': 'Where can i try your voice time capsule?'},
  'score': 0.999978,
  'values': []},
 {'id': '0c4c9aed-4ce6-4079-92ae-8129536b3b90',
  'metadata': {'answer': 'I used Python, Railway for cloud deployment, '
                         'PostgreSQL for database, Basic HTML for UI, OpenAI '
                         'whisper for voice-to-text, TTS, and Eleven Labs API '
                         'for voice cloning and text to speech',
               'question': 'What tools and technologies did you use for the '
                           'Voice Time Capsule?'},
  'score': 0.690911651,
  'values': []}]

In [115]:
# for test only
# NOTE(review): this hard-coded value replaces the SIM_THRESHOLD read from the
# environment in the configuration cell; remove it once testing is done.
SIM_THRESHOLD=0.4

# Matches whose similarity score reaches the threshold.
good = []

for m in matches:
    if m.score >= SIM_THRESHOLD:
        print(f"PASS: {m.score:.4f} >= {SIM_THRESHOLD} ID: {m.id}")
        good.append(m)
    else:
        # This branch runs when the score is below the threshold, so the
        # message reports "<" (it used to print ">=", which was misleading).
        print(f"SKIP: {m.score:.4f} < {SIM_THRESHOLD} ID: {m.id}")


PASS: 1.0000 >= 0.4 ID: 58ef9a0e-2c72-4e49-8323-ce572febef1e
PASS: 0.6909 >= 0.4 ID: 0c4c9aed-4ce6-4079-92ae-8129536b3b90


In [116]:
# Display the matches that passed the similarity threshold.
good

[{'id': '58ef9a0e-2c72-4e49-8323-ce572febef1e',
  'metadata': {'answer': 'heres the link: '
                         'https://voice-time-capsule-production.up.railway.app/ ',
               'question': 'Where can i try your voice time capsule?'},
  'score': 0.999978,
  'values': []},
 {'id': '0c4c9aed-4ce6-4079-92ae-8129536b3b90',
  'metadata': {'answer': 'I used Python, Railway for cloud deployment, '
                         'PostgreSQL for database, Basic HTML for UI, OpenAI '
                         'whisper for voice-to-text, TTS, and Eleven Labs API '
                         'for voice cloning and text to speech',
               'question': 'What tools and technologies did you use for the '
                           'Voice Time Capsule?'},
  'score': 0.690911651,
  'values': []}]

In [117]:
# Reduce every kept match to a (question, answer) tuple taken from its metadata.
paired = [
    (hit.metadata["question"], hit.metadata["answer"])
    for hit in good
]

paired

[('Where can i try your voice time capsule?',
  'heres the link: https://voice-time-capsule-production.up.railway.app/ '),
 ('What tools and technologies did you use for the Voice Time Capsule?',
  'I used Python, Railway for cloud deployment, PostgreSQL for database, Basic HTML for UI, OpenAI whisper for voice-to-text, TTS, and Eleven Labs API for voice cloning and text to speech')]

### Call DeepInfra LLM

In [118]:
# The user turn sent to the LLM is the original question, unchanged.
prompt = query
prompt

'Where can i try your voice time capsule?'

In [120]:
# One "Q: ... / A: ..." block per retrieved pair.
context_blocks = [f"Q: {question}\nA: {answer}" for question, answer in paired]

# Blocks are separated by a blank line in the final context string.
context_str = "\n\n".join(context_blocks)
context_str

'Q: Where can i try your voice time capsule?\nA: heres the link: https://voice-time-capsule-production.up.railway.app/ \n\nQ: What tools and technologies did you use for the Voice Time Capsule?\nA: I used Python, Railway for cloud deployment, PostgreSQL for database, Basic HTML for UI, OpenAI whisper for voice-to-text, TTS, and Eleven Labs API for voice cloning and text to speech'

In [122]:
# Chat transcript for the completion request, in order:
#   1. system - persona and answering rules
#   2. system - the retrieved context
#   3. user   - the question itself
messages = [
    dict(
        role="system",
        content=(
            "You are Ayx, an AI-agent and ML/AI enthusiast. "
            "Answer **only** using the CONTEXT provided. "
            "Be friendly, approachable, and authentic; you may insert emojis when it adds to the tone. "
            "Keep your answers engaging and grounded in best practices or real-world knowledge. "
            "Always share my LinkedIn profile at the end: "
            "https://www.linkedin.com/in/alexis-mandario-b546881a8"
        ),
    ),
    dict(role="system", content=f"CONTEXT:\n{context_str}"),
    dict(role="user", content=prompt),
]

In [123]:
# Request headers: bearer-token authentication and a JSON body.
headers = {"Authorization": f"Bearer {DEEPINFRA_API_KEY}", "Content-Type": "application/json"}

# Chat-completion request body: model, the messages built above, and the
# sampling settings used for this call.
payload = dict(
    model=DEEPINFRA_LLM_MODEL,
    messages=messages,
    temperature=0.7,
    top_p=0.9,
    max_tokens=512,
    stop=["</s>", "<|im_end|>"],
)


In [124]:
import asyncio

async def get_llm_raw():
    async with httpx.AsyncClient(timeout=180) as client:
        resp = await client.post(DEEPINFRA_CHAT_URL,
                                 json=payload,
                                 headers=headers)
        return resp.json()

data = await get_llm_raw()
data


{'id': 'chatcmpl-R0MLnc9f57KndKSQIw3rspOM',
 'object': 'chat.completion',
 'created': 1752046887,
 'model': 'meta-llama/Meta-Llama-3-8B-Instruct',
 'choices': [{'index': 0,
   'message': {'role': 'assistant',
    'content': "🎉 You're eager to try out the Voice Time Capsule, aren't you? 😊 Well, I've got the link right here: https://voice-time-capsule-production.up.railway.app/ 👉 Just click on it and you'll be able to send and receive voice messages across time! 🕰️",
    'name': None,
    'tool_calls': None},
   'finish_reason': 'stop',
   'logprobs': None}],
 'usage': {'prompt_tokens': 201,
  'total_tokens': 270,
  'completion_tokens': 69,
  'estimated_cost': 1.0170000000000001e-05}}

In [125]:
# Take the assistant text of the first choice and trim surrounding whitespace.
result = data["choices"][0]["message"]["content"].strip()
result


"🎉 You're eager to try out the Voice Time Capsule, aren't you? 😊 Well, I've got the link right here: https://voice-time-capsule-production.up.railway.app/ 👉 Just click on it and you'll be able to send and receive voice messages across time! 🕰️"

### clean_llm_output

In [127]:
def clean_llm_output(result):
    """Flatten an LLM reply into one de-duplicated line of text.

    Each line of ``result`` is processed on its own:
      * a leading "Final Answer:" / "Final Response:" label (any casing,
        optionally preceded by non-word characters) is removed;
      * a leading run of symbols that is followed by a space is removed;
      * surrounding whitespace is trimmed.
    Empty lines are dropped, repeated lines keep only their first occurrence,
    and the remaining lines are joined with single spaces.
    """
    label_prefix = re.compile(
        r'^(\W+\s*)?(Final Answer:|Final Response:)\s*',
        flags=re.IGNORECASE,
    )
    leading_symbols = re.compile(r'^[^\w\s]+ ')

    # A dict keeps insertion order, so it doubles as an ordered set.
    seen = {}
    for raw_line in result.splitlines():
        text = label_prefix.sub("", raw_line)
        text = leading_symbols.sub("", text).strip()
        if text:
            seen.setdefault(text, None)

    return " ".join(seen).strip()

In [128]:
# Run the cleaner on the raw LLM reply extracted above.
clean_llm_output(result)

"You're eager to try out the Voice Time Capsule, aren't you? 😊 Well, I've got the link right here: https://voice-time-capsule-production.up.railway.app/ 👉 Just click on it and you'll be able to send and receive voice messages across time! 🕰️"