In [None]:
!pip install -q sentence-transformers faiss-cpu boto3 fastapi uvicorn[standard] nest_asyncio pyngrok requests


In [None]:
from google.colab import files
uploaded = files.upload()  # pick the cars.json file from your local machine

import json
from pprint import pprint

# Guard the lookup below: with nothing uploaded (e.g. the dialog was dismissed) the
# original `list(uploaded.keys())[0]` failed with an unhelpful IndexError.
if not uploaded:
    raise RuntimeError("Nenhum arquivo enviado: execute a célula novamente e selecione o cars.json.")

fname = next(iter(uploaded))
with open(fname, 'r', encoding='utf-8') as f:
    cars = json.load(f)

# Every later cell iterates over `cars` and slices it, so it must be a JSON array.
if not isinstance(cars, list):
    raise TypeError(f"{fname} deve conter uma lista JSON de carros, mas contém {type(cars).__name__}.")

print(f"Arquivo carregado: {fname}, registros: {len(cars)}")
# preview the first 2 records
pprint(cars[:2])


In [None]:
import math
import re

def parse_price(p):
    """Convert a raw price (number or free-form text) into a float.

    Accepted text formats, after any currency symbol / surrounding words:
      * Brazilian style: "123.456,78", "89.990", "123,45"
      * plain decimal point: "123456.78"
      * mixed separators: the right-most separator is taken as the decimal mark

    A lone dot followed by exactly three digits ("100.000") is read as a thousands
    separator; any other lone dot is a decimal point. A lone comma is always a
    decimal comma (Brazilian convention).

    Args:
        p: None, a number, or anything whose str() contains a price.

    Returns:
        The price as a finite float, or None when `p` is None, a bool, a
        non-finite number, or contains no digits.
    """
    # bool is a subclass of int; True/False are never meaningful prices.
    if p is None or isinstance(p, bool):
        return None
    if isinstance(p, (int, float)):
        try:
            value = float(p)
        except OverflowError:
            return None
        # NaN/inf would later break int(price) and serialise as invalid JSON.
        return value if math.isfinite(value) else None

    # Drop all whitespace (including NBSP) so "R$ 100 000" reads as one number.
    s = ''.join(str(p).split())
    m = re.search(r'\d[\d.,]*', s)
    if m is None:
        return None
    # A trailing separator ("100.") carries no information.
    token = m.group(0).rstrip('.,')

    dots, commas = token.count('.'), token.count(',')
    if dots and commas:
        # Both present: whichever appears last is the decimal mark.
        decimal_sep = '.' if token.rfind('.') > token.rfind(',') else ','
    elif commas == 1:
        decimal_sep = ','
    elif dots == 1 and not re.fullmatch(r'\d{1,3}\.\d{3}', token):
        # "123456.78" or "1.5": a real decimal point, not a thousands group.
        decimal_sep = '.'
    else:
        # No separator, or only repeated/grouping separators ("1.234.567").
        decimal_sep = None

    if decimal_sep is None:
        value = float(re.sub(r'[.,]', '', token))
    else:
        whole, _, frac = token.rpartition(decimal_sep)
        value = float(re.sub(r'[.,]', '', whole) + '.' + frac)
    return value if math.isfinite(value) else None

for c in cars:
    c['Price'] = parse_price(c.get('Price'))
    # Guarantee the minimal text keys. A JSON null counts as missing, so the literal
    # "None" never ends up in the embedded text and string methods such as .lower()
    # stay safe on these fields.
    for k in ['Name', 'Model', 'Image', 'Location']:
        if c.get(k) is None:
            c[k] = ""

print("Amostra após normalização:")
from pprint import pprint
pprint(cars[:3])


In [None]:
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')  # small and fast, a good fit for a demo

# Assemble the text embedded for each car: name, model, location and, when a price
# is known, a "R$<integer>" tag (adjust freely: add descriptions, year, etc.).
price_tags = [f"R${int(c['Price'])}" if c.get('Price') else "" for c in cars]
texts = [
    f"{c.get('Name','')} {c.get('Model','')} {c.get('Location','')} {price_str}"
    for c, price_str in zip(cars, price_tags)
]

print("Gerando embeddings (pode demorar alguns segundos dependendo do dataset)...")
embeddings = model.encode(
    texts,
    convert_to_numpy=True,
    show_progress_bar=True,
)
print("Embeddings gerados. shape:", embeddings.shape)


In [None]:
import json

import faiss
import numpy as np

# Flat L2 index sized to the embedding width: no training step, fine for small datasets.
d = embeddings.shape[1]
index = faiss.IndexFlatL2(d)
index.add(embeddings)
print("Índice FAISS criado. Número de vetores:", index.ntotal)

# Persist the index, the raw embeddings and the car metadata in the working directory.
faiss.write_index(index, 'cars.index')
np.save('cars_emb.npy', embeddings)
with open('cars_meta.json', 'w', encoding='utf-8') as meta_file:
    meta_file.write(json.dumps(cars, ensure_ascii=False))
print("Arquivos salvos: cars.index, cars_emb.npy, cars_meta.json")


In [None]:
# relies on `index`, `model` and `cars` already being in the notebook's memory
def search_text(query, k=5):
    """Return up to `k` cars whose embedded text is nearest to `query`.

    Args:
        query: free-text search string.
        k: maximum number of hits; values <= 0 yield an empty list.

    Returns:
        A list of shallow copies of the matching car dicts, nearest first, each with
        an extra 'score' key holding the FAISS distance (smaller = closer; the
        IndexFlatL2 built above reports squared L2 distances).
    """
    # Never ask FAISS for more neighbours than it stores.
    k = min(int(k), index.ntotal)
    if k <= 0:
        return []
    q_emb = model.encode([query], convert_to_numpy=True)
    D, I = index.search(q_emb, k)
    results = []
    for dist, idx in zip(D[0], I[0]):
        # FAISS pads missing neighbours with -1, which would silently select cars[-1].
        if idx < 0:
            continue
        car = cars[idx].copy()
        car['score'] = float(dist)
        results.append(car)
    return results

# Smoke test: run one sample query and pretty-print the hits.
from pprint import pprint

q = "BYD Dolphin São Paulo 100000"
res = search_text(q, k=5)
pprint(res)


In [None]:
%%writefile app.py
# app.py
import os, json, time, requests
from typing import Optional
from fastapi import FastAPI
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Optional OpenAI: when OPENAI_API_KEY is set, a remote LLM writes the /chat answers (RAG)
USE_OPENAI = bool(os.environ.get("OPENAI_API_KEY"))
OPENAI_MODEL = os.environ.get("OPENAI_MODEL", "gpt-4o-mini")  # example default, override via env

if USE_OPENAI:
    import openai
    openai.api_key = os.environ.get("OPENAI_API_KEY")

# Optional n8n webhook that receives event logs (set the N8N_WEBHOOK_URL env var)
N8N_WEBHOOK = os.environ.get("N8N_WEBHOOK_URL")

# Locations of the FAISS index and of the car metadata it was built from
INDEX_PATH = os.environ.get("INDEX_PATH", "cars.index")
META_PATH = os.environ.get("META_PATH", "cars_meta.json")

print("Loading index:", INDEX_PATH)
index = faiss.read_index(INDEX_PATH)
with open(META_PATH, "r", encoding="utf-8") as meta_file:
    cars = json.loads(meta_file.read())
print("Loaded cars:", len(cars))

# Sentence-transformer model; must be the same one used to build the index
embed_model = SentenceTransformer("all-MiniLM-L6-v2")

app = FastAPI(title="Car Search + Chat (RAG)")

def embed_text(texts):
    """Encode a batch of texts with the module-level `embed_model`, returning NumPy output."""
    vectors = embed_model.encode(texts, convert_to_numpy=True)
    return vectors

def log_event(payload):
    """Best-effort POST of `payload` as JSON to the n8n webhook.

    Does nothing when N8N_WEBHOOK is unset. Any failure is printed and swallowed so
    logging can never break a request; the call waits at most 5 seconds.
    """
    try:
        if not N8N_WEBHOOK:
            return
        requests.post(N8N_WEBHOOK, json=payload, timeout=5)
    except Exception as exc:
        print("Failed to send to n8n:", exc)

# Request body for POST /search. Every field is optional.
class SearchQuery(BaseModel):
    # Free-text query; when empty, /search lists cars by price instead of by similarity.
    q: Optional[str] = ""
    # Number of results requested; /search falls back to 5 when this is None or 0.
    k: Optional[int] = 5
    # Compared case-insensitively against each car's "Location".
    location: Optional[str] = None
    # Inclusive lower bound on "Price".
    minPrice: Optional[float] = None
    # Inclusive upper bound on "Price"; also the target of the closest-price fallback.
    maxPrice: Optional[float] = None

@app.post("/search")
def search(qobj: SearchQuery):
    """Semantic car search with optional location and price filters.

    With a non-empty `q`, cars are ranked by FAISS distance to the query embedding and
    each hit carries a "score" (smaller = closer). Without `q`, cars are listed by
    ascending price, unpriced cars last. Filters are applied to the whole ranking
    before truncating to `k`, so a matching car is never lost to an earlier cut-off.
    A `minPrice`/`maxPrice` of 0 means "no bound". If nothing matches and `maxPrice`
    was given, up to 5 cars priced closest to `maxPrice` are suggested instead.

    Returns:
        {"results": [car, ...]}
    """
    q = (qobj.q or "").strip()
    # A non-positive k would break FAISS and turn the slices below into surprises.
    k = max(1, qobj.k or 5)

    location = (qobj.location or "").strip().lower()
    # Truthiness on purpose: 0 keeps meaning "no bound" for existing clients.
    min_price = qobj.minPrice or None
    max_price = qobj.maxPrice or None
    has_filters = bool(location) or min_price is not None or max_price is not None

    def _matches(car):
        """True when `car` satisfies every active filter."""
        # `or ""` tolerates a null Location in the metadata file.
        if location and str(car.get("Location") or "").strip().lower() != location:
            return False
        price = car.get("Price")
        if min_price is not None and (price is None or price < min_price):
            return False
        if max_price is not None and (price is None or price > max_price):
            return False
        return True

    if q:
        # With filters active, rank the whole index: filtering only the top-k would
        # return nothing whenever the matching cars sit just below the cut-off.
        fetch_k = index.ntotal if has_filters else min(k, index.ntotal)
        candidates = []
        if fetch_k > 0:
            qe = embed_text([q])
            D, I = index.search(qe, fetch_k)
            for dist, idx in zip(D[0], I[0]):
                # FAISS pads missing neighbours with -1; also guard a stale metadata file.
                if 0 <= idx < len(cars):
                    item = cars[idx].copy()
                    item["score"] = float(dist)
                    candidates.append(item)
    else:
        # No query: cheapest first. "Price" may be present but null, so map None to
        # +inf explicitly (a .get default only covers a missing key).
        candidates = sorted(
            cars,
            key=lambda x: x["Price"] if x.get("Price") is not None else float("inf"),
        )

    results = [r for r in candidates if _matches(r)][:k]

    # Fallback suggestion: nothing matched, so offer the cars priced closest to the budget.
    if not results and max_price is not None:
        priced = [c for c in cars if c.get("Price") is not None]
        results = sorted(priced, key=lambda a: abs(a["Price"] - max_price))[:5]

    # Best-effort logging; log_event swallows its own errors.
    log_event({
        "type": "search",
        "query": q,
        "filters": {"location": qobj.location, "minPrice": qobj.minPrice, "maxPrice": qobj.maxPrice},
        "resultCount": len(results),
        "ts": int(time.time())
    })

    return {"results": results}

# Request body for POST /chat.
class ChatQuery(BaseModel):
    # The user's message; it is embedded and used to retrieve relevant cars.
    message: str
    # Number of cars to retrieve as context; /chat falls back to 5 when this is None or 0.
    k: Optional[int] = 5

@app.post("/chat")
def chat(c: ChatQuery):
    """Answer a user message using the most similar cars as context (RAG).

    The message is embedded, the `k` nearest cars are retrieved, and the answer is
    either a fixed template (no OPENAI_API_KEY) or an LLM completion grounded on the
    retrieved cars. If the LLM call fails, the raw inventory lines are returned with
    the error message.

    Returns:
        {"answer": str, "results": [car, ...]}; each car has a "score" (smaller = closer).
    """
    user_message = c.message
    k = max(1, c.k or 5)

    # Embed the message and retrieve the top-k cars.
    retrieved = []
    fetch_k = min(k, index.ntotal)
    if fetch_k > 0:
        q_emb = embed_text([user_message])
        D, I = index.search(q_emb, fetch_k)
        for dist, idx in zip(D[0], I[0]):
            # FAISS pads missing neighbours with -1, which would silently select cars[-1].
            if 0 <= idx < len(cars):
                item = cars[idx].copy()
                item["score"] = float(dist)
                retrieved.append(item)

    # One bullet per car; null fields render as empty text rather than "None".
    context_lines = []
    for r in retrieved:
        price = r.get("Price")
        price_str = f"R${int(price):,}".replace(",", ".") if price else "N/A"
        context_lines.append(
            f"- {r.get('Name') or ''} {r.get('Model') or ''} — {price_str} — {r.get('Location') or ''}"
        )
    context = "\n".join(context_lines)

    if not USE_OPENAI:
        # No OpenAI key configured: return a templated answer.
        answer = f"Encontrei {len(retrieved)} veículos relevantes:\n{context}\n\nSe quiser, posso filtrar por cidade ou preço."
    else:
        system_prompt = (
            "Você é um assistente conciso para busca de carros. Use os trechos abaixo (do inventário) para responder a intenção do usuário. "
            "Se o orçamento do usuário for menor que os preços disponíveis, sugira alternativas, negociações ou opções similares. "
            "Se o usuário pedir localidade, destaque veículos na localidade e, caso não haja, sugira alternativas em outras cidades com justificativa."
        )
        user_prompt = f"Inventário relevante:\n{context}\n\nUsuário: \"{user_message}\"\nResponda de forma objetiva (máx 200 palavras) e sugira 3 ações (ex: contatar vendedor, salvar alerta, ver similares)."
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ]
        try:
            if hasattr(openai, "OpenAI"):
                # openai>=1.0 removed openai.ChatCompletion; use the client API.
                client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
                resp = client.chat.completions.create(
                    model=OPENAI_MODEL,
                    messages=messages,
                    temperature=0.2,
                    max_tokens=450,
                )
                content = resp.choices[0].message.content
            else:
                # Legacy openai<1.0 interface.
                resp = openai.ChatCompletion.create(
                    model=OPENAI_MODEL,
                    messages=messages,
                    temperature=0.2,
                    max_tokens=450,
                )
                content = resp["choices"][0]["message"]["content"]
            answer = (content or "").strip()
        except Exception as e:
            # Degrade gracefully: the caller still gets the retrieved inventory.
            answer = f"Erro ao acessar LLM: {e}. Exibindo resultados brutos:\n{context}"

    # Single logging point for both the templated and the LLM path.
    log_event({"type":"chat","message":user_message,"answer":answer,"results_count":len(retrieved),"ts":int(time.time())})
    return {"answer": answer, "results": retrieved}



In [None]:
# Free TCP port 8000 by killing whatever process currently holds it.
# NOTE(review): after the server cell has run, uvicorn lives in a thread of this kernel,
# so the process holding the port is presumably the kernel itself; re-running this cell
# at that point would likely kill the notebook runtime. Confirm before relying on it.
!fuser -n tcp -k 8000


In [None]:
# Baixa o binário oficial mais recente
!wget -q https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64.deb

# Instala o .deb
!dpkg -i cloudflared-linux-amd64.deb

# Confere se instalou
!cloudflared --version


In [None]:
import threading
import uvicorn

def run():
    """Serve the FastAPI app defined in app.py on 0.0.0.0:8000."""
    uvicorn.run("app:app", port=8000, host="0.0.0.0")

# Daemon thread: the server runs in the background and the cell returns immediately.
t = threading.Thread(daemon=True, target=run)
t.start()


In [None]:
# Expose the local API on port 8000 through a cloudflared tunnel, with auto-update disabled.
# NOTE(review): presumably this keeps running (and the cell stays busy) while the tunnel
# is open; confirm.
!cloudflared tunnel --url http://localhost:8000 --no-autoupdate
