In [None]:
!pip install --quiet sentence-transformers faiss-cpu transformers langchain langchain-ollama langchain_community ollama fastapi uvicorn python-multipart nest-asyncio pyngrok requests langchain-huggingface

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
import json
# adjust this path to wherever your unified JSON IS
INPUT_JSON = '/content/drive/MyDrive/AmazonReviews/final_dataset.json'

with open(INPUT_JSON, 'r') as f:
    data = json.load(f)

print(f"Loaded {len(data)} products")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Loaded 213593 products


In [None]:
def make_doc_text(prod):
    parts = []
    parts.append(f"Title: {prod.get('title','')}")
    parts.append(f"Category: {prod.get('main_category','')}")
    parts.append(
        f"Overall sentiment: {prod.get('overall_sentiment','unknown')} "
        f"(avg rating {prod.get('average_rating','N/A')})"
    )
    parts.append("Features: " + ". ".join(prod.get('features', [])))
    parts.append("Description: " + " ".join(prod.get('description', [])))
    # Details
    detail_items = prod.get('details', {})
    if isinstance(detail_items, dict):
        detail_str = "; ".join(f"{k}: {v}" for k, v in detail_items.items())
        parts.append("Details: " + detail_str)
    # Sample up to 5 reviews
    for r in prod.get('reviews', [])[:5]:
        label = r.get('bert_sentiment', {}).get('label', r.get('sentiment','unknown'))
        txt   = r.get('cleaned_text', '')
        parts.append(f"Review ({label}): {txt}")
    # Named entities
    ents = prod.get('named_entities', [])
    ent_str = ", ".join(e for e, _ in ents)
    parts.append("Entities: " + ent_str)
    return "\n".join(parts)

# Now build your docs list safely:
docs = []
for i, prod in enumerate(data, 1):
    try:
        docs.append(make_doc_text(prod))
    except Exception as e:
        print(f"Error building doc for product {i}: {e}")
        docs.append("")  # fallback empty
print("Built", len(docs), "document texts; sample:\n", docs[0], "…")

Built 213593 document texts; sample:
 Title: Pearl Export Lacquer EXL725S/C249 5-Piece New Fusion Drum Set with Hardware, Honey Amber
Category: Musical Instruments
Overall sentiment: positive (avg rating 4.2)
Features: Item may ship in more than one box and may arrive separately. (22x18, 10x7, 12x8, 16x16, 14x5.5). P930 Demonator Pedal. 830 Hardware Pack. Matching snare, REMO snare batter side head
Description: Introducing the best selling drum set of all time... Export Series returns and this time with a lacquer finish. EXL Export Lacquer Series incorporates Pearl's S.S.T. Superior Shell Technology, Opti-Loc tom mounts, all-new 830 Series Hardware with a P-930 Pedal, and a choice of three amazing stocking finishes.
Details: Item Weight: 33 pounds; Product Dimensions: 22 x 22 x 20 inches; Item model number: EXL725S/C249; Best Sellers Rank: {'Musical Instruments': 150300, 'Drum Sets': 521}; Date First Available: April 4, 2014; Color Name: Honey Amber; Material Type: Pearl; Size: -inch; 

In [None]:
# 2.1: Import & load your documents
from sentence_transformers import SentenceTransformer
import numpy as np

# Assuming you already have `docs` as a list of strings
# e.g. docs = [make_doc_text(p) for p in data]

print("Number of docs to embed:", len(docs))

# 2.2: Load the model and compute embeddings
embedder = SentenceTransformer("all-MiniLM-L6-v2")
embeddings = embedder.encode(
    docs,
    batch_size=64,
    convert_to_numpy=True,
    show_progress_bar=True
)

print("Computed embeddings shape:", embeddings.shape)

# 2.3: Save embeddings to disk (Colab working dir or Drive)
# np.save("embeddings.npy", embeddings)
# or to Google Drive:
np.save("/content/drive/MyDrive/AmazonReviews/embeddings.npy", embeddings)

print("Embeddings saved to embeddings.npy")

Number of docs to embed: 213593


Batches:   0%|          | 0/3338 [00:00<?, ?it/s]

Computed embeddings shape: (213593, 384)
Embeddings saved to embeddings.npy


In [None]:
import numpy as np

# 3.1: Load the embeddings back
# embeddings = np.load("embeddings.npy")
# or from Drive:
embeddings = np.load("/content/drive/MyDrive/AmazonReviews/embeddings.npy")

# 3.2: Verify shape
print("Reloaded embeddings shape:", embeddings.shape)

Reloaded embeddings shape: (213593, 384)


In [None]:
import faiss
import numpy as np

# Load and normalize embeddings
embeddings = embeddings / np.linalg.norm(embeddings, axis=1, keepdims=True)

# Create HNSW index
d = embeddings.shape[1]
index = faiss.IndexHNSWFlat(d, 32)  # 32 links per vector
index.hnsw.efConstruction = 200  # Better accuracy
index.hnsw.efSearch = 64  # Balance speed and accuracy

# Add embeddings
index.add(embeddings)
print("FAISS HNSW index ntotal:", index.ntotal)

# Save to Drive
faiss.write_index(index, "/content/drive/MyDrive/AmazonReviews/products_hnsw.index")
print("FAISS index saved to products_hnsw.index")

NameError: name 'embeddings' is not defined

In [None]:
import faiss

# 4.1: Reload the HNSW index
# idx = faiss.read_index("products.index")
# or from Drive:
idx = faiss.read_index("/content/drive/MyDrive/AmazonReviews/products_hnsw.index")

# 4.2: Inspect properties
print("Re-loaded FAISS index ntotal:", idx.ntotal)
print("Index dimension:", idx.d)

Re-loaded FAISS index ntotal: 213593
Index dimension: 384


In [None]:
import pickle
import os

# Path where you want to save the pickle
PICKLE_PATH = "/content/drive/MyDrive/AmazonReviews/products.pkl" # or "products.pkl"

# Bundle docs and metadata into one dict
store = {
    "docs": docs,  # list of document strings
    "meta": data   # original JSON‑loaded list of product dicts
}

# Write to disk
with open(PICKLE_PATH, "wb") as f:
    pickle.dump(store, f)

# Optional: confirm file size
print("Saved pickle:", PICKLE_PATH)
print("Size:", os.path.getsize(PICKLE_PATH) / (1024*1024), "MB")

Saved pickle: /content/drive/MyDrive/AmazonReviews/products.pkl
Size: 1886.905634880066 MB


In [None]:
import pickle

PICKLE_PATH = "/content/drive/MyDrive/AmazonReviews/products.pkl"  # same path as above

# Load from disk
with open(PICKLE_PATH, "rb") as f:
    store = pickle.load(f)


print("Pickle keys:", list(store.keys()))

# Unpack
docs = store["docs"]
data = store["meta"]

# Quick sanity check
print("Loaded docs count:", len(docs))
print("Loaded meta entries:", len(data))

Pickle keys: ['docs', 'meta']
Loaded docs count: 213593
Loaded meta entries: 213593


In [None]:
!sudo apt update
!sudo apt install -y pciutils
!curl -fsSL https://ollama.com/install.sh | sh
!pip install langchain-ollama ollama langchain langchain_community

[33m0% [Working][0m            Hit:1 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease
Hit:2 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease
Get:3 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:4 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Hit:6 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:7 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:8 https://r2u.stat.illinois.edu/ubuntu jammy InRelease
Hit:9 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Hit:10 http://archive.ubuntu.com/ubuntu jammy-backports InRelease
Get:11 http://archive.ubuntu.com/ubuntu jammy-updates/universe amd64 Packages [1,543 kB]
Get:12 http://archive.ubuntu.com/ubuntu jammy-updates/main amd64 Packages [3,154 kB]
Fetched 4,954 kB in 4s (1,403 kB/s)
Reading package lists... 

In [None]:
import subprocess
import time
import threading

def run_ollama():
    subprocess.run(["ollama", "serve"])

# Start Ollama in a daemon thread
thread = threading.Thread(target=run_ollama, daemon=True)
thread.start()
time.sleep(5)  # Wait for Ollama to initialize

In [None]:
!ollama pull phi

[?2026h[?25l[1Gpulling manifest ⠋ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠙ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠹ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠸ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠼ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠴ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠦ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠧ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠇ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest ⠏ [K[?25h[?2026l[?2026h[?25l[1Gpulling manifest [K
pulling 04778965089b: 100% ▕▏ 1.6 GB                         [K
pulling 7908abcab772: 100% ▕▏ 1.0 KB                         [K
pulling 774a15e6f1e5: 100% ▕▏   77 B                         [K
pulling 3188becd6bae: 100% ▕▏  132 B                         [K
pulling 0b8127ddf5ee: 100% ▕▏   42 B                         [K
pulling 4ce4b16d33a3: 100% ▕▏  555 B                         [K
verifying sha256 digest [K
wri

In [None]:
from langchain.llms import Ollama

llm = Ollama(
    model="phi",
    base_url="http://localhost:11434",
    temperature=0.0,  # Deterministic responses
    num_predict=200    # Limit output tokens
)

print("Ollama phi LLM is ready:", llm)

Ollama phi LLM is ready: [1mOllama[0m
Params: {'model': 'phi', 'format': None, 'options': {'mirostat': None, 'mirostat_eta': None, 'mirostat_tau': None, 'num_ctx': None, 'num_gpu': None, 'num_thread': None, 'num_predict': 200, 'repeat_last_n': None, 'repeat_penalty': None, 'temperature': 0.0, 'stop': None, 'tfs_z': None, 'top_k': None, 'top_p': None}, 'system': None, 'template': None, 'keep_alive': None, 'raw': None}


  llm = Ollama(


In [None]:
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS

# Create embedder
embedder = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Create metadata list
metadatas = [{"parent_asin": p["parent_asin"]} for p in data]

# Load vectorstore
vectorstore = FAISS.load_local(
    "/content/drive/MyDrive/AmazonReviews/faiss_vectorstore",
    embeddings=embedder,
    allow_dangerous_deserialization=True
)

# Load HNSW index
vectorstore.index = faiss.read_index("/content/drive/MyDrive/AmazonReviews/products_hnsw.index")

print("Vectorstore reloaded with HNSW index.")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Vectorstore reloaded with HNSW index.


In [None]:
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# 1. Ultra-Strict Prompt
template = """You are a product assistant. Your only job is to answer questions about products using the context provided below.
Do not use any external knowledge, general information, or assumptions under any circumstances.
If the context is empty, does not contain information directly relevant to the question, or if the question is not about products, respond EXACTLY with: "I’m sorry, I can only assist with product-related queries."
Do not elaborate, speculate, or provide any other response. ALWAYS PROVIDE CONCISE YET INFORMATIVE FOR PRODUCT QUERIES. If any response has "As an AI model I do not have access" then cut the response short and concise do not add Rules.


Context:
{context}

Question:
{question}

Answer:"""

prompt = PromptTemplate(
    input_variables=["context", "question"],
    template=template
)

# 3. MMR Retriever
retriever = vectorstore.as_retriever(search_type="mmr", search_kwargs={"k": 7})

# 4. QA Chain Setup
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
    chain_type_kwargs={"prompt": prompt}
)


print("RetrievalQA chain is ready.")

RetrievalQA chain is ready.


In [None]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import re

# Semantic relevance check
def is_context_relevant(query, docs, embedding_model, threshold=0.5):
    """
    Check if retrieved documents are semantically relevant to the query.
    Uses cosine similarity between query and document embeddings.
    """
    query_embedding = embedding_model.embed_query(query)
    doc_embeddings = [embedding_model.embed_query(doc.page_content) for doc in docs]
    similarities = cosine_similarity([query_embedding], doc_embeddings)[0]
    return any(sim > threshold for sim in similarities)

# Existing utility functions
def preprocess_query(query):
    query = re.sub(r'\s+', ' ', query.strip()).lower()
    return query

def postprocess_response(response):
    response = re.sub(r'\n+', '\n', response.strip())
    response = re.sub(r'(\d+)\.\s+', r'\n\1. ', response)
    return response

# Updated get_response
def get_response(query, embedding_model):
    query = preprocess_query(query)
    docs = retriever.invoke(query)
    print("Retrieved document snippets:")
    if not docs or not is_context_relevant(query, docs, embedding_model):
        return "I’m sorry, I can only assist with product-related queries."
    result = qa_chain.invoke({"query": query})
    return postprocess_response(result["result"])

print("get_response function with semantic relevance check is ready.")

get_response function with semantic relevance check is ready.


In [None]:
# ── Cell 17: Test a Sample Query ───────────────────────────────────────────
query = "what are main features of ukulele"
response = get_response(query, embedding_model=embedder)
'''
print("Answer:\n", result["result"])
print("\nRetrieved document snippets:")
for doc in result["source_documents"]:
    print("—", doc.page_content[:200].replace("\n"," "), "…")
'''
print("\nAnswer:\n", response)

Retrieved document snippets:

Answer:
 The main features of a ukulele include its size (typically small and portable), the type of wood used for construction (such as basswood or rosewood), the number of strings (usually 4), and the tuning system (most commonly in fourths). Additionally, ukuleles often have decorative elements such as inlays, engravings, or unique finishes.


In [None]:
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
import uvicorn
import nest_asyncio
from pyngrok import ngrok

nest_asyncio.apply()

app = FastAPI()

# Add CORS middleware
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Allows all origins for testing
    allow_credentials=True,
    allow_methods=["*"],  # Allows POST, OPTIONS, etc.
    allow_headers=["*"],
)

class QueryRequest(BaseModel):
    query: str

@app.post("/query")
async def run_query(request: QueryRequest):
    # Use get_response instead of qa_chain.invoke
    response = get_response(request.query, embedding_model=embedder)
    # Retrieve source documents for the response (if relevant)
    return {"answer": response}

ngrok.set_auth_token("2vyN7mQkF9ypLMOZ9GqcTPzS3DU_3D6Kwfx5VjAJiSFKx2NLs")
public_url = ngrok.connect(8000)
print(f"FastAPI Public URL: {public_url}")

uvicorn.run(app, host="0.0.0.0", port=8000)

INFO:     Started server process [16689]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


FastAPI Public URL: NgrokTunnel: "https://1200-34-87-185-114.ngrok-free.app" -> "http://localhost:8000"
INFO:     66.71.35.72:0 - "OPTIONS /query HTTP/1.1" 200 OK
Retrieved document snippets:
INFO:     66.71.35.72:0 - "POST /query HTTP/1.1" 200 OK
Retrieved document snippets:
INFO:     66.71.35.72:0 - "POST /query HTTP/1.1" 200 OK
Retrieved document snippets:
INFO:     66.71.35.72:0 - "POST /query HTTP/1.1" 200 OK
