In [None]:
# ✅ 1. Install everything needed
!pip install faiss-cpu sentence-transformers transformers accelerate

# ✅ 2. Import libraries
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# ✅ 3. Load models (separately!)
# Embedding model: used to encode chunk texts and queries into vectors.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Generation model: tokenizer and causal LM come from the same checkpoint.
tokenizer = AutoTokenizer.from_pretrained('microsoft/phi-2')
llm = AutoModelForCausalLM.from_pretrained(
    'microsoft/phi-2',
    device_map="auto",
    torch_dtype=torch.float16,
)


Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_6

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.34k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/798k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.11M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/1.08k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/99.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/735 [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/35.7k [00:00<?, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/564M [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

In [None]:
from sentence_transformers import SentenceTransformer

# Reuse the embedding model already loaded as `embedder` in the setup cell
# instead of loading a second copy of 'all-MiniLM-L6-v2'. Chunk embedding and
# query embedding then share one model instance.
model = embedder

def embed_chunks(chunks, embedder_model=None):
    """Attach an embedding to every chunk, in place.

    Args:
        chunks: List of dicts, each with a "text" key holding the string to
            embed. The dicts are mutated.
        embedder_model: Object exposing ``encode(texts, show_progress_bar=...)``
            that returns one vector per input text. When omitted, the
            notebook-level ``model`` is used, which keeps existing
            ``embed_chunks(chunks)`` calls working unchanged.

    Returns:
        The same ``chunks`` list. Each dict gains an "embedding" key holding
        the vector converted with ``.tolist()`` so it is JSON-safe.
    """
    # Nothing to encode: return early rather than calling the model on an
    # empty batch.
    if not chunks:
        return chunks

    if embedder_model is None:
        embedder_model = model

    texts = [chunk["text"] for chunk in chunks]
    embeddings = embedder_model.encode(texts, show_progress_bar=True)

    # Pair each chunk with its vector; encode() is expected to preserve order.
    for chunk, embedding in zip(chunks, embeddings):
        chunk["embedding"] = embedding.tolist()  # Convert to JSON-safe list

    return chunks


In [None]:
# Sample input: one dict per page with its page number and text.
# Replace this list with real document chunks of the same shape.
# (The previous `chunks = embed_chunks` bound the function object itself,
# which made the comprehension below fail with
# "TypeError: 'function' object is not iterable".)
chunks = [
    {"page_number": 1, "text": "RTCU is a rugged and programmable IoT controller designed for industrial-grade communication and integration."},
    {"page_number": 2, "text": "It supports MQTT, HTTP, and RESTful APIs, making it suitable for SCADA and cloud integration."},
    {"page_number": 3, "text": "The RTCU platform enables remote firmware updates and edge processing capabilities."}
]

# Embed each chunk and store the vector next to its text
vectors = embedder.encode([chunk["text"] for chunk in chunks], show_progress_bar=True)
for chunk, vector in zip(chunks, vectors):
    chunk["embedding"] = vector.tolist()


TypeError: 'function' object is not iterable

In [None]:
def build_faiss_index(chunks):
    """Build a flat L2 FAISS index from embedded chunks.

    Args:
        chunks: Non-empty list of dicts, each carrying "embedding" (a sequence
            of floats), "text" and "page_number" keys.

    Returns:
        A tuple ``(index, texts, metadata)``. ``index`` is a
        ``faiss.IndexFlatL2`` holding one vector per chunk; ``texts`` and
        ``metadata`` are lists in the same order as ``chunks``, so a position
        returned by the index can be used to look both up.

    Raises:
        ValueError: If ``chunks`` is empty, since the vector dimension cannot
            be determined without at least one embedding.
    """
    if not chunks:
        raise ValueError("build_faiss_index requires at least one embedded chunk")

    dim = len(chunks[0]["embedding"])
    metadata = [{"page_number": c["page_number"]} for c in chunks]
    texts = [c["text"] for c in chunks]

    # Build the float32 matrix in one step; the index is fed float32 vectors.
    vectors = np.asarray([c["embedding"] for c in chunks], dtype=np.float32)

    index = faiss.IndexFlatL2(dim)
    index.add(vectors)
    return index, texts, metadata

# Build the index at notebook level. `texts` and `metadata` are lists in chunk
# order that the search code looks up by the positions the index returns.
index, texts, metadata = build_faiss_index(chunks)


In [None]:
def search_faiss(query, embedder_model, index, texts, metadata, top_k=3):
    """Return the stored chunks closest to ``query``.

    Args:
        query: Query string to embed.
        embedder_model: Object whose ``encode([query])`` returns a sequence
            with one vector for the query.
        index: Object exposing ``search(vectors, k)`` that returns
            ``(distances, indices)``, e.g. a FAISS index.
        texts: Chunk texts, addressed by the positions the index returns.
        metadata: Per-chunk metadata, in the same order as ``texts``.
        top_k: Number of neighbours requested from the index.

    Returns:
        A list of ``{"text": ..., "metadata": ...}`` dicts in the order the
        index returned them. It may hold fewer than ``top_k`` entries when
        the index contains fewer vectors than requested.
    """
    query_embedding = np.asarray(embedder_model.encode([query])[0], dtype=np.float32)
    _, indices = index.search(np.array([query_embedding], dtype=np.float32), top_k)

    results = []
    for idx in indices[0]:
        idx = int(idx)
        # FAISS pads missing neighbours with -1. Used as a Python index that
        # would silently return the *last* chunk, so skip those slots.
        if idx < 0:
            continue
        results.append({
            "text": texts[idx],
            "metadata": metadata[idx]
        })

    return results

def build_rag_prompt(query, context_chunks):
    """Assemble the instruction prompt sent to the language model.

    Args:
        query: The user's question.
        context_chunks: Iterable of dicts with a "text" key; the texts are
            joined with blank lines to form the context section.

    Returns:
        A string with an instruction line followed by "### Context:",
        "### Question:" and a trailing "### Answer:" header.
    """
    passages = [item["text"] for item in context_chunks]
    sections = (
        "You are a helpful assistant. Use the information below to answer the question.",
        "",
        "### Context:",
        "\n\n".join(passages),
        "",
        "### Question:",
        f"{query}",
        "",
        "### Answer:",
    )
    return "\n".join(sections)

def generate_answer(prompt, max_tokens=300):
    """Generate a completion for ``prompt`` with the notebook-level LLM.

    Uses the module-level ``tokenizer`` and ``llm``. Sampling is enabled
    (temperature 0.7, top-p 0.95), so repeated calls may return different text.

    Args:
        prompt: Full prompt text.
        max_tokens: Upper bound on newly generated tokens (passed as
            ``max_new_tokens``).

    Returns:
        The decoded text of the newly generated tokens only, stripped of
        surrounding whitespace.
    """
    inputs = tokenizer(prompt, return_tensors="pt").to(llm.device)
    outputs = llm.generate(
        **inputs,
        max_new_tokens=max_tokens,
        do_sample=True,
        temperature=0.7,
        top_p=0.95,
        pad_token_id=tokenizer.eos_token_id,
    )

    # For a causal LM, generate() returns the prompt tokens followed by the
    # continuation. Slice the prompt off by length instead of splitting the
    # decoded text on "### Answer:", which picks the wrong segment when that
    # marker appears in the context, the query, or the model's own output.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = outputs[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


In [None]:
def rag_pipeline(query):
    """Answer ``query`` by retrieving context and prompting the LLM.

    Looks up the three nearest chunks through the notebook-level ``embedder``,
    ``index``, ``texts`` and ``metadata``, builds a prompt from them, and
    generates the answer.

    Returns:
        A tuple ``(answer, results)``: the generated text and the retrieved
        chunks that were placed in the prompt.
    """
    retrieved = search_faiss(query, embedder, index, texts, metadata, top_k=3)
    generated = generate_answer(build_rag_prompt(query, retrieved))
    return generated, retrieved


In [None]:
# query = "What are the integration features of RTCU?"
# Read the question interactively, then run retrieval + generation on it.
query = input("Enter your query: ")
# `retrieved_context` holds the chunks returned by the search step; it is not printed here.
answer, retrieved_context = rag_pipeline(query)

print("🔍 Answer:\n")
print(answer)
