In [None]:
!pip install openai

In [None]:
!pip install sentence-transformers --upgrade


In [None]:
!pip install ipywidgets jupyter --upgrade

In [None]:
!pip install faiss-cpu sentence-transformers

In [None]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import json
import pickle

class VectorDBBuilder:
    """Build a FAISS L2 index and a parallel metadata list from scraped JSON.

    The JSON file is expected to hold a list of dict entries. Every entry with
    a non-empty ``cleaned_content`` is embedded with ``all-MiniLM-L6-v2`` and
    added to the index; the metadata list is kept in the same order so that
    position ``i`` in the index corresponds to ``metadata[i]``.
    """

    def __init__(self, json_file, faiss_index_file="vector.index", metadata_file="metadata.pkl"):
        """Store the file locations and load the sentence-embedding model.

        Args:
            json_file: Path of the scraped JSON corpus to index.
            faiss_index_file: Path the FAISS index is written to.
            metadata_file: Path the pickled metadata list is written to.
        """
        self.json_file = json_file
        self.index_file = faiss_index_file
        self.metadata_file = metadata_file
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self.metadata = []

    def build_index(self):
        """Embed the corpus, then write the FAISS index and metadata to disk.

        Each metadata record carries ``url``, ``title`` and ``cleaned_content``.
        The text is stored because the prompt builders in this notebook read
        ``cleaned_content`` from the retrieved metadata; without it the LLM
        context would contain only titles and URLs.

        Raises:
            ValueError: If no entry in the JSON file has usable
                ``cleaned_content`` (an empty index cannot be built).
        """
        with open(self.json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        texts = []
        # Collect into a fresh list so that calling build_index() twice does
        # not append duplicates and desynchronise metadata from the index.
        metadata = []
        for entry in data:
            content = entry.get("cleaned_content")
            if not content:
                continue
            texts.append(content)
            metadata.append({
                # Scraped entries may lack a URL or title; fall back to an
                # empty string instead of aborting the whole build.
                "url": entry.get("url", ""),
                "title": entry.get("title", ""),
                "cleaned_content": content
            })

        if not texts:
            raise ValueError(
                f"No entries with 'cleaned_content' found in {self.json_file}; nothing to index."
            )

        self.metadata = metadata

        embeddings = self.model.encode(texts, convert_to_numpy=True, show_progress_bar=True)
        # FAISS works on contiguous float32 matrices.
        embeddings = np.ascontiguousarray(embeddings, dtype=np.float32)
        dimension = embeddings.shape[1]

        index = faiss.IndexFlatL2(dimension)
        index.add(embeddings)

        faiss.write_index(index, self.index_file)
        with open(self.metadata_file, 'wb') as f:
            pickle.dump(self.metadata, f)

        print(f"✅ FAISS index saved to {self.index_file}")
        print(f"✅ Metadata saved to {self.metadata_file}")

# Usage: index the scraped DRDO corpus with the default output file names.
if __name__ == "__main__":
    builder = VectorDBBuilder(json_file="drdo_scraped_with_pdfs2.json")
    builder.build_index()


In [None]:
def search(query, top_k=5):
    """Return the metadata of the indexed documents nearest to ``query``.

    Reads ``vector.index`` and ``metadata.pkl`` from the current working
    directory and embeds the query with ``all-MiniLM-L6-v2``. The index and
    the model are reloaded on every call, so this is meant for one-off
    lookups rather than tight loops.

    Args:
        query: Free-text search string.
        top_k: Maximum number of neighbours to return.

    Returns:
        A list of metadata entries ordered as returned by the index search.
        It is shorter than ``top_k`` when the index holds fewer vectors.
    """
    import pickle
    import faiss
    from sentence_transformers import SentenceTransformer

    index = faiss.read_index("vector.index")
    # NOTE: pickle.load can execute arbitrary code; only load a metadata.pkl
    # that was produced by a trusted run of the index builder.
    with open("metadata.pkl", "rb") as f:
        metadata = pickle.load(f)

    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_vector = model.encode([query])
    _distances, indices = index.search(query_vector, top_k)

    # FAISS pads the result with -1 when fewer than top_k neighbours exist.
    # Indexing metadata with -1 would silently return the LAST document, so
    # those placeholder ids are dropped.
    return [metadata[i] for i in indices[0] if i >= 0]

# Example usage: show the five closest documents for a sample question.
print(search("missile development heads", top_k=5))

In [None]:
!pip install ctransformers sentence-transformers faiss-cpu

In [None]:
!pip uninstall numpy -y
!pip install numpy==1.26.4

### Download this model and save it as `models/tinyllama-1.1b-chat-v1.0.Q8_0.gguf`

Download link (the model file is too large to be pushed to the repository):
https://huggingface.co/TheBloke/TinyLlama-1.1B-Chat-v1.0-GGUF/blob/main/tinyllama-1.1b-chat-v1.0.Q8_0.gguf

In [None]:
import os
os.environ["USE_TF"] = "0"

from ctransformers import AutoModelForCausalLM
from sentence_transformers import SentenceTransformer
import faiss, pickle

# Load your FAISS vector DB + metadata
# Both files are read from the current working directory; they are the default
# output names of VectorDBBuilder, so the index must have been built first.
index = faiss.read_index("vector.index")
# NOTE(review): pickle.load can execute arbitrary code — only load a
# metadata.pkl produced by a trusted run.
with open("metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

# Sentence embedding model (fast + free)
# Same model name that VectorDBBuilder uses to embed the corpus, so query
# vectors and indexed vectors come from the same model.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

# Load tiny local LLM (.gguf)
# The weights are looked up as models/tinyllama-1.1b-chat-v1.0.Q8_0.gguf.
# NOTE(review): gpu_layers=0 presumably keeps inference on the CPU — confirm
# against the ctransformers documentation.
llm = AutoModelForCausalLM.from_pretrained(
    "models",
    model_file="tinyllama-1.1b-chat-v1.0.Q8_0.gguf",
    model_type="llama",
    gpu_layers=0,
    max_new_tokens=512,
    temperature=0.7
)

def retrieve_context(query, k=5):
    """Return the metadata of up to ``k`` indexed documents nearest to ``query``.

    Uses the module-level ``embedding_model``, ``index`` and ``metadata``.

    Args:
        query: Free-text question to embed and look up.
        k: Maximum number of neighbours to retrieve.

    Returns:
        A list of metadata entries in the order returned by the index search;
        shorter than ``k`` when the index holds fewer vectors.
    """
    vec = embedding_model.encode([query])
    _distances, indices = index.search(vec, k)
    # FAISS fills missing neighbours with -1; metadata[-1] would silently
    # return the last document, so placeholder ids are skipped.
    return [metadata[i] for i in indices[0] if i >= 0]

def build_prompt(query, context):
    """Assemble the LLM prompt from the question and the retrieved documents.

    Every document contributes its title, its URL and at most the first 1000
    characters of its ``cleaned_content`` (an empty string when the key is
    absent). The documents are separated by a blank line.

    Args:
        query: The user's question.
        context: Iterable of dict-like documents with ``title`` and ``url``.

    Returns:
        The complete prompt string, ending with ``Answer:``.
    """
    sections = []
    for doc in context:
        title = doc['title']
        url = doc['url']
        excerpt = doc.get('cleaned_content', '')[:1000]
        sections.append(f"Title: {title}\nURL: {url}\n\n{excerpt}")
    context_text = "\n\n".join(sections)

    return f"""You are a helpful assistant. Use the context below to answer the question.

Context:
{context_text}

Question: {query}

Answer:"""

# Simple terminal agent
# Reads questions in a loop until the user types 'exit' or 'quit'.
if __name__ == "__main__":
    print("🤖 DRDO Agent is ready. Type 'exit' to quit.")
    while True:
        # Strip so that inputs such as "exit " or " quit" still end the session.
        q = input("\nYou: ").strip()
        if q.lower() in ['exit', 'quit']:
            break
        if not q:
            # Blank input: skip retrieval and generation instead of sending
            # an empty question to the model.
            continue
        docs = retrieve_context(q)
        prompt = build_prompt(q, docs)
        print("\nAgent:", llm(prompt))


Download Ollama from https://ollama.com/download and set it up.

Verify the installation by running `ollama --version`.

In [None]:
%pip install faiss-cpu langchain sentence-transformers requests ollama

## Install and Run Ollama
If you haven't already:

```bash
curl -fsSL https://ollama.com/install.sh | sh
```
Then start Ollama in your terminal:

```bash
ollama run mistral
```


In [None]:
from ollama import Client
from sentence_transformers import SentenceTransformer
import faiss
import pickle

# Connect to local Ollama
# NOTE(review): assumes an Ollama server is already listening on
# localhost:11434 — start it before running this cell.
client = Client(host='http://localhost:11434')

# Load FAISS vector index and metadata
# Both files are read from the current working directory; they are the default
# output names of VectorDBBuilder, so the index must have been built first.
index = faiss.read_index("vector.index")
# NOTE(review): pickle.load can execute arbitrary code — only load a
# metadata.pkl produced by a trusted run.
with open("metadata.pkl", "rb") as f:
    metadata = pickle.load(f)

# Same model name that VectorDBBuilder uses to embed the corpus, so query
# vectors and indexed vectors come from the same model.
embedding_model = SentenceTransformer("all-MiniLM-L6-v2")

def retrieve_context(query, k=5):
    """Return the metadata of up to ``k`` indexed documents nearest to ``query``.

    Uses the module-level ``embedding_model``, ``index`` and ``metadata``.

    Args:
        query: Free-text question to embed and look up.
        k: Maximum number of neighbours to retrieve.

    Returns:
        A list of metadata entries in the order returned by the index search;
        shorter than ``k`` when the index holds fewer vectors.
    """
    vec = embedding_model.encode([query])
    _distances, indices = index.search(vec, k)
    # FAISS fills missing neighbours with -1; metadata[-1] would silently
    # return the last document, so placeholder ids are skipped.
    return [metadata[i] for i in indices[0] if i >= 0]

def build_prompt(query, context):
    """Assemble the LLM prompt from the question and the retrieved documents.

    Every document contributes its title, its URL and at most the first 1000
    characters of its ``cleaned_content`` (an empty string when the key is
    absent). The documents are separated by a blank line.

    Args:
        query: The user's question.
        context: Iterable of dict-like documents with ``title`` and ``url``.

    Returns:
        The complete prompt string, ending with ``Answer:``.
    """
    sections = []
    for doc in context:
        title = doc['title']
        url = doc['url']
        excerpt = doc.get('cleaned_content', '')[:1000]
        sections.append(f"Title: {title}\nURL: {url}\n\n{excerpt}")
    context_text = "\n\n".join(sections)

    return f"""You are a helpful assistant. Use the context below to answer the question.

Context:
{context_text}

Question: {query}

Answer:"""

def chat_with_mistral(query):
    """Answer ``query`` with the ``mistral`` model using retrieved context.

    Retrieves context documents for the query, builds the prompt from them,
    sends it as a single user message through the module-level Ollama
    ``client`` and returns the text of the reply.
    """
    prompt = build_prompt(query, retrieve_context(query))
    user_message = {"role": "user", "content": prompt}
    reply = client.chat(model="mistral", messages=[user_message])
    return reply['message']['content']
# Interactive chat: keeps asking until the user types 'exit' or 'quit'.
while True:
    # Strip so that inputs such as "exit " or " quit" still end the session.
    user_query = input("🧠 You: ").strip()
    if user_query.lower() in ["exit", "quit"]:
        break
    if not user_query:
        # Blank input: skip retrieval and the model call instead of sending
        # an empty question.
        continue
    print("\n🤖 Agent:", chat_with_mistral(user_query))

# One-off example query, run after the interactive loop ends.
print(chat_with_mistral("What technologies are being used in missile systems?"))
