In [3]:
import json
import numpy as np
import faiss
import torch
from transformers import AutoTokenizer, AutoModel, AutoModelForSeq2SeqLM



  from .autonotebook import tqdm as notebook_tqdm


In [4]:
def load_data(file_path):
    """Read the JSON document stored at ``file_path`` and return the parsed object.

    The file is opened in text mode and decoded explicitly as UTF-8, so the
    result does not depend on the platform's default encoding.
    """
    with open(file_path, mode='r', encoding='utf-8') as json_file:
        parsed = json.load(json_file)
    return parsed

In [5]:
def prepare_retrieval_index(data, batch_size=32):
    """Embed every chunk's ``'content'`` text and store the vectors in a FAISS L2 index.

    Args:
        data: Iterable of chunks; any chunk without a ``'content'`` key is skipped.
        batch_size: Number of texts encoded per forward pass. Encoding in
            batches keeps memory bounded instead of pushing the whole corpus
            through the model at once.

    Returns:
        A tuple ``(index, texts)`` where ``index`` is a ``faiss.IndexFlatL2``
        holding one embedding per entry of ``texts``, in the same order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 or no chunk provides
            a ``'content'`` entry (there would be nothing to index).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Extract text chunks from the data
    texts = [chunk['content'] for chunk in data if 'content' in chunk]
    if not texts:
        raise ValueError("no chunk with a 'content' field was found; nothing to index")

    # Fall back to the CPU so the notebook still runs on machines without a GPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
    model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2").to(device)

    batch_embeddings = []
    with torch.no_grad():
        for start in range(0, len(texts), batch_size):
            batch_texts = texts[start:start + batch_size]
            inputs = tokenizer(batch_texts, padding=True, truncation=True, return_tensors="pt").to(device)
            token_states = model(**inputs).last_hidden_state

            # Average only over real tokens. A plain mean over dim=1 would also
            # average the padding positions, making a chunk's embedding depend
            # on the longest text in its batch and disagree with the embedding
            # of an unpadded query.
            mask = inputs['attention_mask'].unsqueeze(-1).to(token_states.dtype)
            summed = (token_states * mask).sum(dim=1)
            token_counts = mask.sum(dim=1).clamp(min=1e-9)
            batch_embeddings.append((summed / token_counts).cpu().numpy())

    # FAISS expects a contiguous float32 matrix.
    embeddings = np.ascontiguousarray(np.concatenate(batch_embeddings, axis=0), dtype=np.float32)

    # Create a FAISS index
    index = faiss.IndexFlatL2(embeddings.shape[1])  # L2 distance
    index.add(embeddings)
    return index, texts

In [6]:
# Loaded query encoders keyed by device, so the model is read from disk once
# per kernel session instead of once per question.
_QUERY_ENCODER_CACHE = {}


def _get_query_encoder(device):
    """Return the ``(tokenizer, model)`` pair for ``device``, loading it on first use."""
    if device not in _QUERY_ENCODER_CACHE:
        model_name = "sentence-transformers/all-MiniLM-L6-v2"
        _QUERY_ENCODER_CACHE[device] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModel.from_pretrained(model_name).to(device),
        )
    return _QUERY_ENCODER_CACHE[device]


def retrieve_relevant_text(query, index, texts):
    """Return the entry of ``texts`` whose indexed embedding is closest to ``query``.

    Args:
        query: The user's question as a string.
        index: Search index whose ``search(vectors, k=...)`` returns
            ``(distances, labels)``; row ``i`` of the index must correspond
            to ``texts[i]``.
        texts: Sequence of chunk texts in the order they were indexed.

    Returns:
        The single best-matching text.

    Raises:
        IndexError: If the search yields no neighbour (e.g. the index is empty).
    """
    # Fall back to the CPU so retrieval still works without a GPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tokenizer, model = _get_query_encoder(device)

    # Encode the query; truncation keeps an over-long question within the
    # encoder's maximum sequence length instead of failing in the forward pass.
    inputs = tokenizer(query, truncation=True, return_tensors="pt").to(device)
    with torch.no_grad():
        token_states = model(**inputs).last_hidden_state

    # Attention-mask-weighted mean pooling (identical to a plain mean when the
    # input carries no padding).
    mask = inputs['attention_mask'].unsqueeze(-1).to(token_states.dtype)
    pooled = (token_states * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)
    query_embedding = pooled.cpu().numpy()

    # Perform the search
    D, I = index.search(query_embedding, k=1)  # Retrieve top 1
    best = int(I[0][0])
    if best < 0:
        # FAISS labels a missing neighbour as -1; indexing texts with it would
        # silently return the *last* text instead of signalling the problem.
        raise IndexError("retrieval index returned no match (is the index empty?)")
    return texts[best]

In [7]:
# Loaded generators keyed by device, so the large seq2seq checkpoint is read
# once per kernel session instead of once per chat turn.
_GENERATOR_CACHE = {}


def _get_generator(device):
    """Return the ``(tokenizer, model)`` pair for ``device``, loading it on first use."""
    if device not in _GENERATOR_CACHE:
        model_name = "facebook/bart-large"
        _GENERATOR_CACHE[device] = (
            AutoTokenizer.from_pretrained(model_name),
            AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device),
        )
    return _GENERATOR_CACHE[device]


def generate_response(retrieved_text, query, max_new_tokens=128):
    """Generate an answer to ``query`` conditioned on ``retrieved_text``.

    Args:
        retrieved_text: Context passage returned by the retrieval step.
        query: The user's question.
        max_new_tokens: Upper bound on the number of generated tokens. Without
            an explicit bound ``generate()`` falls back to the library/checkpoint
            default length, which is short enough to cut answers off.

    Returns:
        The decoded model output with special tokens removed.
    """
    # Fall back to the CPU so generation still works without a GPU.
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    tokenizer, model = _get_generator(device)

    # Prepare the input for the generative model; the prompt is truncated to
    # 512 tokens so a long context cannot overflow the encoder input.
    input_text = f"Context: {retrieved_text}\nUser  Query: {query}\nResponse:"
    inputs = tokenizer(input_text, return_tensors="pt", max_length=512, truncation=True).to(device)

    # Generate the response
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=max_new_tokens)
    return tokenizer.decode(output[0], skip_special_tokens=True)

In [8]:
def run_rag_pipeline(data_path='chunks/dr-arunkumar_chunks.json'):
    """Run an interactive retrieve-then-generate chat loop over a chunked JSON file.

    Args:
        data_path: Path of the JSON file holding the text chunks. Defaults to
            the file this notebook was written for.

    The loop ends when the user types ``exit`` (case-insensitive, surrounding
    whitespace ignored) or when input is closed/interrupted.
    """
    # Load the JSON data
    data = load_data(data_path)

    # Prepare the retrieval index once; it is reused for every question.
    index, texts = prepare_retrieval_index(data)

    # NOTE(review): this banner reads like a sample question rather than a
    # greeting -- kept as-is; confirm the intended wording.
    print("who is arun kumar")
    print("Type 'exit' to end the chat.")

    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel should end the chat
            # cleanly instead of surfacing a traceback.
            print("\nChatbot: Goodbye!")
            break

        if not user_input:
            # Nothing to retrieve for a blank line; ask again.
            continue
        if user_input.lower() == 'exit':
            print("Chatbot: Goodbye!")
            break

        # Retrieve relevant text
        retrieved_text = retrieve_relevant_text(user_input, index, texts)

        # Generate a response
        response = generate_response(retrieved_text, user_input)
        print(f"Chatbot: {response}")


# Run the RAG pipeline
if __name__ == "__main__":
    run_rag_pipeline()

who is arun kumar
Type 'exit' to end the chat.


: 