In [7]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from transformers import pipeline
import gc

# Global memory dictionary for storing session-wise history
session_memory = {}

def process_query(query, session_id="default"):
    """Processes the query and returns the response, with session memory."""

    # If session_id not in memory, initialize it
    if session_id not in session_memory:
        session_memory[session_id] = {
            "history": [],  # Stores (query, response) tuples
        }

    # Load and split document
    loader = TextLoader("Latest_news.txt", encoding="utf-8")
    docs = loader.load()
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
    chunks = splitter.split_documents(docs)

    # Create vector database
    embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
    vector_db = FAISS.from_documents(chunks, embedding_model)

    # Similarity search
    retrieved_docs = vector_db.similarity_search(query, k=3)
    context = "\n".join([doc.page_content for doc in retrieved_docs])

    # Add previous Q&A from this session to the context
    previous_context = "\n".join([f"Q: {q}\nA: {a}" for q, a in session_memory[session_id]["history"]])
    full_context = f"{previous_context}\n{context}" if previous_context else context

    # Generate response using LLM
    llm = pipeline("text-generation", model="G:\My Drive\llama-3.2-3b-instruct")
    prompt = f"Answer the question based on the context:\n{full_context}\n\nQuestion: {query}\nAnswer:"
    response = llm(prompt, max_new_tokens=300)[0]["generated_text"]

    # Store in session history
    session_memory[session_id]["history"].append((query, response))

    # Clean up memory
    del loader, docs, splitter, chunks, embedding_model, vector_db, retrieved_docs, context, llm, prompt
    gc.collect()

    return response

# Example usage
query = "Thanks for your informations"
response = process_query(query, session_id="user123")
print(response)


Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00,  8.54it/s]
Device set to use cuda:0
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Answer the question based on the context:
Hi, I’m M.Ajay sivakumar, and I’m from Tamil Nadu, India. I’m an AI enthusiast deeply passionate about artificial intelligence and its potential to shape the future. I’ve been building intelligent systems like transformers, self-learning agents, and research-driven LLMs from scratch.
I’ve also worked on fine-tuning models, creating agents, and exploring the foundations of AGI. 
My ultimate aim is to work at OpenAI and contribute to developing responsible, powerful AI.
My ultimate aim is to work at OpenAI and contribute to developing responsible, powerful AI. 
I believe in learning continuously, pushing my limits, and stepping out of my comfort zone — which is why I also dream of working in the U.S. or Europe to gain global experience. 
Every day, I’m growing not just as a developer, but as someone who wants to make a real impact in the world through AI.
personal stuggels: "The Chronicles of Lord Ajay: A Heart That Chose Love Even in Chaos"
But 