In [2]:
import os
import streamlit as st
from langchain.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain.llms import Ollama

In [8]:
# Notebook-level Ollama client for the "deepseek-r1:1.5b" model, used by the
# quick prompt cell that follows. setup_rag_pipeline builds its own client and
# does not read this variable.
llm = Ollama(model="deepseek-r1:1.5b")

In [9]:
# Send a trivial prompt to the model; the returned text is shown as the cell output.
llm.invoke('Hi there')

'<think>\n\n</think>\n\nHello! How can I assist you today? 😊'

In [11]:
# Step 1: Load and preprocess documents
def load_and_split_documents(file_path, chunk_size=1000, chunk_overlap=200):
    """Load a text file and split it into overlapping chunks.

    Args:
        file_path: Path of the text file handed to ``TextLoader``.
        chunk_size: ``chunk_size`` forwarded to
            ``RecursiveCharacterTextSplitter``. Defaults to 1000, the value
            that was previously hard-coded.
        chunk_overlap: ``chunk_overlap`` forwarded to the splitter.
            Defaults to 200, the value that was previously hard-coded.

    Returns:
        The list of chunk documents returned by ``split_documents``.
    """
    documents = TextLoader(file_path).load()

    # Split documents into chunks
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
    )
    return splitter.split_documents(documents)

In [17]:
# Chunk the sample file; the bare `texts` on the last line displays the
# resulting documents as the cell output.
texts = load_and_split_documents('test.txt')
texts

[Document(metadata={'source': 'test.txt'}, page_content='Hi there! my name is Dileep and I am a data scientist')]

In [16]:

# Step 2: Create embeddings and FAISS vector store
def create_vector_store(texts, model_name="all-MiniLM-L6-v2"):
    """Embed document chunks and index them in a FAISS vector store.

    Args:
        texts: Non-empty list of documents (as returned by
            ``load_and_split_documents``) to embed and index.
        model_name: Name of the embedding model passed to
            ``HuggingFaceEmbeddings``. Defaults to ``"all-MiniLM-L6-v2"``,
            the value that was previously hard-coded.

    Returns:
        The FAISS vector store built by ``FAISS.from_documents``.

    Raises:
        ValueError: If ``texts`` is empty.
    """
    # Fail fast, before the embedding model is constructed: there is nothing
    # to index, and a clear message beats an error raised deep inside FAISS.
    if not texts:
        raise ValueError(
            "Cannot build a FAISS vector store from an empty document list."
        )

    # Use a pre-trained embedding model (e.g., Sentence Transformers)
    embeddings = HuggingFaceEmbeddings(model_name=model_name)

    # Create FAISS vector store
    return FAISS.from_documents(texts, embeddings)


In [18]:
# Build the FAISS vector store from the chunks in `texts`; later cells pass it
# to setup_rag_pipeline.
vector_store=create_vector_store(texts)

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


In [19]:
# Display the vector store object's repr as the cell output.
vector_store

<langchain_community.vectorstores.faiss.FAISS at 0x1d3a41895b0>

In [22]:

# Step 3: Set up the RAG pipeline
def setup_rag_pipeline(vector_store, model_name="deepseek-r1:1.5b", k=3):
    """Build a ``RetrievalQA`` chain over the given vector store.

    Args:
        vector_store: Vector store whose ``as_retriever`` supplies the
            retriever for the chain.
        model_name: Ollama model name. Defaults to ``"deepseek-r1:1.5b"``,
            the value that was previously hard-coded.
        k: Value placed in the retriever's ``search_kwargs["k"]``.
            Defaults to 3, the value that was previously hard-coded.

    Returns:
        A ``RetrievalQA`` chain created with ``chain_type="stuff"`` and
        ``return_source_documents=True``.
    """
    # Initialize the Ollama LLM (DeepSeek R1 1.5B by default)
    llm = Ollama(model=model_name)
    retriever = vector_store.as_retriever(search_kwargs={"k": k})

    # Create a RetrievalQA chain
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type="stuff",
        retriever=retriever,
        return_source_documents=True,
    )


In [23]:
# Build the RetrievalQA chain on top of `vector_store`; the bare `qa_chain` on
# the last line displays the chain's repr as the cell output.
qa_chain =setup_rag_pipeline(vector_store)
qa_chain

RetrievalQA(verbose=False, combine_documents_chain=StuffDocumentsChain(verbose=False, llm_chain=LLMChain(verbose=False, prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.\n\n{context}\n\nQuestion: {question}\nHelpful Answer:"), llm=Ollama(model='deepseek-r1:1.5b'), output_parser=StrOutputParser(), llm_kwargs={}), document_prompt=PromptTemplate(input_variables=['page_content'], input_types={}, partial_variables={}, template='{page_content}'), document_variable_name='context'), return_source_documents=True, retriever=VectorStoreRetriever(tags=['FAISS', 'HuggingFaceEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001D3A41895B0>, search_kwargs={'k': 3}))

In [21]:

# Step 4: Query the RAG pipeline
def query_rag_pipeline(qa_chain, query):
    """Run a question through the QA chain.

    Args:
        qa_chain: Chain object exposing ``invoke``; it is called with
            ``{"query": query}`` and must return a mapping containing the
            keys ``"result"`` and ``"source_documents"``.
        query: The question text.

    Returns:
        A ``(result, source_documents)`` tuple taken from the chain output.
    """
    # Use .invoke() rather than calling the chain object directly: the
    # direct-call form (Chain.__call__) is deprecated in LangChain.
    output = qa_chain.invoke({"query": query})
    return output["result"], output["source_documents"]


In [25]:
# Ask a sample question through the chain and print the answer text;
# `source_docs` is kept for inspection in the next cell.
query = 'who is Dileep?'
result,source_docs = query_rag_pipeline(qa_chain, query)
print(result)

<think>
Okay, so I need to figure out who Dileep is. From the context given, it seems like he's known as "Dileep" or Dileep. He works as a data scientist. Let me think about how much background information I have.

I remember that sometimes people use first names and last names together when referring to themselves. So if he's using "Dileep," then perhaps his full name is Dileep [Last Name], making him a data scientist at some company or institution. Since the context doesn't mention any specific companies, I can't be sure about the location of his work. But knowing that his first name is Dileep gives me a sense of where he stands in the field.

I should consider if there's anything else in the context that might help narrow it down, but there doesn't seem to be additional information provided. So, based on what I have, I can conclude that Dileep is a data scientist whose first name is "Dileep" and perhaps has some background or experience relevant to his work.
</think>

Dileep is a we

In [26]:
# Print the source documents returned alongside the answer for the query above.
print(source_docs)

[Document(id='630f2182-43ef-4a19-9089-f97aa6494501', metadata={'source': 'test.txt'}, page_content='Hi there! my name is Dileep and I am a data scientist')]


In [None]:

# Streamlit UI
def main():
    """Render the Streamlit chat UI for the RAG pipeline.

    Streamlit re-executes this function from top to bottom on every user
    interaction, which drives three design choices here:

    * The vector store and QA chain are built once and kept in
      ``st.session_state`` instead of being rebuilt on every rerun.
    * The question is collected through a form with ``clear_on_submit=True``,
      so each submission is processed exactly once and the input is emptied
      afterwards. A plain ``st.text_input`` keeps its value across reruns,
      which, combined with a forced rerun, re-asks the same question forever.
    * The chat history is drawn into a container reserved above the input and
      filled in after the new turn has been recorded, so no forced rerun is
      needed and the latest answer and its sources stay visible.

    Returns:
        None. Returns early after showing an error if the document file is
        missing.
    """
    st.title("RAG Chatbot with DeepSeek R1 1.5B")

    # Load and process documents
    file_path = "test.txt"  # Replace with your document path
    if not os.path.exists(file_path):
        st.error(f"File not found: {file_path}")
        return

    # Build the pipeline once per browser session. A change to the file on
    # disk is only picked up by a new session.
    if "qa_chain" not in st.session_state:
        texts = load_and_split_documents(file_path)
        vector_store = create_vector_store(texts)
        st.session_state.qa_chain = setup_rag_pipeline(vector_store)
    qa_chain = st.session_state.qa_chain

    # Initialize chat history
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # Reserve the history area above the input; it is filled at the end of
    # this run so that it already includes the turn handled below.
    st.subheader("Chat History")
    history_area = st.container()

    # User input
    with st.form("question_form", clear_on_submit=True):
        user_query = st.text_input("Ask a question:")
        submitted = st.form_submit_button("Ask")

    if submitted and user_query.strip():
        # Query the RAG pipeline
        bot_response, source_docs = query_rag_pipeline(qa_chain, user_query)

        # Update chat history (keep only last 3 interactions)
        max_history = 3
        st.session_state.chat_history.append((user_query, bot_response))
        st.session_state.chat_history = st.session_state.chat_history[-max_history:]

        # Display the bot's response
        st.subheader("Bot's Response")
        st.markdown(bot_response)

        # Display source documents
        st.subheader("Source Documents")
        for doc in source_docs:
            st.markdown(doc.page_content)
            st.markdown("---")

    # Display chat history (including the turn just added, if any)
    with history_area:
        for past_query, past_response in st.session_state.chat_history:
            st.markdown(f"**You:** {past_query}")
            st.markdown(f"**Bot:** {past_response}")
            st.markdown("---")

if __name__ == "__main__":
    main()