In [1]:
import gradio as gr
from langchain.memory import ConversationBufferMemory
from langchain_chroma import Chroma
from langchain_ollama import OllamaEmbeddings
from langchain_community.llms import Ollama

# Directory name of the existing (already persisted) vector database
DB_NAME = "vector-db"

# Load the vector database: Chroma is opened on DB_NAME and uses the Ollama
# "nomic-embed-text" model as its embedding function.
embeddings = OllamaEmbeddings(model="nomic-embed-text")
vectorstore = Chroma(persist_directory=DB_NAME, embedding_function=embeddings)
# NOTE(review): `retriever` is not referenced anywhere else in the visible
# code (the query function calls `vectorstore.similarity_search` directly).
retriever = vectorstore.as_retriever()

# Start the Ollama model
# NOTE(review): the captured cell output echoes this line, presumably as part
# of a deprecation warning for this import path — confirm and consider migrating.
llm = Ollama(model="llama3.2")

# Chat memory (conversation history buffer)
# NOTE(review): this line is also echoed in the captured cell output,
# presumably by a deprecation warning — confirm.
memory = ConversationBufferMemory()

def _format_chat_history(history):
    """Render the Gradio chat history as a ``Human:`` / ``AI:`` transcript.

    Accepts the ``type="messages"`` shape (a list of dicts carrying ``role``
    and ``content``) and, defensively, the legacy list-of-pairs shape
    (``[user_message, bot_message]``). Entries whose content is not plain
    text (for example file attachments) are skipped.

    Args:
        history: Conversation turns supplied by Gradio; may be ``None`` or empty.

    Returns:
        One line per message, joined with newlines; ``""`` when there is no
        usable history.
    """
    role_labels = {"user": "Human", "assistant": "AI"}
    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            label = role_labels.get(turn.get("role"))
            content = turn.get("content")
            if label and isinstance(content, str):
                lines.append(f"{label}: {content}")
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            for label, content in zip(("Human", "AI"), turn):
                if isinstance(content, str):
                    lines.append(f"{label}: {content}")
    return "\n".join(lines)


def query_rag_pipeline(user_query, history):
    """Answer a chat message using retrieved context plus the session history.

    The conversation transcript is built from the ``history`` argument that
    Gradio passes for the current browser session rather than from a
    process-global memory object. A global buffer is shared by every visitor
    of the app (which matters once the app is reachable through a public
    link) and does not follow the UI's clear/retry/undo actions.

    Args:
        user_query: The message the user just submitted.
        history: Previous turns of this session, as supplied by
            ``gr.ChatInterface``.

    Returns:
        The model's reply, as returned by ``llm.invoke``.
    """
    # Per-session conversation history, rendered as a plain-text transcript
    chat_history = _format_chat_history(history)

    # Fetch the most relevant documents for the query (RAG step)
    retrieved_docs = vectorstore.similarity_search(user_query, k=10)
    combined_context = " ".join(doc.page_content for doc in retrieved_docs)

    # Prompt template
    prompt = f"""
    You are a telecom assistant. Your answers should be based on the context and chat history provided. If the context is not relevant to the user's query, politely state that you do not have the required information.

    Chat History: {chat_history}
    
    Context: {combined_context}
    
    Question: {user_query}
    
    Answer:
    """

    # No manual history bookkeeping is needed here: Gradio appends this
    # exchange to the session history and passes it back on the next call.
    return llm.invoke(prompt)

# Gradio UI: build the chat interface first, then serve it.
demo = gr.ChatInterface(
    fn=query_rag_pipeline,
    title="Chatbot RAG Assistant 🤖",
    type="messages",
)
# share=True requests a temporary public URL in addition to the local one.
demo.launch(share=True)


  llm = Ollama(model="llama3.2")
  memory = ConversationBufferMemory()


* Running on local URL:  http://127.0.0.1:7863
* Running on public URL: https://11c400eb5dbbb051c0.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


