In [None]:
# !pip install langchain
# !pip install langchain-community
# !pip install langchain-openai
# !pip install langgraph
# !pip install "weaviate-client==3.*"
# !pip install pip-system-certs

In [1]:
import os
import requests
from typing import List, Dict, Any, TypedDict
from langchain_community.document_loaders import TextLoader

from langchain_core.documents import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
# from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Weaviate
# from langchain_openai import ChatOpenAI
from langchain_community.chat_models import ChatOllama
from langchain.text_splitter import CharacterTextSplitter
from langchain.schema.runnable import RunnablePassthrough
from langgraph.graph import StateGraph, END
import weaviate
from weaviate.embedded import EmbeddedOptions
import dotenv

In [2]:
# Load environment variables via python-dotenv; the call is the cell's last
# expression, so its boolean result is what the cell displays.
dotenv.load_dotenv()

True

In [3]:
# Load the source text. The encoding is passed explicitly because the file
# contains non-ASCII punctuation (curly quotes, em dashes); without it the
# loader relies on the platform default encoding, which is not UTF-8 on
# every system.
loader = TextLoader('./state_of_the_union.txt', encoding="utf-8")
documents = loader.load()
# Split the documents into chunks (500 characters max, 50 characters overlap).
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = text_splitter.split_documents(documents)

# Embed the chunks and store them in Weaviate.
# The client is configured to run an embedded Weaviate instance.
client = weaviate.Client(embedded_options=EmbeddedOptions())

# embedding=OpenAIEmbeddings(),
# Embedding model served by a local Ollama instance.
embedding_model = OllamaEmbeddings(
    base_url="http://localhost:11434",  # Default Ollama server address
    model="hf.co/CompendiumLabs/bge-base-en-v1.5-gguf",
)

# Vectors are supplied by `embedding_model`; by_text=False is forwarded
# unchanged to the Weaviate vector store.
vectorstore = Weaviate.from_documents(
    documents=chunks,
    embedding=embedding_model,
    client=client,
    by_text=False,
)

Started /Users/karineayrapetyants/.cache/weaviate-embedded: process ID 53618


{"action":"startup","default_vectorizer_module":"none","level":"info","msg":"the default vectorizer modules is set to \"none\", as a result all new schema classes without an explicit vectorizer setting, will use this vectorizer","time":"2025-10-17T19:30:07+03:00"}
{"action":"startup","auto_schema_enabled":true,"level":"info","msg":"auto schema enabled setting is set to \"true\"","time":"2025-10-17T19:30:07+03:00"}
{"level":"info","msg":"No resource limits set, weaviate will use all available memory and CPU. To limit resources, set LIMIT_RESOURCES=true","time":"2025-10-17T19:30:07+03:00"}
{"action":"grpc_startup","level":"info","msg":"grpc server listening at [::]:50060","time":"2025-10-17T19:30:07+03:00"}
{"action":"restapi_management","level":"info","msg":"Serving weaviate at http://127.0.0.1:8079","time":"2025-10-17T19:30:07+03:00"}
  embedding=OllamaEmbeddings(
{"level":"info","msg":"Created shard langchain_98da6e974c83443abfd07128c92cfdb0_W1SYEa6XALDH in 2.965541ms","time":"2025-10

In [4]:
# Expose the vector store through the retriever interface (no search
# parameters are overridden here).
retriever = vectorstore.as_retriever()
# llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
# Chat model served through Ollama; only the model name is set, so every
# other option (server address, sampling settings) is left at its default.
llm = ChatOllama(model="hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF")

  llm = ChatOllama(model="hf.co/bartowski/Llama-3.2-1B-Instruct-GGUF")


In [5]:
# --- 2. Define the state for LangGraph ---
class RAGGraphState(TypedDict):
    """State dictionary passed between the nodes of the RAG graph."""
    # The user's question; read by both nodes.
    question: str
    # Documents returned by the retriever for the question.
    documents: List[Document]
    # Answer text produced by the LLM; set to "" by the retrieval node.
    generation: str

# --- 3. Define the nodes (functions) ---

def retrieve_documents_node(state: RAGGraphState) -> RAGGraphState:
    """Fetch documents for the user's question.

    Reads ``state["question"]``, passes it to the module-level ``retriever``
    and returns a complete state dict containing the retrieved documents,
    the unchanged question and an empty ``generation`` placeholder.
    """
    user_question = state["question"]
    return {
        "documents": retriever.invoke(user_question),
        "question": user_question,
        "generation": "",
    }

def generate_response_node(state: RAGGraphState) -> RAGGraphState:
    """Produce an answer with the LLM from the retrieved documents.

    Reads ``question`` and ``documents`` from ``state``, fills the prompt with
    the question and the joined document texts, runs it through the
    module-level ``llm`` and returns a complete state dict whose
    ``generation`` entry holds the parsed string answer.
    """
    user_question = state["question"]
    retrieved_docs = state["documents"]

    # Prompt with two placeholders: {question} and {context}.
    rag_prompt = ChatPromptTemplate.from_template(
        """You are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use three sentences maximum and keep the answer concise.
Question: {question}
Context: {context}
Answer:
"""
    )

    # The context is the page contents of all documents, separated by blank lines.
    context_text = "\n\n".join(doc.page_content for doc in retrieved_docs)

    # prompt -> LLM -> plain-string output, invoked in one step.
    answer = (rag_prompt | llm | StrOutputParser()).invoke(
        {"context": context_text, "question": user_question}
    )
    return {
        "question": user_question,
        "documents": retrieved_docs,
        "generation": answer,
    }


In [6]:
# --- 4. Build the LangGraph graph ---

# The graph's state schema is RAGGraphState.
workflow = StateGraph(RAGGraphState)

# Add the nodes
workflow.add_node("retrieve", retrieve_documents_node)
workflow.add_node("generate", generate_response_node)

# Set the entry point
workflow.set_entry_point("retrieve")

# Add the edges (transitions): retrieve -> generate -> END
workflow.add_edge("retrieve", "generate")
workflow.add_edge("generate", END)

# Compile the graph
app = workflow.compile()


In [7]:
# --- 5. Run the RAG application ---
if __name__ == "__main__":
    # Both questions and their graph inputs are prepared up front; each input
    # only carries the "question" key.
    query = "What did the president say about Justice Breyer"
    query_2 = "What did the president say about the economy?"
    inputs = dict(question=query)
    inputs_2 = dict(question=query_2)

    # First request: print every item yielded by the graph stream.
    print("\n--- Запуск RAG-запроса ---")
    for s in app.stream(inputs):
        print(s)

    # Second request, streamed the same way.
    print("\n--- Запуск другого RAG-запроса ---")
    for s in app.stream(inputs_2):
        print(s)


--- Запуск RAG-запроса ---
{'retrieve': {'documents': [Document(metadata={'source': './state_of_the_union.txt'}, page_content='Tonight, I’d like to honor someone who has dedicated his life to serve this country: Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service. \n\nOne of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court.'), Document(metadata={'source': './state_of_the_union.txt'}, page_content='And I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.'), Document(metadata={'source': './state_of_the_union.txt'}, page_content='But in my administration, the watchdogs have been welcomed back. \n\nWe’re going after the criminals who stole billions in relief money m