In [1]:
import langchain
import os
import bs4
from pprint import pprint
from langchain import hub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.vectorstores import Chroma
from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Read key/value pairs from a local .env file into the process environment.
# NOTE(review): presumably this is where the OpenAI / LangSmith credentials used
# by later cells come from — confirm against the project's .env file.
from dotenv import load_dotenv
load_dotenv()

True

In [3]:
# Project name under which LangSmith should group this notebook's traces.
# A bare Python variable is never read by the tracing client, so the value is
# also exported to the process environment. The stray trailing space that was
# in the original name has been removed so the project name matches exactly.
LANGSMITH_PROJECT = "rag-virtual-assistant-course"
os.environ["LANGSMITH_PROJECT"] = LANGSMITH_PROJECT

# INDEXING

In [4]:
# --- 1. Fetch the source articles -------------------------------------------
SOURCE_URLS = [
    "https://www.reuters.com/world/europe/greece-ask-eu-fiscal-leeway-defence-spending-minister-says-2025-04-29/",
    "https://www.ekathimerini.com/economy/1264299/moodys-upgrade-of-the-greek-economy-is-significant-says-govt-spox/",
]

# Tags handed to BeautifulSoup as the `parse_only` filter.
# Optional: drop `bs_kwargs` below if the sites need no specific filtering.
KEPT_TAGS = ["article", "body", "main", "section", "div", "p"]

loader = WebBaseLoader(
    web_paths=SOURCE_URLS,
    bs_kwargs={"parse_only": bs4.SoupStrainer(KEPT_TAGS)},
)
docs = loader.load()

# --- 2. Cut the pages into overlapping chunks --------------------------------
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
splits = text_splitter.split_documents(docs)

# --- 3. Embed the chunks and index them --------------------------------------
# NOTE(review): no persistence directory or collection name is given, so
# re-running this cell in the same kernel may add the same chunks again —
# confirm, and restart the kernel before re-indexing if so.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)

# Retriever with default search settings; consumed by the RAG chain below.
retriever = vectorstore.as_retriever()

# RETRIEVAL + GENERATION

In [5]:
# Fetch the community RAG prompt template from LangChain Hub by its identifier
RAG_PROMPT_ID = "rlm/rag-prompt"
prompt = hub.pull(RAG_PROMPT_ID)

In [6]:
# Inspect the raw template text of the first message in the pulled prompt
first_message = prompt.messages[0]
human_prompt = first_message.prompt.template
print(human_prompt)

You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.
Question: {question} 
Context: {context} 
Answer:


In [7]:
# Chat model that generates the final answer; temperature is pinned to 0
llm = ChatOpenAI(model_name="gpt-4.1", temperature=0)

# Post-processor that turns retrieved documents into one context string
def format_docs(docs):
    """Join the ``page_content`` of every document with a blank line in between.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        A single string; empty when ``docs`` yields nothing.
    """
    contents = [doc.page_content for doc in docs]
    return "\n\n".join(contents)

In [8]:
# Assemble the RAG pipeline.
# Step 1 builds the prompt variables: the incoming question is sent through the
# retriever and `format_docs` to produce "context", and is also forwarded
# unchanged as "question".
chain_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Step 2 pipes those variables through the prompt, the chat model, and a parser
# that yields the reply as a plain string.
rag_chain = chain_inputs | prompt | llm | StrOutputParser()


# ASK A QUESTION

In [9]:
# Opening question: run it through the chain and show the generated answer
question = "How is the Greek economy doing in 2025?"
response = rag_chain.invoke(question)
print(response)

In 2025, the Greek economy is performing well, as evidenced by Moody’s upgrading Greece to investment grade—a significant milestone. The upgrade reflects rapid debt reduction, increased revenues from anti-tax evasion measures, strong budget performance, and a decline in non-performing loans. The government highlights these improvements as key indicators of economic progress.


In [10]:
# Follow-up question. Only this string is handed to `rag_chain`; no earlier
# question or answer is passed along with it.
question = "What about financial benefits?"
response = rag_chain.invoke(question)
print(response)

The recent upgrade of Greece’s economy to investment grade by Moody’s brings significant financial benefits. It reflects improved economic stability, reduced debt, increased revenues, and a decline in non-performing loans, which can lower borrowing costs and attract more investment. This upgrade is expected to boost investor confidence and support further economic growth.


In [11]:
# Second follow-up. As before, the chain receives just this string, so "this"
# is not tied to any earlier turn by the code itself.
question = "How will this relief the households?"
response = rag_chain.invoke(question)
print(response)

The upgrade of Greece’s economy to investment grade by Moody’s is expected to benefit households by improving the country’s financial stability and lowering borrowing costs. This can lead to lower interest rates on loans and mortgages, making credit more affordable for families. Additionally, increased investor confidence may boost economic growth and job opportunities, further supporting household incomes.
