In [21]:
from langchain.indexes import VectorstoreIndexCreator 
from langchain_community.utilities import ApifyWrapper 
from langchain_core.documents import Document
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

In [22]:
# Load the .env file before building the chat model. This cell sits above the
# dotenv cell, so on a fresh kernel run top-to-bottom the environment would
# otherwise not be populated yet when ChatOpenAI is constructed.
# NOTE(review): presumably OPENAI_API_KEY is supplied via .env — confirm.
# Loading again in the later dotenv cell is harmless.
from dotenv import load_dotenv, find_dotenv

load_dotenv(find_dotenv(), override = True)

# Chat model used to answer questions over the index; temperature 0 keeps the
# sampling as deterministic as the model allows.
llm = ChatOpenAI(model_name = "gpt-4o-mini", temperature = 0)

In [3]:
from dotenv import load_dotenv, find_dotenv

# Find the nearest .env file and load it into the process environment.
# override=True lets values from the file replace variables that are already set.
# Left as the last expression so the cell shows whether anything was loaded.
load_dotenv(dotenv_path=find_dotenv(), override=True)

True

In [4]:
# Apify client wrapper; used later to run an actor via apify.call_actor(...).
# NOTE(review): presumably picks up the Apify API token from the environment
# loaded from .env — confirm.
apify = ApifyWrapper()

In [16]:
# Embedding model handed to VectorstoreIndexCreator when the index is built.
embedding = OpenAIEmbeddings(
    model="text-embedding-3-small",
)

In [26]:
def _item_to_document(item):
    """Convert one crawled dataset item into a LangChain Document.

    The item's "text" value becomes the page content (an empty string when
    that value is falsy) and its "url" value is stored under the "source"
    metadata key.
    """
    return Document(
        page_content=item["text"] or "",
        metadata={"source": item["url"]},
    )


# Run the website-content-crawler actor starting from the LangChain
# introduction page. The returned loader is fed to the index creator later.
loader = apify.call_actor(
    actor_id="apify/website-content-crawler",
    run_input={
        "startUrls": [
            {"url": "https://python.langchain.com/v0.2/docs/introduction/"},
        ],
    },
    dataset_mapping_function=_item_to_document,
)

In [27]:
# VectorstoreIndexCreator has to be told which embedding model to use,
# so it is passed explicitly here.
index_creator = VectorstoreIndexCreator(embedding=embedding)

# Build the index from the documents produced by the Apify loader.
index = index_creator.from_loaders([loader])



In [28]:
# Question to ask against the indexed page.
query = "What is LangChain?"

# Answer the question with the chat model; the result is a dict whose
# "answer" and "sources" entries are read in the next cell.
output = index.query_with_sources(
    question=query,
    llm=llm,
)

In [30]:
# Show the raw result dict, a separator line, then the answer and its sources,
# one item per line.
print(output, "#" * 50, output["answer"], output["sources"], sep="\n")

{'question': 'What is LangChain?', 'answer': 'LangChain is a framework for developing applications powered by large language models (LLMs). It simplifies every stage of the LLM application lifecycle, including development, productionization, and deployment. The framework consists of various open-source libraries, such as langchain-core, langchain-community, and LangGraph, which help in building robust applications and deploying them as REST APIs. LangChain also provides tools for debugging, testing, and monitoring LLM applications.\n\n', 'sources': 'https://python.langchain.com/v0.2/docs/introduction/'}
##################################################
LangChain is a framework for developing applications powered by large language models (LLMs). It simplifies every stage of the LLM application lifecycle, including development, productionization, and deployment. The framework consists of various open-source libraries, such as langchain-core, langchain-community, and LangGraph, which hel