In [1]:
import os
from dotenv import load_dotenv, find_dotenv
_ = load_dotenv(find_dotenv())
openai_api_key = os.environ["OPENAI_API_KEY"]

## Basic App: Question & Answering from a Document

In [2]:
from langchain_openai import OpenAI
from langchain.callbacks.tracers import LangChainTracer

In [3]:
llm = OpenAI()
tracer = LangChainTracer(project_name="SimpleRAG3")

**Load the text file**

In [4]:
from langchain.document_loaders import TextLoader

In [5]:
loader = TextLoader("data/be-good-and-how-not-to-die.txt")

In [6]:
document = loader.load()

**The document is loaded as a Python list with metadata**

In [7]:
print(type(document))

<class 'list'>


In [8]:
print(len(document))

1


In [9]:
print(document[0].metadata)

{'source': 'data/be-good-and-how-not-to-die.txt'}


In [10]:
print(f"You have {len(document)} document.")

You have 1 document.


In [11]:
print(f"Your document has {len(document[0].page_content)} characters")

Your document has 27423 characters


**Split the document in small chunks**

In [12]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [13]:
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=3000,
    chunk_overlap=400
)

In [14]:
document_chunks = text_splitter.split_documents(document)

In [15]:
print(f"Now you have {len(document_chunks)} chunks.")

Now you have 12 chunks.


**Convert text chunks in numeric vectors (called "embeddings")**

In [16]:
from langchain.embeddings.openai import OpenAIEmbeddings

In [17]:
embeddings = OpenAIEmbeddings()

  embeddings = OpenAIEmbeddings()


**Load the embeddings to a vector database**

In [18]:
from langchain.vectorstores import FAISS

*Careful: the next operation is expensive in OpenAI*

In [19]:
stored_embeddings = FAISS.from_documents(document_chunks, embeddings)

**Create a Retrieval Question & Answering Chain**

In [20]:
from langchain.chains import RetrievalQA

In [21]:
QA_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=stored_embeddings.as_retriever()
)

**Now we have a Question & Answering APP**

In [22]:
question = """
Who is the author of this article?
"""

In [23]:
QA_chain.run(question, callbacks=[tracer])

  QA_chain.run(question, callbacks=[tracer])


'\n\nThe author of this article is Paul Graham.'

## New way: with LCEL

In [24]:
from langchain.prompts import PromptTemplate

template = """Answer the question based only on the following context:
{context}

Question: {question}
"""
prompt = PromptTemplate.from_template(template)

In [25]:
retriever = stored_embeddings.as_retriever()

In [26]:
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough

In [27]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

In [28]:
chain.invoke("What is the target audience of this article? Describe it in less than 50 words.")

'\nThe target audience of this article is likely entrepreneurs or individuals interested in starting a business. It provides advice and insights on how to be successful in the startup world and avoid failure.'

*See that the previous chain went to the default project since we did not set that differently*

## Test Dataset

In [29]:
import langsmith
from langsmith.evaluation import RunEvalConfig
from langsmith.client import Client

from langchain_openai import ChatOpenAI
# Si usas prompts personalizados, importa desde langchain_core.prompts o langchain.prompts

# Define el evaluador con la API moderna
eval_config = RunEvalConfig(
    evaluators=[
        "cot_qa"
    ],
    custom_evaluators=[],
    eval_llm=ChatOpenAI(model="gpt-4", temperature=0)
)

client = Client()

# Asegúrate de que 'chain' esté definido con la API moderna de LangChain
chain_results = client.run_on_dataset(
    dataset_name="simpleRagDataset",
    factory=chain,  # o llm_or_chain_factory=chain según la versión
    evaluation=eval_config,
    project_name="test-loyal-conference-76",
    concurrency_level=5,
    verbose=True,
)

ImportError: cannot import name 'RunEvalConfig' from 'langsmith.evaluation' (C:\Users\Juan\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\langsmith\evaluation\__init__.py)