In [2]:
import os
import openai
import dotenv
from llama_index.core import SimpleDirectoryReader


dotenv.load_dotenv()

openai.api_key = os.getenv("OPENAI_API_KEY")

documents = SimpleDirectoryReader(
    input_files=["./eBook-How-to-Build-a-Career-in-AI.pdf"]
).load_data()

documents[0]

Document(id_='d2bf2039-5731-4a53-91fa-7180a3299cd9', embedding=None, metadata={'page_label': '1', 'file_name': 'eBook-How-to-Build-a-Career-in-AI.pdf', 'file_path': 'eBook-How-to-Build-a-Career-in-AI.pdf', 'file_type': 'application/pdf', 'file_size': 3717673, 'creation_date': '2025-12-02', 'last_modified_date': '2025-12-02'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='PAGE 1\nFounder, DeepLearning.AI\nCollected Insights\nfrom Andrew Ng\nHow to \nBuild\nYour\nCareer\nin AI\nA Simple Guide\n', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}')

In [4]:
from llama_index.core import Document, VectorStoreIndex
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import Settings
from llama_index.llms.openai import OpenAI

Settings.llm = OpenAI(model="gpt-4o-mini", temperature=0.1)

Settings.embed_model = HuggingFaceEmbedding(
    model_name="BAAI/bge-small-en-v1.5"
)

document = Document(text="\n\n".join([doc.text for doc in documents]))
index = VectorStoreIndex.from_documents([document], llm=Settings.llm, embed_model=Settings.embed_model)

query_engine = index.as_query_engine()
response = query_engine.query("What is the main idea of the book?")
print(response)



2025-12-08 13:03:12,827 - INFO - Load pretrained SentenceTransformer: BAAI/bge-small-en-v1.5
2025-12-08 13:03:17,316 - INFO - 1 prompt is loaded, with the key: query
2025-12-08 13:03:28,617 - INFO - HTTP Request: POST https://api.openai.com/v1/chat/completions "HTTP/1.1 200 OK"


The main idea of the book is to provide a comprehensive guide for building a successful career in artificial intelligence (AI). It emphasizes the importance of learning foundational technical skills, working on meaningful projects to deepen those skills and build a portfolio, and effectively navigating the job search process in the AI field. The book also addresses the evolving nature of coding as a vital skill in the age of AI and encourages individuals to embrace AI literacy to enhance their professional opportunities.


##### Evaluation setup using TruLens

In [5]:
eval_questions = []
with open("./eval_questions.txt", "r") as file:
    eval_questions = file.readlines()

eval_questions = [question.strip() for question in eval_questions]


In [6]:
new_q = "How much math should I know to build a career in AI?"
eval_questions.append(new_q)

In [8]:
from trulens.core import Tru
from utils import get_prebuilt_trulens_recorder
import os

os.environ["TRULENS_OTEL_TRACING"] = "0"


tru = Tru()
tru.reset_database()

tru_recorder = get_prebuilt_trulens_recorder(query_engine, "Direct Query Evaluation")

with tru_recorder as recording:
    for question in eval_questions:
        response = query_engine.query(question)

records, feedbacks = tru.get_records_and_feedback(app_ids=[])
records.head()
tru.run_dashboard()

  tru = Tru()
2025-12-08 13:05:14,370 - INFO - Context impl SQLiteImpl.
2025-12-08 13:05:14,370 - INFO - Will assume non-transactional DDL.
2025-12-08 13:05:14,384 - INFO - Context impl SQLiteImpl.
2025-12-08 13:05:14,385 - INFO - Will assume non-transactional DDL.


âœ… In Answer Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
âœ… In Answer Relevance, input response will be set to __record__.main_output or `Select.RecordOutput` .
âœ… In Context Relevance, input prompt will be set to __record__.main_input or `Select.RecordInput` .
âœ… In Context Relevance, input response will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
âœ… In Groundedness, input source will be set to __record__.calls[-1].rets.source_nodes[:].node.text .
âœ… In Groundedness, input statement will be set to __record__.main_output or `Select.RecordOutput` .
ðŸ¦‘ Initialized with db url sqlite:///default.sqlite .
ðŸ›‘ Secret keys may be written to the database. See the `database_redact_keys` option of `TruSession` to prevent this.


Updating app_name and app_version in apps table: 0it [00:00, ?it/s]
Updating app_id in records table: 0it [00:00, ?it/s]
Updating app_json in apps table: 0it [00:00, ?it/s]

instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.embeddings.multi_modal_base.MultiModalEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.base.embeddings.base.BaseEmbedding'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.TransformComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'llama_index.core.schema.BaseComponent'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.embeddings.huggingface.base.HuggingFaceEmbedding'> for base




instrumenting <class 'llama_index.core.base.llms.types.LLMMetadata'> for base <class 'llama_index.core.base.llms.types.LLMMetadata'>
instrumenting <class 'llama_index.core.base.llms.types.LLMMetadata'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.core.base.llms.types.LLMMetadata'> for base <class 'object'>
instrumenting <class 'tuple'> for base <class 'tuple'>
instrumenting <class 'tuple'> for base <class 'object'>
instrumenting <class 'llama_index.core.indices.prompt_helper.PromptHelper'> for base <class 'llama_index.core.indices.prompt_helper.PromptHelper'>
instrumenting <class 'llama_index.core.indices.prompt_helper.PromptHelper'> for base <class 'llama_index.core.schema.BaseComponent'>
instrumenting <class 'llama_index.core.indices.prompt_helper.PromptHelper'> for base <class 'pydantic.main.BaseModel'>
instrumenting <class 'llama_index.core.indices.prompt_helper.PromptHelper'> for base <class 'object'>
instrumenting <class 'llama_index.core.response_



Starting dashboard ...



  tru.run_dashboard()
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Accordion(children=(VBox(children=(VBox(children=(Label(value='STDOUT'), Output())), VBox(children=(Label(valuâ€¦

Dashboard started at http://localhost:53224 .


<Popen: returncode: None args: ['streamlit', 'run', '--server.headless=True'...>