# Setting environment variables to log traces with Langsmith:

In [5]:
import os          # Imports Python's built-in "os module" for interacting with the operating system. (e.g environment variables)
from dotenv import load_dotenv          # Imports the "load_dotenv function" from the "dotenv module" to load environment variables from the .env file. 
import requests          # Imports the "requests library" to make HTTP requests (used to verify the Langsmith API connection)


load_dotenv(          # Loads environment variables from the .env file.
    dotenv_path = ".env",           #Specifies the path to the .env file, which contains environment variables. The default is .env in the current directory.
    override = True           # Allows the loaded environment variables in the .env file to override any existing environment variables. 
)

os.environ["LANGSMITH_TRACING"] = os.getenv("LANGSMITH_TRACING")          # Retrieves the value of the loaded "LANGSMITH_TRACING" variable from the .env file and sets it in Python's runtime environment. This ensures Langsmith can access the value.
os.environ["LANGSMITH_ENDPOINT"] = os.getenv("LANGSMITH_ENDPOINT")          # Retrieves the value of the loaded "LANGSMITH_ENDPOINT" variable from the .env file and sets it in Python's runtime environment. This ensures Langsmith can access the value.
os.environ["LANGSMITH_API_KEY"] = os.getenv("LANGSMITH_API_KEY")          # Retrieves the value of the loaded "LANGSMITH_API_KEY" variable from the .env file and sets it in Python's runtime environment. This ensures Langsmith can access the value.
os.environ["LANGSMITH_PROJECT"] = os.getenv("LANGSMITH_PROJECT")          # Retrieves the value of the loaded "LANGSMITH_PROJECT" variable from the .env file and sets it in Python's runtime environment. This ensures Langsmith can access the value.

headers = {          # Creates a dictionary to store HTTP headers for the request. This particular dictionary is for the "Authorization header" which is required to aunthenticate the request to the Langsmith API.
    "Authorization": f"Bearer {os.getenv("LANGSMITH_API_KEY")}"          # Sets the "Authorization header" with the value of the "LANGSMITH_API_KEY" environment variable. This is used to authenticate the request to the Langsmith API.
}
response = requests.get(          # Makes a GET request to Langsmith's API endpoint to verify the connection.
    "https://api.smith.langchain.com",          # The URL of Langsmith's API endpoint.
    headers=headers          # Passes the dictionary containing the "Authorization header" to authenticate the request.
)

print(response.status_code)          # Prints the HTTP status code of the response. A status code of 200 indicates a successful connection to the Langsmith API.
print(response.json())          # Prints the JSON response from the Langsmith API, which typically contains information about the API connection or any relevant data returned by the request.

404
{'detail': 'Not Found'}


# Loading the PDF document:

In [None]:
from langchain_community.document_loaders import PyMuPDFLoader          # Imports the "PyMuPDFLoader" class from LangChain's document loaders. This loader specializes in extracting text and metadata from PDF files using the PyMuPDF library.
import pprint          # Imports the "pprint module" for pretty-printing data structures, making them easier to read in the console.

file_path = r"C:\Users\user\Downloads\HANNY ABUBAKAR CV.pdf"          # Specifies the path to the PDF file that wiill be loaded. The "r" prefix ensures that the backslashes are treated as "literal characters" and not as escape sequences. 
loader = PyMuPDFLoader(          # Initializes the "PyMuPDFLoader" with the specified PDF file path. 
    file_path,          # The path of the file to be loaded.
    # mode="single"          # Specfies the mode in which the document will be loaded. The "single" mode means the entire document will be treated as one, the "page" mode means that each page will be treated as a separate document. 
)          
loaded_doc = loader.load()          # Executes the PDF parsing and text extraction process, returning a list of Document objects.     

print(f"This document has {len(loaded_doc)} pages.")
pprint.pp(loaded_doc)


This document has 4 pages.
[Document(metadata={'producer': 'Skia/PDF m124', 'creator': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/124.0.0.0 Safari/537.36', 'creationdate': '2025-05-14T16:31:22+00:00', 'source': 'C:\\Users\\user\\Downloads\\HANNY ABUBAKAR CV.pdf', 'file_path': 'C:\\Users\\user\\Downloads\\HANNY ABUBAKAR CV.pdf', 'total_pages': 4, 'format': 'PDF 1.4', 'title': 'Hanny Abubakar Resume', 'author': '', 'subject': '', 'keywords': '', 'moddate': '2025-05-14T16:31:22+00:00', 'trapped': '', 'modDate': "D:20250514163122+00'00'", 'creationDate': "D:20250514163122+00'00'", 'page': 0}, page_content="Hanny Abubakar\nA16 Vegas Court Estate, Lugbe, Abuja, FCT • (+234) 9059598249 • abubakarhannyom@gmail.com •\nlinkedin.com/in/hannyabubakar • https://www.hanny.com.ng/\nApplicant Graduate Research Intern\nI am an AI Software Developer with a background in Mechatronics Engineering looking to specialize in Robotics and\nArtificial Intelligence in 

# Splitting the loaded PDF document into chunks:


In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter          # Imports the "RecursiveCharacterTextSplitter" class from Langchain's text splitters. This class attempts to keep larger units (e.g., paragraphs or sentences) intact while keeping the text within a specified character limit. 

text_splitter = RecursiveCharacterTextSplitter(          # Initializes the "RecursiveCharacterTextSplitter" with specific paramaters on how to split the text.
    chunk_size = 1000,          # Defines the maximum number of characters in each chunk. (the text will be split into chunks that are at most 1000 characters long).
    chunk_overlap = 200,          # Defines the maximum number of characters that can overlap between consecutive chunks. 
)
all_chunks = text_splitter.split_documents(loaded_doc)          # Splits the loaded PDF document into chunks.

print(f"This document has been split into {len(all_chunks)} chunks.")          

for each_chunk in all_chunks:          
    print("")
    print(each_chunk.page_content)
    print("")
    print("-----" * 500)

This document has been split into 11 chunks.

Hanny Abubakar
A16 Vegas Court Estate, Lugbe, Abuja, FCT • (+234) 9059598249 • abubakarhannyom@gmail.com •
linkedin.com/in/hannyabubakar • https://www.hanny.com.ng/
Applicant Graduate Research Intern
I am an AI Software Developer with a background in Mechatronics Engineering looking to specialize in Robotics and
Artificial Intelligence in automobiles. Passionate about solving problems using technology and driving innovation in the tech
sector.
SKILLS
Tools and Frameworks: Arduino ​Uno, Keras, LangChain, NumPy, On-​ ​Board ​Diagnostics ​2, Pandas, PyTorch, Scikit-​ ​
Learn, Streamlit, TensorFlow
Software: Figma, Git ​&​ ​Github, MATLAB/​ ​Simulink, Solidworks, Visual ​Studio ​Code
PLC/ DCS Programming: Siemens
Process Control and Industrial Networks: TCP/​ ​IP
HMI Design: Win ​CC
Cloud Platforms: AWS, Azure, Google ​Cloud
Soft Skills: Adaptability, Communication ​Skills, Leadership, Problem ​Solving, Project ​Management, Research ​Skills,
Te