In [12]:
import os
# API key read from the LANGCHAIN_API environment variable (presumably the
# LangChain/LangSmith key, judging by the variable name — confirm).
# A missing variable is reported with an actionable message instead of a bare
# KeyError, matching how this notebook reports a missing GEMINI_API.
try:
    api_key = os.environ["LANGCHAIN_API"]
except KeyError:
    # `from None` hides the incidental KeyError so only the message below is shown.
    raise EnvironmentError(
        "The environment variable 'LANGCHAIN_API' is not set. Please set it to your API key."
    ) from None

In [13]:
# Gemini API key read from the GEMINI_API environment variable.
# This is the first place the notebook touches GEMINI_API, so the descriptive
# error is raised here; otherwise a top-to-bottom run would stop on a bare
# KeyError before ever reaching the guarded lookup in the embeddings cell.
try:
    gemini_api = os.environ["GEMINI_API"]
except KeyError:
    # `from None` hides the incidental KeyError so only the message below is shown.
    raise EnvironmentError(
        "The environment variable 'GEMINI_API' is not set. Please set it to your API key."
    ) from None

In [14]:
import bs4
import getpass
from langchain_community.document_loaders import WebBaseLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_google_genai import GoogleGenerativeAIEmbeddings, GoogleGenerativeAI
from langchain_chroma import Chroma

In [15]:
# Documents: load the Git cheat-sheet PDF into LangChain Document objects.
from langchain_community.document_loaders import PyPDFLoader

pdf_url = "https://education.github.com/git-cheat-sheet-education.pdf"
print(f"Loading document from URL: {pdf_url}...")

# The loader is given the remote URL directly; `docs` is consumed by the
# text-splitting cell further down.
loader = PyPDFLoader(file_path=pdf_url)
docs = loader.load()
print("succesfully loaded")

Loading document from URL: https://education.github.com/git-cheat-sheet-education.pdf...


Ignoring wrong pointing object 11 0 (offset 0)


succesfully loaded


In [16]:
# Chunking policy for the loaded documents: chunks of size 1000 with an
# overlap of 200 between neighbours (presumably measured in characters, the
# splitter's default length function — confirm).
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)

In [17]:
# Run the configured splitter over the loaded PDF documents. `splits` is the
# list that is later embedded and indexed in the vector store.
splits = text_splitter.split_documents(docs)

In [18]:
# Inspect the chunking result.
# NOTE(review): this prints the full repr of every chunk (metadata plus
# page_content), which produces a very large output; consider a bounded
# preview such as len(splits) and splits[0] instead.
print(splits)

[Document(metadata={'producer': 'Mac OS X 10.9.1 Quartz PDFContext', 'creator': 'Adobe Illustrator CC (Macintosh)', 'creationdate': "D:20140224195805Z00'00'", 'title': 'git-cheat-sheet-education', 'moddate': "D:20140224195805Z00'00'", 'source': 'https://education.github.com/git-cheat-sheet-education.pdf', 'total_pages': 2, 'page': 0, 'page_label': '1'}, page_content='GIT CHEAT SHEET\nSTAGE & SNAPSHOT\nWorking with snapshots and the Git staging area\ngit status\nshow modiﬁed ﬁles in working directory, staged for your next commit\ngit add [file]\nadd a ﬁle as it looks now to your next commit (stage)\ngit reset [file]\nunstage a ﬁle while retaining the changes in working directory\ngit diff\ndiﬀ of what is changed but not staged\ngit diff --staged\ndiﬀ of what is staged but not yet committed\ngit commit -m “[descriptive message]”\ncommit your staged content as a new commit snapshot\nSETUP\nConﬁguring user information used across all local repositories\ngit config --global user.name “[firs

In [19]:
import os

# Resolve the Gemini API key once, failing with an actionable message when the
# variable is missing.
try:
    gemini_key = os.environ["GEMINI_API"]
except KeyError:
    # `from None` hides the incidental KeyError so only the message below is shown.
    raise EnvironmentError(
        "The environment variable 'GEMINI_API' is not set. Please set it to your API key."
    ) from None

# Re-export the validated key under GOOGLE_API_KEY. The LLM cell constructs
# GoogleGenerativeAI without an explicit key, so it presumably relies on this
# variable — confirm against the langchain-google-genai docs.
os.environ["GOOGLE_API_KEY"] = gemini_key

# Embedding model used to vectorise the document chunks; the key is passed
# explicitly and no separate credentials object is supplied.
embeddings = GoogleGenerativeAIEmbeddings(
    model="models/gemini-embedding-001",
    api_key=gemini_key,
    credentials=None,
)

# Embed every chunk in `splits` and index it in a Chroma vector store
# (no persist_directory is passed here).
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=embeddings,
)

In [20]:
# Expose the vector store as a retriever using its default settings (no
# arguments are passed). It supplies the "context" input of the RAG chain.
retriever = vectorstore.as_retriever()

In [21]:
from langsmith import Client

# Pass the key loaded from LANGCHAIN_API explicitly. A bare Client() only
# consults its own environment variables (e.g. LANGCHAIN_API_KEY), which this
# notebook never sets, so `api_key` would otherwise go unused.
# Assumes LANGCHAIN_API holds a LangSmith API key — confirm.
client = Client(api_key=api_key)

# Pull the shared RAG prompt template; it takes `context` and `question`
# as input variables.
prompt = client.pull_prompt("rlm/rag-prompt")
print(prompt)


input_variables=['context', 'question'] input_types={} partial_variables={} metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'} messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [22]:
# Generation model for the RAG chain. No API key is passed here; presumably
# the client falls back to the GOOGLE_API_KEY variable exported in the
# embeddings cell — confirm.
llm = GoogleGenerativeAI(model="gemini-2.5-flash-lite")


In [23]:
def format_docs(docs):
    """Merge retrieved documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` of each document, in order, separated by one
        blank line. An empty iterable yields an empty string.
    """
    contents = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(contents)

In [24]:
# RAG pipeline: build the prompt inputs, fill the prompt, call the LLM, and
# parse the model output through StrOutputParser.
rag_chain = (
    # "context": the retriever's results for the input, joined into one string
    # by format_docs. "question": RunnablePassthrough(), which presumably
    # forwards the invoked input unchanged (per LangChain docs — confirm).
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    # The pulled prompt template takes `context` and `question`.
    | prompt
    | llm
    | StrOutputParser()
)

In [26]:
# Run the chain end to end with a free-form question; the value returned by
# invoke() is displayed as the cell output.
rag_chain.invoke("what is git - why is it used - what docs  used")

'Git is a free and open-source distributed version control system used for managing changes locally on your computer. It is utilized for tasks such as staging and snapshotting changes, configuring user information, initializing and cloning repositories, and managing branches and merges. The provided documentation includes a Git cheat sheet with commonly used commands and information on installation and GUIs.'