In [1]:
# You can set the required environment variables (e.g. API keys) inline via os.environ
import os

In [2]:
# Or you can use a .env file.
# The path is relative, so it presumably resolves against the notebook's
# working directory — confirm when running from elsewhere.
# override=True asks python-dotenv to let values from the file replace
# variables that are already set in the environment.
from dotenv import load_dotenv
load_dotenv(dotenv_path="../../../.env", override=True)

True

In [8]:
from notebooks.module_3.lesson_4.app import langsmith_rag

# Smoke-test the imported RAG app with a sample question.
# NOTE: a later cell in this notebook defines its own `langsmith_rag`, which
# shadows this import once that cell has been executed.
question = "How do I create a github commit?"
langsmith_rag(question)

  ret = a @ b
  ret = a @ b
  ret = a @ b


'To create a GitHub commit, you need to call an intermediary server that authenticates using a GitHub Personal Access Token (PAT) with the appropriate scopes. The server will then be able to make commits to your repository. Ensure to generate the token in your GitHub settings and store it securely as an environment variable for your server.'

In [7]:
from langsmith import Client

# Reference examples as (question, reference answer) tuples. The answers are
# markdown-formatted strings that embed fenced code blocks via "\n" escapes.
example_dataset = [
    (
        "How do I create a new repository on GitHub from the command line?",
        "You can use the official GitHub CLI (`gh`). Once it's installed and you're authenticated, you can use the `gh repo create` command to create a new repository directly from your terminal.\n\n```bash\n# Create a new public repository named 'my-new-project'\ngh repo create my-new-project --public\n\n# Create a private one and push an existing local repo\ngh repo create my-secret-project --private --source=. --remote=origin\n```\n\nThis makes it fast and easy to get your projects onto GitHub without leaving the command line."
    ),
    (
        "How can I undo my last commit on GitHub?",
        "You undo commits locally using Git and then push the updated history to GitHub. A common way to undo the last commit is with `git reset`. Use `--soft` to keep your file changes, or `--hard` to discard them completely.\n\n```bash\n# Go back one commit, but keep your changes staged\ngit reset --soft HEAD~\n\n# WARNING: This will delete your changes permanently\ngit reset --hard HEAD~\n``` \n\nAfter resetting, you will likely need to force-push to update the branch on GitHub, for example: `git push origin main --force`."
    ),
    (
        "How do I resolve a merge conflict in a pull request?",
        "To resolve a merge conflict, you first need to pull the latest changes from the base branch into your feature branch. Git will then mark the conflicts in the files for you with markers (`<<<<<<<`, `=======`, `>>>>>>>`).\n\n```diff\n<<<<<<< HEAD\nThis is your conflicting change.\n=======\nThis is the conflicting change from the other branch.\n>>>>>>> branch-name\n```\n\nYou must manually edit the file to remove the markers and decide which code to keep. Once you've fixed all conflicts, you add the resolved files with `git add .`, commit them with `git commit`, and push the changes back to your branch to update the pull request."
    ),
]

client = Client()
dataset_name = "Github Questions"

# Prepare inputs and outputs: each example maps a question (input) to its
# reference answer (output).
inputs = [{"question": q} for q, _ in example_dataset]
outputs = [{"output": a} for _, a in example_dataset]

# Creating a dataset under a name that already exists fails, and re-adding the
# examples would duplicate them. Guard on existence so this cell can be re-run
# (e.g. Restart Kernel -> Run All) without errors or duplicate examples.
if client.has_dataset(dataset_name=dataset_name):
    dataset = client.read_dataset(dataset_name=dataset_name)
    print(f"Dataset '{dataset_name}' already exists; skipping creation.")
else:
    # Create dataset
    dataset = client.create_dataset(
        dataset_name=dataset_name, description="Technical questions about Github"
    )

    # Create examples in the dataset
    client.create_examples(
        inputs=inputs,
        outputs=outputs,
        dataset_id=dataset.id,
    )

    print(f"Dataset '{dataset_name}' created successfully.")

Dataset 'Github Questions' created successfully.


In [9]:
from langchain import hub
# Pull the prompt named "git_terminal" from Prompt Hub. `generate_response`
# later invokes it with the `context` and `question` variables.
# NOTE(review): no owner prefix is given, so this presumably refers to a prompt
# in your own workspace — confirm it exists before running.
prompt = hub.pull("git_terminal")

In [10]:
import os
import tempfile
from typing import List

import nest_asyncio
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders.sitemap import SitemapLoader
from langchain_community.vectorstores import SKLearnVectorStore
from langchain_openai import OpenAIEmbeddings
from langsmith import traceable
from langsmith.client import convert_prompt_to_openai_format
from openai import OpenAI
from openai.types.chat import ChatCompletion

# Model configuration. MODEL_NAME selects the chat model in `call_openai`;
# MODEL_PROVIDER and MODEL_NAME are also attached there as tracing metadata.
MODEL_NAME = "gpt-4o-mini"
MODEL_PROVIDER = "openai"
APP_VERSION = 1.0

# NOTE: the prompt is not hard-coded in this cell; `generate_response` uses the
# `prompt` object pulled from Prompt Hub.

# Shared client used by `call_openai`; presumably picks up the API key from the
# environment — confirm.
openai_client = OpenAI()

def get_vector_db_retriever():
    """Return a retriever backed by an SKLearn vector store persisted as parquet.

    The store lives at ``union.parquet`` inside the system temp directory.
    When that file is already present the store is reopened from it; when it
    is missing, the LangSmith docs sitemap is loaded, split into 500-token
    chunks with no overlap, embedded with ``OpenAIEmbeddings``, and persisted
    to that path before the retriever is returned.
    """
    store_path = os.path.join(tempfile.gettempdir(), "union.parquet")
    embeddings = OpenAIEmbeddings()

    if not os.path.exists(store_path):
        # No cached store yet: index the LangSmith documentation from scratch.
        sitemap_loader = SitemapLoader(
            web_path="https://docs.smith.langchain.com/sitemap.xml",
            continue_on_failure=True,
        )
        raw_docs = sitemap_loader.load()

        splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
            chunk_size=500, chunk_overlap=0
        )
        chunks = splitter.split_documents(raw_docs)

        store = SKLearnVectorStore.from_documents(
            documents=chunks,
            embedding=embeddings,
            persist_path=store_path,
            serializer="parquet",
        )
        store.persist()
    else:
        # Cached store found: reopen it instead of re-indexing.
        store = SKLearnVectorStore(
            embedding=embeddings,
            persist_path=store_path,
            serializer="parquet",
        )

    return store.as_retriever(lambda_mult=0)

# Apply nest_asyncio before building the retriever — presumably so the
# sitemap loading can run inside the notebook's already-running event loop;
# confirm.
nest_asyncio.apply()
# Built once when this cell runs; `retrieve_documents` reads this module-level name.
retriever = get_vector_db_retriever()

@traceable(run_type="chain")
def retrieve_documents(question: str):
    """Fetch documents from the vector store that match the user's question.

    Delegates to the module-level ``retriever`` and returns its result as-is.
    """
    matches = retriever.invoke(question)
    return matches

@traceable(run_type="chain")
def generate_response(question: str, documents):
    """Build the prompt from the retrieved documents and query the model.

    The ``page_content`` of every document is joined with blank lines and
    passed, together with ``question``, to the module-level ``prompt`` pulled
    from Prompt Hub. The formatted prompt is converted to OpenAI's message
    format and handed to ``call_openai``, whose result is returned unchanged.
    """
    page_texts = [doc.page_content for doc in documents]
    context_block = "\n\n".join(page_texts)
    prompt_value = prompt.invoke({"context": context_block, "question": question})
    openai_payload = convert_prompt_to_openai_format(prompt_value)
    return call_openai(openai_payload["messages"])

@traceable(
    run_type="llm",
    metadata={
        "ls_provider": MODEL_PROVIDER,
        "ls_model_name": MODEL_NAME
    }
)
def call_openai(messages: List[dict]) -> ChatCompletion:
    """Send chat messages to OpenAI and return the chat completion.

    Args:
        messages: Chat messages in OpenAI format (a list of dicts).

    Returns:
        The full completion object returned by the client, not a plain
        string. Callers extract the text themselves, as ``langsmith_rag``
        does via ``.choices[0].message.content``.
    """
    return openai_client.chat.completions.create(
        model=MODEL_NAME,
        messages=messages,
    )

@traceable(run_type="chain")
def langsmith_rag(question: str):
    """Answer a question with retrieval-augmented generation.

    Steps:
    - fetch documents for the question via ``retrieve_documents``
    - produce a completion from them via ``generate_response``
    - return the message content of the completion's first choice
    """
    retrieved = retrieve_documents(question)
    completion = generate_response(question, retrieved)
    first_choice = completion.choices[0]
    return first_choice.message.content


In [11]:
# Exercise the `langsmith_rag` pipeline defined in this notebook, which formats
# its prompt with the `prompt` object pulled from Prompt Hub.
question = "How to push a new change into a public repository?"
langsmith_rag(question)

  ret = a @ b
  ret = a @ b
  ret = a @ b


'To push a new change into a public repository, first stage your changes with `git add .`, then commit them using `git commit -m "Your commit message"`. Finally, push the changes to the remote repository with `git push origin main`, replacing "main" with your branch name if necessary.'