In [1]:
import getpass
import os

os.environ["LANGSMITH_TRACING"] = "true"
os.environ["LANGSMITH_API_KEY"] = getpass.getpass()

if not os.environ.get("GROQ_API_KEY"):
  os.environ["GROQ_API_KEY"] = getpass.getpass("Enter API key for Groq: ")


In [2]:

from langchain.chat_models import init_chat_model

llm = init_chat_model("llama-3.3-70b-versatile", model_provider="groq")

In [3]:
llm.invoke("list 5 universities in california")

Failed to multipart ingest runs: langsmith.utils.LangSmithError: Failed to POST https://api.smith.langchain.com/runs/multipart in LangSmith API. HTTPError('403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Forbidden"}\n')


AIMessage(content='Here are 5 universities in California:\n\n1. Stanford University\n2. University of California, Berkeley\n3. University of California, Los Angeles (UCLA)\n4. University of Southern California (USC)\n5. California Institute of Technology (Caltech)', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 54, 'prompt_tokens': 41, 'total_tokens': 95, 'completion_time': 0.196363636, 'prompt_time': 0.004631911, 'queue_time': 0.23414583800000002, 'total_time': 0.200995547}, 'model_name': 'llama-3.3-70b-versatile', 'system_fingerprint': 'fp_7b42aeb9fa', 'finish_reason': 'stop', 'logprobs': None}, id='run-da03b0b1-f806-4b31-8bdf-5fe07a4d191f-0', usage_metadata={'input_tokens': 41, 'output_tokens': 54, 'total_tokens': 95})

Failed to send compressed multipart ingest: langsmith.utils.LangSmithError: Failed to POST https://api.smith.langchain.com/runs/multipart in LangSmith API. HTTPError('403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Forbidden"}\n')
Failed to send compressed multipart ingest: langsmith.utils.LangSmithError: Failed to POST https://api.smith.langchain.com/runs/multipart in LangSmith API. HTTPError('403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Forbidden"}\n')
Failed to send compressed multipart ingest: langsmith.utils.LangSmithError: Failed to POST https://api.smith.langchain.com/runs/multipart in LangSmith API. HTTPError('403 Client Error: Forbidden for url: https://api.smith.langchain.com/runs/multipart', '{"error":"Forbidden"}\n')
Failed to send compressed multipart ingest: langsmith.utils.LangSmithError: Failed to POST https://api.smith.langchain.com/runs/multipart in LangSmith API. HTTP

In [4]:
from langchain_huggingface import HuggingFaceEmbeddings

embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

  from .autonotebook import tqdm as notebook_tqdm
  return torch._C._cuda_getDeviceCount() > 0


In [5]:
#initialize the vector store
from langchain_chroma import Chroma

vector_store = Chroma(embedding_function=embeddings)

In [60]:
# docs[:5]

In [6]:

from langchain import hub
from langchain_community.document_loaders import TextLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict

# Load and chunk contents of the blog
loader = TextLoader("expanded_descriptions.txt")
docs = loader.load()

text_splitter = RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=500)
all_splits = text_splitter.split_documents(docs)

# Index chunks
_ = vector_store.add_documents(documents=all_splits)

# Define prompt for question-answering
prompt = hub.pull("rlm/rag-prompt")


In [29]:
# Define prompt for question-answering
prompt = """Use the following pieces of context to answer the question at the end.
If you don't know the answer, just say that you don't know, don't try to make up an answer.
Try to explain the answer in brief.
Always try to ask a follow up question for the given answer."
{context}

Question: {question}

Helpful Answer:"""

In [30]:
#state for application
class State(TypedDict):
    question : str
    context : List[Document]
    answer: str


# Define application steps
def retrieve(state: State):
    retrieved_docs = vector_store.similarity_search(state["question"])
    return {"context": retrieved_docs}


def generate(state: State):
    docs_content = "\n\n".join(doc.page_content for doc in state["context"])
    messages = prompt.format(
        question= state["question"], 
        context= docs_content
        )
    response = llm.invoke(messages)
    return {"answer": response.content}


In [31]:
# Compile application and test
graph_builder = StateGraph(State).add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")
graph = graph_builder.compile()

In [32]:
print(State["context"])

__main__.State['context']


In [34]:
response = graph.invoke({"question": "cwhat thing makes harvard better than stanford"})
print(response["answer"])

Based on the provided context, one thing that makes Harvard better than Stanford is its higher global ranking, with Harvard ranked 3rd globally, while Stanford is ranked 6th. Additionally, Harvard's research score (99.9) and citations score (99.3) are higher than those mentioned for the other universities, indicating a stronger research impact.

Follow-up question: What specific areas of research or academic programs do you think contribute to Harvard's higher ranking and research impact compared to Stanford?


In [63]:
state = {"question": "5 universities", "context": [], "answer": ""}
state.update(retrieve(state))
state.update(generate(state))

print(state["answer"])

The context only mentions 1 university: University of Maryland (https://umd.edu/). I don't know the other 4 universities.


In [64]:
state = {"question": "5 universities", "context": [], "answer": ""}
state.update(retrieve(state))
print(state["context"])

[Document(id='271b21a1-0914-4fae-9200-e4fdad62f96e', metadata={'seq_num': 40, 'source': '/home/jivan-acharya/Documents/Internship/Code/RAG-app/uni_with_description.json'}, page_content='& Theology,Sociology,Civil Engineering,Education,Other Health,Veterinary Science,Chemical Engineering,Communication & Media Studies", "Location": "College Park, United States", "Description": null, "uni_url": "https://umd.edu/"}'), Document(id='adc8963e-13e8-4475-a8db-22689f1e1035', metadata={'seq_num': 40, 'source': '/home/jivan-acharya/Documents/Internship/Code/RAG-app/uni_with_description.json'}, page_content='& Theology,Sociology,Civil Engineering,Education,Other Health,Veterinary Science,Chemical Engineering,Communication & Media Studies", "Location": "College Park, United States", "Description": null, "uni_url": "https://umd.edu/"}'), Document(id='a51ad530-6a95-4564-8372-8557481fcdaa', metadata={'seq_num': 40, 'source': '/home/jivan-acharya/Documents/Internship/Code/RAG-app/uni_with_description.js