In [None]:
# Core dependencies
#%pip install python-dotenv

# LangChain ecosystem
#%pip install langchain
#%pip install langchain-openai
#%pip install langchain-chroma
#%pip install langchain-google-community[drive]

# Vector store
#%pip install chromadb

# LLM APIs
#%pip install openai
#%pip install anthropic

# LangGraph
#%pip install langgraph


In [None]:
import os
import time
from dotenv import load_dotenv

# LangChain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_community import GoogleDriveLoader
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain.chat_models import init_chat_model

# LangGraph
from langchain_core.tools import create_retriever_tool
from langgraph.prebuilt import ToolNode, tools_condition
from langgraph.graph import StateGraph, MessagesState, START, END
from langchain_core.messages import SystemMessage

# Typing
from typing import Annotated
from typing_extensions import TypedDict

In [None]:
load_dotenv(override=True)

In [None]:


# Export the API keys that the downstream clients read from the environment.
# A value already present (e.g. loaded from .env) is kept; otherwise the
# placeholder string below is written and must be replaced by hand.
for _key_name in ('OPENAI_API_KEY', 'LANGCHAIN_API_KEY'):
    os.environ[_key_name] = os.getenv(_key_name, 'your-key-if-not-using-env')

# Path to the Google credentials file (relative to the working directory).
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "credentials.json"

In [None]:
# Add a delay between sheet extractions so we don't exceed Google's API quota limit.
# Unwrap first so that re-running this cell never stacks one delay wrapper on
# top of another (each extra layer would add another 2-second pause per sheet).
original_load_sheet = getattr(
    GoogleDriveLoader._load_sheet_from_id,
    "__wrapped__",
    GoogleDriveLoader._load_sheet_from_id,
)


def load_sheet_with_delay(self, sheet_id):
    """Load one sheet through the original loader method after a fixed pause.

    Args:
        self: The GoogleDriveLoader instance the method is bound to.
        sheet_id: Identifier of the sheet, passed through unchanged.

    Returns:
        Whatever the original `_load_sheet_from_id` returns for this sheet.
    """
    print(f"Loading sheet: {sheet_id}")
    time.sleep(2)  # Wait 2 seconds between each sheet
    return original_load_sheet(self, sheet_id)


# Tag the wrapper with the method it wraps, then install it on the class.
# Without this assignment the loader keeps calling the unpatched method and
# the delay defined above never takes effect.
load_sheet_with_delay.__wrapped__ = original_load_sheet
GoogleDriveLoader._load_sheet_from_id = load_sheet_with_delay



In [None]:
# Loader over the whole Drive ("root"), descending into sub-folders.
# `file_types` accepts the singular short names "document", "sheet" and "pdf"
# (or the corresponding full MIME types); the plural spellings "documents" and
# "sheets" are rejected by the loader's input validation.
loader = GoogleDriveLoader(
    folder_id="root",
    credentials_path="credentials.json",
    token_path="token.json",
    recursive=True,
    file_types=["pdf", "sheet", "document"],
    # Read-only scope: the notebook only ever downloads content.
    scopes=['https://www.googleapis.com/auth/drive.readonly']
)

In [None]:
# Run the Drive loader configured above and keep the resulting documents in
# memory; later cells (splitting, indexing) read this `documents` list.
documents = loader.load()
print(f"Loaded {len(documents)} documents")

In [None]:
import time

In [None]:
# Sanity check: report how many documents are held in the `documents`
# variable, or say so when the variable has not been created in this kernel.
try:
    loaded_count = len(documents)
except NameError:
    print("'documents' variable not found - need to reload or restore")
else:
    print(f"Documents in memory: {loaded_count} documents")

In [None]:
# The sheets and PDFs had to be loaded separately from the main loader run.
# The cells that created `sheets_documents` / `pdf_documents` are not part of
# this notebook, so on a fresh kernel those names do not exist. Fall back to an
# empty list (with a notice) instead of stopping with a NameError.
for _separately_loaded in ("sheets_documents", "pdf_documents"):
    if _separately_loaded not in globals():
        print(f"'{_separately_loaded}' not found - continuing without it")
        globals()[_separately_loaded] = []

all_documents = documents + sheets_documents + pdf_documents
print(f"Total documents: {len(all_documents)}")

In [None]:
# Split the documents into overlapping chunks before embedding them.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200
)
# Split the combined list built in the previous cell, so the separately loaded
# sheets and PDFs are indexed too (splitting `documents` alone dropped them).
split_docs = text_splitter.split_documents(all_documents)

# Embed every chunk and persist the resulting Chroma collection on disk.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    documents=split_docs,
    embedding=embeddings,
    # Placeholder path: replace with the real persistence directory.
    # (The closing quote was a stray "¨", which made the cell a syntax error.)
    persist_directory="......................."
)

In [None]:
# Re-open the persisted Chroma collection without re-embedding anything.
# The directory must be the same one used when the store was built.
vectorstore = Chroma(
    # Placeholder path: replace with the real persistence directory.
    # (The closing quote was a stray "¨", which made the cell a syntax error.)
    persist_directory=".......................",
    embedding_function=OpenAIEmbeddings()
)

In [None]:
# Similarity-search retriever over the vector store; this object is later
# wrapped as a tool for the LLM.
retriever = vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 12}  # Return the 12 most relevant chunks per query (hand-picked value; tune as needed)
)

In [None]:
############################## NOW WE CREATE A LANGGRAPH SYSTEM THAT USES CLAUDE (ANTHROPIC) ##########################

In [None]:

###New Libraries needed for langgraph
from langchain_core.tools import create_retriever_tool
from langgraph.prebuilt import ToolNode
from langgraph.graph import MessagesState

In [None]:
import os
from langchain.chat_models import init_chat_model
load_dotenv(override=True)

# The Anthropic key is mandatory for the model created below. Assigning the
# result of os.getenv() straight into os.environ raises an opaque
# "TypeError: str expected, not NoneType" when the variable is missing, so
# check explicitly and fail with a readable message instead.
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
if not anthropic_api_key:
    raise RuntimeError(
        "ANTHROPIC_API_KEY is not set; add it to your .env file or environment."
    )
os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key

# The LangChain tracing key is optional: re-export it only when present.
langchain_api_key = os.getenv("LANGCHAIN_API_KEY")
if langchain_api_key:
    os.environ["LANGCHAIN_API_KEY"] = langchain_api_key

llm = init_chat_model("anthropic:claude-3-5-sonnet-latest")

In [None]:
## Create a state graph: each node receives the current state as input and returns an update to that state.
# Updates to `messages` are appended to the existing list rather than overwriting it, thanks to the prebuilt `add_messages` reducer.
from typing import Annotated

from typing_extensions import TypedDict

from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages


class State(TypedDict):
    """Schema of the state dictionary shared by the nodes of the graph."""
    # Messages have the type "list". The `add_messages` function
    # in the annotation defines how this state key should be updated
    # (in this case, it appends messages to the list, rather than overwriting them)
    messages: Annotated[list, add_messages]
    # Plain list with no reducer annotation. No node in the cells shown here
    # writes to this key.
    retrieved_docs: list  


# Builder for a graph whose nodes exchange a `State` dictionary.
graph_builder = StateGraph(State)

In [None]:
def chatbot(state: State):
    """Send the conversation so far to the LLM and return its reply as a state update.

    Args:
        state: Current graph state; only its "messages" entry is read.

    Returns:
        A dict with a single "messages" key holding a one-element list with
        the model's reply.
    """
    reply = llm.invoke(state["messages"])
    return {"messages": [reply]}


# Register the function as a node: the first argument is the unique node
# name, the second is the callable run whenever that node is used.
graph_builder.add_node("chatbot", chatbot)

In [None]:
from langchain.chat_models import init_chat_model
from typing_extensions import TypedDict

from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages


In [None]:
# Linear flow: START -> chatbot -> END (a single node, no branching).
graph_builder.add_edge(START, "chatbot")
graph_builder.add_edge("chatbot", END)
# Compile the builder into the `graph` object that the chat loop streams from.
graph = graph_builder.compile()


In [None]:
# Set the LangChain tracing toggle to "false" for the rest of this session.
os.environ["LANGCHAIN_TRACING_V2"] = "false"

In [None]:
def stream_graph_updates(user_input: str):
    """Stream one user turn through `graph` and print each update's newest message.

    Args:
        user_input: The text typed by the user for this turn.
    """
    turn_input = {"messages": [{"role": "user", "content": user_input}]}
    for update in graph.stream(turn_input):
        for node_output in update.values():
            newest = node_output["messages"][-1]
            print("Assistant:", newest.content)


# Interactive chat loop. Type "quit", "exit" or "q" to stop.
while True:
    try:
        try:
            user_input = input("User: ")
        except (EOFError, NotImplementedError):
            # fallback if input() is not available: a closed stdin raises
            # EOFError, and a kernel without stdin support raises a
            # NotImplementedError subclass. Run one canned question and stop.
            user_input = "What do you know about LangGraph?"
            print("User: " + user_input)
            stream_graph_updates(user_input)
            break

        if user_input.lower() in ["quit", "exit", "q"]:
            print("Goodbye!")
            break

        # Model or graph failures are deliberately not caught here: they should
        # surface as errors instead of silently triggering the fallback question.
        stream_graph_updates(user_input)
    except KeyboardInterrupt:
        # Interrupting the cell means "stop chatting".
        print("\nGoodbye!")
        break

In [None]:
import os
from dotenv import load_dotenv
# UPDATED IMPORT - changed from langchain.tools.retriever
from langchain_core.tools import create_retriever_tool
from langgraph.prebuilt import ToolNode, tools_condition
from langgraph.graph import StateGraph, MessagesState, START, END
from langchain.chat_models import init_chat_model
from langchain_core.messages import SystemMessage

# Load environment variables
load_dotenv(override=True)

# Assigning os.getenv() straight into os.environ raises an opaque
# "TypeError: str expected, not NoneType" when the key is missing, so check
# explicitly and fail with a readable message instead.
anthropic_api_key = os.getenv("ANTHROPIC_API_KEY")
if not anthropic_api_key:
    raise RuntimeError(
        "ANTHROPIC_API_KEY is not set; add it to your .env file or environment."
    )
os.environ["ANTHROPIC_API_KEY"] = anthropic_api_key

# Initialize LLM
llm = init_chat_model("claude-3-5-sonnet-20241022", model_provider="anthropic")

# Wrap the existing retriever as a tool the LLM can call. The name and
# description are what the model is given to decide when to use it.
retriever_tool = create_retriever_tool(
    retriever,  # The retriever built earlier from the Chroma vector store
    name="retrieve_documents", 
    description="Search and return relevant information from the document collection to answer questions."
)

# LLM with tools bound: this variant may answer directly or request a
# `retrieve_documents` call. The plain `llm` stays available without tools.
llm_with_tools = llm.bind_tools([retriever_tool])

# SYSTEM MESSAGE FOR MARKDOWN FORMATTING
# Prepended to the conversation by the graph nodes when no system message is
# present yet.
MARKDOWN_SYSTEM_MESSAGE = SystemMessage(content="""You are a helpful AI assistant that always responds in well-formatted Markdown.

Follow these formatting guidelines:
- Use **bold** for important terms and concepts
- Use *italics* for emphasis
- Use `code blocks` for technical terms, file names, or code
- Use bullet points with - or * for lists
- Use ## for section headers when organizing longer responses
- Use > for quotes or important notes
- Use numbered lists (1., 2., 3.) for step-by-step instructions
- When citing sources, use proper markdown links if available

Always make your responses clear, well-structured, and visually appealing using markdown formatting.""")

def query_or_respond(state: MessagesState):
    """LLM decides whether to retrieve or respond directly.

    Args:
        state: Current graph state; only its "messages" entry is read.

    Returns:
        A dict with a single "messages" key holding a one-element list with
        the model's response (which may contain a tool call).
    """
    messages = state["messages"]

    # Add system message if this is the start of conversation. It is only
    # prepended to the prompt for this call; the returned update contains just
    # the response, so the system message is not written back into the state.
    if not any(isinstance(msg, SystemMessage) for msg in messages):
        messages = [MARKDOWN_SYSTEM_MESSAGE, *messages]

    # Invoke with the prepared list. Passing state["messages"] here would
    # discard the system message added above.
    response = llm_with_tools.invoke(messages)
    return {"messages": [response]}

def generate_response(state: MessagesState):
    """Generate final response using retrieved context.

    Args:
        state: Current graph state; only its "messages" entry is read.

    Returns:
        A dict with a single "messages" key holding a one-element list with
        the model's response.
    """
    messages = state["messages"]

    # Ensure system message is present. It is only prepended to the prompt for
    # this call and is not written back into the state.
    if not any(isinstance(msg, SystemMessage) for msg in messages):
        messages = [MARKDOWN_SYSTEM_MESSAGE, *messages]

    # Invoke with the prepared list. Passing state["messages"] here would
    # discard the system message added above.
    response = llm.invoke(messages)
    return {"messages": [response]}

# Create ToolNode for automatic tool execution
tools = ToolNode([retriever_tool])

# Build the graph. This rebinds `graph_builder` to a new builder that uses
# the prebuilt MessagesState schema.
graph_builder = StateGraph(MessagesState)
graph_builder.add_node("query_or_respond", query_or_respond)
graph_builder.add_node("retrieve", tools)
graph_builder.add_node("generate", generate_response)

# Add conditional edges: after "query_or_respond", `tools_condition` picks
# the route, and the mapping below translates its "tools" result to the
# "retrieve" node and its END result to the end of the run.
graph_builder.add_edge(START, "query_or_respond")
graph_builder.add_conditional_edges(
    "query_or_respond",
    tools_condition,
    {
        "tools": "retrieve",
        END: END,
    }
)
# After retrieval, always produce the final answer and finish.
graph_builder.add_edge("retrieve", "generate")
graph_builder.add_edge("generate", END)

# Compile graph (rebinds `graph`, replacing any graph compiled earlier)
graph = graph_builder.compile()

# Streaming function
def stream_graph_updates(user_input: str):
    """Stream one user turn through `graph` and print each update's newest message.

    Updates without a non-empty "messages" entry, and messages without a
    `content` attribute, are skipped.

    Args:
        user_input: The text typed by the user for this turn.
    """
    turn_input = {"messages": [{"role": "user", "content": user_input}]}
    for event in graph.stream(turn_input):
        for node_output in event.values():
            if "messages" not in node_output or not node_output["messages"]:
                continue
            newest = node_output["messages"][-1]
            if not hasattr(newest, 'content'):
                continue
            print("Assistant:", newest.content)
            print()

# Main loop. Type "quit", "exit" or "q" to stop.
while True:
    try:
        try:
            user_input = input("User: ")
        except (EOFError, NotImplementedError):
            # input() is unavailable: a closed stdin raises EOFError, and a
            # kernel without stdin support raises a NotImplementedError
            # subclass. End the chat cleanly instead of crashing the cell.
            print("\nInput is not available in this environment. Goodbye!")
            break

        if user_input.lower() in ["quit", "exit", "q"]:
            print("Goodbye!")
            break

        # Errors raised by the graph itself are deliberately left uncaught.
        stream_graph_updates(user_input)
    except KeyboardInterrupt:
        print("\nGoodbye!")
        break