### environment setup (langsmith and tavily)

In [27]:
from dotenv import load_dotenv
import os

load_dotenv(override = True)

# Check what's actually in the environment
print("All environment variables with 'LANG' or 'TAVILY':")
for key, value in os.environ.items():
    if 'LANG' in key.upper() or 'TAVILY' in key.upper():
        print(f"{key}: {value[:10]}...")

# Check specifically for our keys
langsmith_key = os.environ.get("LANGSMITH_API_KEY")
tavily_key = os.environ.get("TAVILY_API_KEY")

print(f"\nLangSmith key exists: {langsmith_key is not None}")
print(f"Tavily key exists: {tavily_key is not None}")

All environment variables with 'LANG' or 'TAVILY':
LANG: en_US.UTF-...
LANGSMITH_API_KEY: lsv2_pt_68...
TAVILY_API_KEY: tvly-dev-j...

LangSmith key exists: True
Tavily key exists: True


### defining tools

In [26]:
from langchain_tavily import TavilySearch

search = TavilySearch(max_results=2)
search_results = search.invoke("What is the weather in SF")
print(search_results)

# If we want, we can create other tools.
# Once we have all the tools we want, we can put them in a list that we will reference later.
tools = [search]

{'query': 'What is the weather in SF', 'follow_up_questions': None, 'answer': None, 'images': [], 'results': [{'title': 'Weather in San Francisco, CA', 'url': 'https://www.weatherapi.com/', 'content': "{'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.775, 'lon': -122.4183, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1750878802, 'localtime': '2025-06-25 12:13'}, 'current': {'last_updated_epoch': 1750878000, 'last_updated': '2025-06-25 12:00', 'temp_c': 15.6, 'temp_f': 60.1, 'is_day': 1, 'condition': {'text': 'Partly cloudy', 'icon': '//cdn.weatherapi.com/weather/64x64/day/116.png', 'code': 1003}, 'wind_mph': 8.7, 'wind_kph': 14.0, 'wind_degree': 240, 'wind_dir': 'WSW', 'pressure_mb': 1018.0, 'pressure_in': 30.06, 'precip_mm': 0.0, 'precip_in': 0.0, 'humidity': 75, 'cloud': 50, 'feelslike_c': 15.6, 'feelslike_f': 60.1, 'windchill_c': 11.0, 'windchill_f': 51.8, 'heatindex_c': 12.2, 'heatindex_f': 54.0, 'dewpoint_c': 11.

### using language model & integrating tools (Tavily)

In [None]:
!pip install transformers torch

In [56]:
from transformers import pipeline
from langchain_tavily import TavilySearch

class LocalChatModel:
    """ Constructor """
    def __init__(self):
        self.generator = pipeline(
            "text-generation", 
            model="gpt2",
            pad_token_id=50256  # avoid warnings
        )

        self.tools = []
        self.tavily = TavilySearch(max_results=2)

    """ Format Tavily results nicely """
    def format_tavily_results(self, search_results):
        if not search_results or 'results' not in search_results:
            return "No results found."

        formatted = ""
    
        for i, result in enumerate(search_results['results'], 1):
            formatted += f"**Result {i}:**\n"
            formatted += f"Title: {result['title']}\n"
            formatted += f"URL: {result['url']}\n"
            formatted += f"Summary: {result['content'][:200]}...\n\n"
    
        return formatted

    """ Returning response based on query using gpt-2 model """
    def invoke(self, query):
        # extract query message
        if isinstance(query, list):
            user_message = query[-1]['content']  # extract from Tavily tool message format
        else:
            user_message = query  # already a string
            
        # check if search is needed based on words in query message
        needs_search = 'weather' in user_message.lower() or 'search' in user_message.lower()

        if needs_search:
            # go straight to Tavily, skip GPT-2 entirely
            search_results = self.tavily.invoke(user_message)
            response_content = self.format_tavily_results(search_results)
        else:
            # only use GPT-2 for non-search queries
            prompt = f"user says: {user_message}\nResponse:"

            response = self.generator(
                prompt,                  # given model context
                max_new_tokens=25,       # generate max 25 new words       
                temperature=0.8,         # creativity level (0=boring, 1=wild)     
                do_sample=True,          # use randomness (not always same answer)
                truncation=True,         # cut off if input too long
                num_return_sequences=1   # generate 1 response (not multiple)
            )

            generated_text = response[0]['generated_text']
            response_part = generated_text.split("Response:")[-1].strip()

            # rest of cleanup...
            if '\n' in response_part:
                # only getting one sentence
                response_part = response_part.split('\n')[0]
            if not response_part or len(response_part) < 3:
                # no response, or reponse is less than 3 characters
                response_part = "Sorry, I didn't generate a good response. Please rephrase query."
            response_content = response_part

        return type('Response', (), {'content': response_content, 'user_message': user_message})()


""" Testing """
model = LocalChatModel()
test_queries = [
    [{"role": "user", "content": "Hi! How are you?"}],
    [{"role": "user", "content": "What's the weather in SF?"}], 
    [{"role": "user", "content": "Search for Python tutorials"}]
]

for query in test_queries:
    response = model.invoke(query)
    print(f"Query:\n{response.user_message}\nResponse:\n{response.content}\n\n")


Device set to use mps:0


Query:
Hi! How are you?
Response:
This was a bit difficult to read. I can see that you're a programmer but you could also use a GUI to


Query:
What's the weather in SF?
Response:
**Result 1:**
Title: Weather in San Francisco, CA
URL: https://www.weatherapi.com/
Summary: {'location': {'name': 'San Francisco', 'region': 'California', 'country': 'United States of America', 'lat': 37.775, 'lon': -122.4183, 'tz_id': 'America/Los_Angeles', 'localtime_epoch': 1750881249, 'l...

**Result 2:**
Title: Weather in San Francisco in June 2025
URL: https://world-weather.info/forecast/usa/san_francisco/june-2025/
Summary: Weather in San Francisco in June 2025 (California) - Detailed Weather Forecast for a Month *   Weather in San Francisco Weather in San Francisco in June 2025 *   1 +63° +55° *   2 +66° +54° *   3 +66°...




Query:
Search for Python tutorials
Response:
**Result 1:**
Title: Python Tutorial - W3Schools
URL: https://www.w3schools.com/python/
Summary: Python Tutorial Tutorials "Tutorial

### using language models & memory

In [86]:
from langgraph.graph import StateGraph, MessagesState
from langgraph.checkpoint.memory import MemorySaver
from langchain_core.messages import HumanMessage, AIMessage
from transformers import pipeline

""" Rudimentary model for testing """
class GPT2Agent:
    def __init__(self):
        self.generator = pipeline("text-generation", model="gpt2", pad_token_id=50256)

    """ Returning response based on query using gpt-2 model"""
    """ More powerful models do this automatically """
    def invoke(self, messages):
        # get the latest message
        latest = messages[-1].content
        
        # simple prompt format that works with GPT-2
        prompt = f"User: {latest}\nBot:"
        response = self.generator(
            prompt,
            max_new_tokens=20,
            temperature=0.7,
            do_sample=True,
            repetition_penalty=1.2,
            pad_token_id=50256
        )
        
        # extract response
        generated = response[0]['generated_text']
        bot_response = generated.split("Bot:")[-1].strip()
        
        # clean up
        if '\n' in bot_response:
            bot_response = bot_response.split('\n')[0]
        if not bot_response or len(bot_response) < 3:
            bot_response = "I understand."

        return AIMessage(content=bot_response)

""" Create the LangGraph setup """
def create_gpt2_agent():
    model = GPT2Agent()

    """ Processing engine """
    def agent_node(state: MessagesState):
        """LangGraph calls this with full conversation history"""
        print(f"Agent sees {len(state['messages'])} messages in memory:")
        for i, msg in enumerate(state["messages"]):
            role = "Human" if msg.type == "human" else "AI"
            print(f"   {i+1}. {role}: {msg.content}")
        
        # AI model gets FULL conversation context
        response = model.invoke(state["messages"])

        # returning new state
        return {"messages": [response]}
    
    # build LangGraph workflow
    workflow = StateGraph(MessagesState)        # like blueprint of agentic model
    workflow.add_node("agent", agent_node)      # add one processing unit
    workflow.set_entry_point("agent")           # start here
    workflow.set_finish_point("agent")          # end here
    
    # add memory persistence--separate from nodes!
    memory = MemorySaver()
    app = workflow.compile(checkpointer=memory) # connect memory to workflow
    
    return app

""" Testing memory """
app = create_gpt2_agent()

# CONVERSATION A: Alice's Chat
print("=" * 60)
print("ALICE's CHAT")
print("=" * 60)
config_alice = {"configurable": {"thread_id": "alice_chat"}}

print("Alice Turn 1:")
alice_1 = app.invoke(
    {"messages": [HumanMessage("Hi, my name is Alice and I live in New York")]}, 
    config_alice
)

print("\nAlice Turn 2:")
alice_2 = app.invoke(
    {"messages": alice_1["messages"] + [HumanMessage("What's my name?")]}, 
    config_alice
)

print("\nAlice Turn 3:")
alice_3 = app.invoke(
    {"messages": alice_2["messages"] + [HumanMessage("Where do I live?")]}, 
    config_alice
)

# CONVERSATION B: Bob's Chat (Different Thread)
print("\n")
print("=" * 60)
print("BOB'S CHAT")
print("=" * 60)
config_bob = {"configurable": {"thread_id": "bob_chat"}}

print("Bob Turn 1:")
bob_1 = app.invoke(
    {"messages": [HumanMessage("Hello, I'm Bob and I work as a teacher")]}, 
    config_bob
)

print("\nBob Turn 2:")
bob_2 = app.invoke(
    {"messages": bob_1["messages"] + [HumanMessage("What's my job?")]}, 
    config_bob
)

print("\nBob Turn 3:")
bob_3 = app.invoke(
    {"messages": bob_2["messages"] + [HumanMessage("Do you remember my name?")]}, 
    config_bob
)


# RETURN TO ALICE'S CONVERSATION
print("\n")
print("=" * 60)
print("\nBACK TO ALICE'S CONVERSATION")
print("=" * 60)

print("Alice Turn 4 (returning to her thread):")
alice_4 = app.invoke(
    {"messages": alice_3["messages"] + [HumanMessage("How many times have we talked?")]}, 
    config_alice  # Same thread_id as Alice's original conversation
)

# CROSS-CONTAMINATION TEST
print("\n")
print("=" * 60)
print("\nCROSS-CONTAMINATION TEST")
print("=" * 60)

print("Alice asks about Bob (should NOT know Bob):")
alice_cross = app.invoke(
    {"messages": alice_4["messages"] + [HumanMessage("Do you know anyone named Bob?")]}, 
    config_alice
)

print("\nBob asks about Alice (should NOT know Alice):")
bob_cross = app.invoke(
    {"messages": bob_3["messages"] + [HumanMessage("Do you know anyone named Alice?")]}, 
    config_bob
)

# FINAL SUMMARY
print("\n")
print("=" * 60)
print("FINAL CONVERSATION STATES")
print("=" * 60)

print(f"\nAlice's conversation has {len(alice_cross['messages'])} messages:")
for i, msg in enumerate(alice_cross['messages'], 1):
    role = "Human" if msg.type == "human" else "AI"
    print(f"   {i}. {role}: {msg.content[:50]}...")

print(f"\nBob's conversation has {len(bob_cross['messages'])} messages:")
for i, msg in enumerate(bob_cross['messages'], 1):
    role = "Human" if msg.type == "human" else "AI"
    print(f"   {i}. {role}: {msg.content[:50]}...")


# KEY INSIGHTS
print("\n")
print("=" * 60)
print("\nKEY INSIGHTS FROM THIS TEST")
print("=" * 60)
print("1. Each thread_id maintains completely separate conversation history")
print("2. Alice's conversation never knows about Bob's conversation")
print("3. Bob's conversation never knows about Alice's conversation") 
print("4. You can return to any conversation thread and it remembers everything")
print("5. Each thread grows independently (different message counts)")
print("6. Thread isolation prevents data leakage between conversations")

print(f"\nCONVERSATION LENGTHS:")
print(f"   Alice: {len(alice_cross['messages'])} messages")
print(f"   Bob: {len(bob_cross['messages'])} messages") 


Device set to use mps:0


ALICE's CHAT
Alice Turn 1:
Agent sees 1 messages in memory:
   1. Human: Hi, my name is Alice and I live in New York

Alice Turn 2:
Agent sees 3 messages in memory:
   1. Human: Hi, my name is Alice and I live in New York
   2. AI: Hello? My Name Is... Me! Bot. [Laughter] Wow was she cute but..
   3. Human: What's my name?

Alice Turn 3:
Agent sees 5 messages in memory:
   1. Human: Hi, my name is Alice and I live in New York
   2. AI: Hello? My Name Is... Me! Bot. [Laughter] Wow was she cute but..
   3. Human: What's my name?
   4. AI: I'm in the company of a man named "The Man on Fire". He killed his wife and
   5. Human: Where do I live?


BOB'S CHAT
Bob Turn 1:
Agent sees 1 messages in memory:
   1. Human: Hello, I'm Bob and I work as a teacher

Bob Turn 2:
Agent sees 3 messages in memory:
   1. Human: Hello, I'm Bob and I work as a teacher
   2. AI: Hi! My name is Tim of The Daily Beast. What are you doing here? (pause)
   3. Human: What's my job?

Bob Turn 3:
Agent sees 5 message