In [1]:
import torch
from langchain_community.llms import Ollama
from langchain.tools import tool
from langchain.agents import create_react_agent, AgentExecutor
from langchain.prompts import PromptTemplate

In [2]:
def clear_cuda_memory():
    """
    Clear CUDA memory cache to free up GPU resources between queries.
    Only has an effect if CUDA is available.
    """
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
        import gc
        gc.collect()
        print("CUDA memory cache cleared")
    return

In [3]:
clear_cuda_memory()

CUDA memory cache cleared


In [4]:
llm_model = 'qwen3:1.7b' #works: qwen3:1.7b, qwen3:latest, hir0rameel/qwen-claude:latest, mistral-small3.2:latest
#not working (consistently): llama3.2, qwq, qwen3:0.6b, qwen3:14b,qwen3:30b-a3b, deepseek-r1:latest, atombuild/deepseek-r1-claude3.7:14b

In [5]:
llm = Ollama(model=llm_model)

  llm = Ollama(model=llm_model)


In [6]:
@tool
def get_current_weather(location: str = "Unknown", format: str = "celsius") -> str:
    """Get the current weather for a specific location. ONLY use this tool when the user explicitly asks about weather conditions, temperature, or weather forecasts for a particular place.
    
    Args:
        location: The location to get weather for
        format: Temperature format (celsius or fahrenheit)
    """
    return f"Sample weather for {location} in {format} is 20° and sunny!"

In [7]:
tools = [get_current_weather]

In [8]:
prompt = PromptTemplate.from_template("""You are a smart assistant. Analyze the user query carefully and answer it appropriately.

ONLY call a tool if:
1. The user query specifically requires information that the tool can provide
2. The tool is directly relevant to answering the user's question

For general conversation, greetings, or questions that don't require tool functionality, respond directly without calling any tools.

You have access to the following tools:

{tools}

Use the following format:

Question: the input question you must answer
Thought: you should always think about what to do
Action: the action to take, should be one of [{tool_names}]
Action Input: the input to the action
Observation: the result of the action
... (this Thought/Action/Action Input/Observation can repeat N times)
Thought: I now know the final answer
Final Answer: the final answer to the original input question

Begin!

Question: {input}
Thought:{agent_scratchpad}""")

In [9]:
agent = create_react_agent(llm, tools, prompt)

In [10]:
agent_executor = AgentExecutor(
    agent=agent, 
    tools=tools, 
    verbose=True,
    handle_parsing_errors=True
)


In [11]:
for i in range(100):
    print(i)
    response = agent_executor.invoke({"input": "What's the weather in Aachen, in celsius?"})
    print(response)
    clear_cuda_memory()

0


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m<think>
Okay, the user is asking for the weather in Aachen in Celsius. Let me check if I need to use a tool here. The available tool is get_current_weather, which requires the location and format. The user specified Aachen and celsius, so I should use that tool. The location is Aachen, and the format is celsius. I'll call the tool with those parameters. Once I get the observation, I'll convert it to the final answer.
</think>

Action: get_current_weather
Action Input: location="Aachen", format="celsius"
[0m[36;1m[1;3mSample weather for location="Aachen", format="celsius"
 in celsius is 20° and sunny![0m[32;1m[1;3m<think>
Thought: I now know the final answer
Final Answer: The current weather in Aachen is 20°C and sunny.[0m

[1m> Finished chain.[0m
{'input': "What's the weather in Aachen, in celsius?", 'output': 'The current weather in Aachen is 20°C and sunny.'}
CUDA memory cache cleared
1


[1m> Entering new Agent

In [12]:
for i in range(100):
    print(i)
    response2 = agent_executor.invoke({"input": "Hi how are u?"})
    print(response2)
    clear_cuda_memory()

0


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m<think>
Okay, the user said "Hi how are u?" Which is a greeting. They're probably just wishing me well. Since they didn't ask for weather or any specific information, I don't need to use any tools. I should just respond politely.
</think>

Thought: The user is greeting me, which is a general conversation. No tool is needed here.
Final Answer: Hello! How can I assist you today?[0m

[1m> Finished chain.[0m
{'input': 'Hi how are u?', 'output': 'Hello! How can I assist you today?'}
CUDA memory cache cleared
1


[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m<think>
Okay, the user said "Hi how are u?" Which is a greeting. They're probably just wishing me well. Since they didn't ask for weather or any specific information, I don't need to use any tools. I should just respond politely.
</think>

Thought: The user is greeting me, which is a general conversation. No tool is needed here.
Final Answer: Hello! How can I 