In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

In [None]:
device = "cpu"

In [None]:
model_id = "HuggingFaceTB/SmolLM3-3B"

print("Descargando y cargando SmolLM3 en bfloat16...")

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map={"": device}, 
    low_cpu_mem_usage=True
)

print(f"Modelo cargado correctamente en: {model.device}")


In [None]:

# Test rápido
prompt = "Escribe una lista de exactamente 3 pasos consisos para analizar una radiografía."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=200)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))

In [1]:
import asyncio
from mcp import ClientSession, StdioServerParameters
from mcp.client.stdio import stdio_client
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
import json
import os
import sys
import re

device = "cpu"

# 1. Configuración del Modelo (Cerebro)
model_id = "HuggingFaceTB/SmolLM3-3B"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id, 
    dtype=torch.bfloat16, 
    device_map={"": device},
    low_cpu_mem_usage=True
)

# 2. Configuración del Servidor MCP (Manos)
server_params = StdioServerParameters(
    command=sys.executable,
    args=["mcp_server.py"], 
)

async def run_agent():
    with open("mcp_debug.log", 'w') as fnull:
        async with stdio_client(server_params, errlog=fnull) as (read, write):
            async with ClientSession(read, write) as session:
                # Inicializar conexión con el servidor MCP
                await session.initialize()
                
                # Listar herramientas disponibles en el servidor
                tools = await session.list_tools()
                print(f"Detected tools: {[t.name for t in tools.tools]}")

                SYSTEM_PROMPT = """You are a medical assistant. To use a tool, you MUST use this format:
<tool_call>{"name": "tool_name", "arguments": {"arg1": "value"}}</tool_call>
You have access to these tools:
""" + "\n-".join([t.name for t in tools.tools])

                messages = [
                {"role": "system", "content": SYSTEM_PROMPT},
                {"role": "user", "content": "List the files and tell me what is in the first one."}
            ]


            for _ in range(3):
                
                # --- Bucle de Razonamiento del Agente ---

                prompt = tokenizer.apply_chat_template(
                    messages, 
                    tokenize=False, 
                    add_generation_prompt=True
                )

                inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
                with torch.no_grad():
                    outputs = model.generate(**inputs,
                                            max_new_tokens=200,
                                            temperature=0.01,
                                            pad_token_id=tokenizer.eos_token_id
                                            )
                response = tokenizer.decode(outputs[0][inputs.input_ids.shape[-1]:], skip_special_tokens=True)
                
                print(f"\nSmolLM3 decidió: {response}")

                match = re.search(r"<tool_call>(.*?)</tool_call>", response, re.DOTALL)

                if match:
                    tool_data = json.loads(match.group(1))
                    tool_name = tool_data["name"]
                    tool_args = tool_data.get("arguments", {})

                    print(f"--- Ejecutando herramienta: {tool_name} ---")
                    
                    # LLAMADA EFECTIVA AL SERVIDOR MCP
                    result = await session.call_tool(tool_name, arguments=tool_args)
                    
                    # Añadimos el resultado al historial para que el modelo lo vea
                    messages.append({"role": "assistant", "content": response})
                    messages.append({"role": "user", "content": f"Tool result: {result.content}"})
                else:
                    break


# if __name__ == "__main__":
#     asyncio.run(run_agent())

await run_agent()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

  + Exception Group Traceback (most recent call last):
  |   File "c:\Users\Laura\anaconda3\envs\python314\Lib\site-packages\IPython\core\interactiveshell.py", line 3697, in run_code
  |     await eval(code_obj, self.user_global_ns, self.user_ns)
  |   File "C:\Users\Laura\AppData\Local\Temp\ipykernel_20704\1328441570.py", line 94, in <module>
  |     await run_agent()
  |   File "C:\Users\Laura\AppData\Local\Temp\ipykernel_20704\1328441570.py", line 31, in run_agent
  |     async with stdio_client(server_params, errlog=fnull) as (read, write):
  |                ~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  |   File "c:\Users\Laura\anaconda3\envs\python314\Lib\contextlib.py", line 235, in __aexit__
  |     await self.gen.athrow(value)
  |   File "c:\Users\Laura\anaconda3\envs\python314\Lib\site-packages\mcp\client\stdio\__init__.py", line 183, in stdio_client
  |     anyio.create_task_group() as tg,
  |     ~~~~~~~~~~~~~~~~~~~~~~~^^
  |   File "c:\Users\Laura\anaconda3\envs\python314\Li