In [1]:
pip install smolagents ddgs --upgrade transformers nvidia-modelopt==0.19.0

[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from smolagents import TransformersModel, CodeAgent

### 1. Configuration for 4-bit Quantization

In [3]:
# 1. 4-bit quantization settings, consumed later when the checkpoint is loaded.
quantization_config = BitsAndBytesConfig(
    # Storage format of the weights.
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    # Precision used for the actual matmuls at inference time.
    bnb_4bit_compute_dtype=torch.float16,
)
print("--- Loading Model: DeepSeek-R1-Distill-Qwen-7B ---")


--- Loading Model: DeepSeek-R1-Distill-Qwen-7B ---


In [4]:
# Echo the config object so its resolved fields are shown as the cell output.
quantization_config

BitsAndBytesConfig {
  "_load_in_4bit": true,
  "_load_in_8bit": false,
  "bnb_4bit_compute_dtype": "float16",
  "bnb_4bit_quant_storage": "uint8",
  "bnb_4bit_quant_type": "nf4",
  "bnb_4bit_use_double_quant": true,
  "llm_int8_enable_fp32_cpu_offload": false,
  "llm_int8_has_fp16_weight": false,
  "llm_int8_skip_modules": null,
  "llm_int8_threshold": 6.0,
  "load_in_4bit": true,
  "load_in_8bit": false,
  "quant_method": "bitsandbytes"
}

### 2. Load the DeepSeek-R1 Distilled Model

In [None]:
# Hub identifier of the checkpoint; reused for both the tokenizer and the weights.
model_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# `trust_remote_code` is deliberately left at its default (False): this checkpoint
# uses the Qwen2 architecture that ships with transformers, so there is no need to
# grant permission to execute Python code downloaded from the model repository.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,  # 4-bit settings defined above
    device_map="auto",  # Automatically places layers on the available GPU(s)
)
model

2026-01-11 08:04:34.243620: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1768118674.261501     367 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1768118674.266486     367 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1768118674.279645     367 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768118674.279658     367 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1768118674.279661     367 computation_placer.cc:177] computation placer alr

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# 3. Wrap the model for smolagents
# TransformersModel runs inference locally. It takes a Hub id / local path
# (`model_id`) and loads the weights itself; it has no `model=` / `tokenizer=`
# parameters, so passing pre-built objects leaves `model_id` unset and the
# quantized DeepSeek checkpoint would not be the one the agent talks to.
# Give it the id and forward the 4-bit config through `model_kwargs`, which is
# passed on to `from_pretrained`.
# NOTE: this loads its own copy of the weights; the standalone `model` object
# created earlier is not used by the agent and can be dropped to save VRAM.
smol_model = TransformersModel(
    model_id=model_id,
    device_map="auto",
    model_kwargs={"quantization_config": quantization_config},
    max_new_tokens=2048,  # Math problems need space to "think"
)
smol_model

### 4. Create the Agent
CodeAgent is the standard choice for TIR (Tool-Integrated Reasoning) — it writes and runs Python code to solve math problems.

In [None]:
agent = CodeAgent(
    tools=[],  # No custom tools; the agent executes the Python it writes itself
    model=smol_model,
    add_base_tools=True,  # Adds tools like DuckDuckGo search (if internet is on)
    # The agent's interpreter only permits a small default whitelist of imports.
    # Math problems routinely need symbolic/numeric libraries, so allow them
    # explicitly; otherwise steps are wasted on import errors.
    additional_authorized_imports=["sympy", "numpy"],
    max_steps=5,  # Limits the agent to 5 "Think -> Code -> Result" loops per problem
)
print("--- Agent Initialized ---")

agent

### 5. Example Usage

In [None]:
# Example usage: run the agent on a single problem and print its final answer.
problem = "Find the integration of x*e^x from 0 to 1."
print(f"\nSolving: {problem}")

try:
    response = agent.run(problem)
    print(f"\n[Final Answer]: {response}")
except Exception as e:
    # Keep the notebook running, but do not hide what went wrong: report the
    # exception type alongside the message and emit the full traceback.
    import traceback

    print(f"\n[Error]: {type(e).__name__}: {e}")
    traceback.print_exc()
