<a href="https://colab.research.google.com/github/Arpit1118/Post-Training-LLMs-with-RL/blob/main/LLM_Tool_Calling_and_RLHF.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import sympy as sp
import json
import re
from transformers import AutoTokenizer, AutoModelForCausalLM

# --- Qwen Model Setup ---
# Hugging Face Hub identifier of the checkpoint that load_qwen_model() loads.
model_name = "Qwen/Qwen2.5-Coder-3B-Instruct"

# Module-level handles for the tokenizer and the model.
# Both start out as None and are filled in by load_qwen_model();
# generate_response() returns an error string while either is still unset.
tokenizer = None
model = None

# Function to load the model (called once at startup)
def load_qwen_model():
    """Load the Qwen tokenizer and model into the module-level globals.

    Both objects are built in local variables first and only published to
    the ``tokenizer`` / ``model`` globals once everything succeeded, so a
    failure part-way through (e.g. the tokenizer loads but the weights do
    not) never leaves one global set while the other is still ``None``.

    The model is loaded in float32, moved to the CPU and switched to eval
    mode.

    Failures are reported on stdout instead of being raised; in that case
    the globals keep whatever values they had before the call.
    """
    global tokenizer, model
    try:
        print(f"Loading Qwen model: {model_name}...")
        loaded_tokenizer = AutoTokenizer.from_pretrained(model_name)
        # float32 is requested explicitly for CPU compatibility.
        # NOTE: recent transformers releases warn that `torch_dtype` is
        # deprecated in favour of `dtype`; the old keyword is kept so that
        # older installations keep working.
        loaded_model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
        loaded_model.to('cpu')  # Explicitly move model to CPU
        loaded_model.eval()     # Set model to evaluation mode
    except Exception as e:
        print(f"ERROR: Failed to load Qwen model/tokenizer. Please ensure you have transformers and PyTorch installed. Error: {e}")
        # In a real script, you might want to exit here if the model is crucial
        return

    # Publish both handles together so callers never observe a half-loaded state.
    tokenizer, model = loaded_tokenizer, loaded_model
    print("Model loaded successfully.")

In [None]:
class MathSolver:
    """Small SymPy-backed helper for solving equations and evaluating expressions.

    Both public methods take a string, never raise, and return a plain dict
    carrying a ``success`` flag plus an ``error`` message (``None`` on
    success).

    NOTE(security): inputs are parsed with ``sp.sympify``, which SymPy
    documents as relying on ``eval``. The strings handled here originate from
    model output / user text, so do not expose this class to untrusted input
    without sanitising it first.
    """

    def __init__(self, variable='x'):
        # Symbol that solve_equation() solves for.
        self.x = sp.Symbol(variable)

    def solve_equation(self, equation_str):
        """Solve an equation for the configured symbol.

        ``equation_str`` may be written as ``lhs = rhs`` (``==`` is accepted
        as a synonym for ``=``) or as a bare expression, which is solved as
        ``expression = 0``.

        Returns a dict with keys ``success``, ``symbolic`` (list of root
        strings), ``numeric`` (list of numerically evaluated root strings)
        and ``error``. On failure ``success`` is False, ``symbolic`` and
        ``numeric`` are None and ``error`` holds the exception message.
        """
        try:
            # Accept the Python-style "==" as well as a single "=".
            normalized = equation_str.replace('==', '=')
            if normalized.count('=') > 1:
                # Previously this surfaced as an opaque tuple-unpacking error.
                raise ValueError(
                    f"Expected at most one '=' in the equation, got: {equation_str!r}"
                )

            if '=' in normalized:
                lhs, _, rhs = normalized.partition('=')
                # Move everything to one side so the roots of expr are the solutions.
                expr = sp.sympify(lhs) - sp.sympify(rhs)
            else:
                expr = sp.sympify(normalized)

            roots = sp.solve(expr, self.x)
            numeric = [sp.N(r) for r in roots]

            return {
                "success": True,
                "symbolic": [str(r) for r in roots],
                "numeric": [str(n) for n in numeric],
                "error": None
            }
        except Exception as e:
            # Tool-style contract: report the problem in the result instead of raising.
            return {
                "success": False,
                "symbolic": None,
                "numeric": None,
                "error": str(e)
            }

    def evaluate_expression(self, expr_str):
        """Numerically evaluate a math expression given as a string.

        Returns a dict with keys ``success``, ``result`` (the evaluated value
        as a string) and ``error``. On failure ``success`` is False,
        ``result`` is None and ``error`` holds the exception message.
        """
        try:
            # Use evalf() for numeric evaluation
            result = sp.sympify(expr_str).evalf()
            return {
                "success": True,
                "result": str(result),
                "error": None
            }
        except Exception as e:
            return {
                "success": False,
                "result": None,
                "error": str(e)
            }

In [None]:
# Single shared solver instance; its bound methods are the callable tools.
math_solver_instance = MathSolver()

# Registry used by execute_tool_call(): maps the tool name the model emits
# to the bound method that implements it.
AVAILABLE_TOOLS = {
    "solve_equation": math_solver_instance.solve_equation,
    "evaluate_expression": math_solver_instance.evaluate_expression,
}

# JSON description of the available tools, interpolated verbatim into SYSTEM_PROMPT.
# Each "name" must match a key of AVAILABLE_TOOLS, and each property name must
# match the Python parameter name of the tool, because execute_tool_call()
# forwards the model's "arguments" object as keyword arguments.
# NOTE(review): originally described as "Qwen's expected format" -- confirm
# against the model's own tool-calling conventions.
MATH_TOOL_DEFINITION = """
[
    {
        "name": "solve_equation",
        "description": "Solves an algebraic equation for the variable 'x'. Use this for problems containing an equals sign, e.g., 'x**2 - 4 = 0'.",
        "parameters": {
            "type": "object",
            "properties": {
                "equation_str": {
                    "type": "string",
                    "description": "The equation to solve, e.g., 'x**2 - 4 = 0'."
                }
            },
            "required": ["equation_str"]
        }
    },
    {
        "name": "evaluate_expression",
         "description": "Calculates the numeric result of a math expression. Use this for calculations without an equals sign, e.g., '5*6' or 'sqrt(9)'.",
        "parameters": {
            "type": "object",
            "properties": {
                "expr_str": {
                    "type": "string",
                    "description": "The expression to evaluate, e.g., '2 + 3 * 4' or 'sqrt(9)'."
                }
            },
            "required": ["expr_str"]
        }
    }
]
"""

# System message sent at the start of every conversation in generate_response().
# This is an f-string: {MATH_TOOL_DEFINITION} is interpolated, while the doubled
# braces {{ }} render as literal braces in the example tool call.
# The <|action_start|> / <|action_end|> markers are the ones generate_response()
# searches for when deciding whether the model requested a tool.
SYSTEM_PROMPT = f"""
You are a helpful and precise assistant. You have access to the following math-solving tools:
{MATH_TOOL_DEFINITION}
When the user asks a mathematical question (equation solving or calculation), you **must** call the appropriate tool.
You **must** respond with the tool call exactly in the following format:
<|action_start|>
{{
  "name": "tool_name",
  "arguments": {{
    "arg1": "value1",
    "arg2": "value2"
  }}
}}
<|action_end|>
Do not output any introductory or conversational text before the tool call. Only after receiving the tool's result should you provide a natural language answer.
If the user's request is not a math problem, answer directly without a tool call.
"""

In [None]:
def execute_tool_call(tool_name, tool_args):
    """Look up a tool by name and run it with the given keyword arguments.

    Returns whatever the tool returns. If the name is not registered in
    AVAILABLE_TOOLS, or the call itself raises, a dict with
    ``success=False`` and an ``error`` message is returned instead.
    """
    handler = AVAILABLE_TOOLS.get(tool_name)
    if not handler:
        return {"success": False, "error": f"Tool '{tool_name}' not found."}

    # Arguments are forwarded exactly as the model produced them (typically
    # strings); the registered tools accept string inputs.
    try:
        outcome = handler(**tool_args)
    except Exception as exc:
        return {"success": False, "error": str(exc)}
    return outcome

# Matches one tool-call block emitted by the model; group(1) is the JSON payload.
_ACTION_BLOCK_RE = re.compile(r"<\|action_start\|>(.*?)<\|action_end\|>", re.DOTALL)


def _generate_text(messages):
    """Run one greedy generation pass over *messages* and return only the new text."""
    # return_dict=True yields input_ids together with the attention mask, so
    # generate() does not have to guess the mask from the pad token.
    inputs = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
        return_dict=True,
    ).to(model.device)
    output = model.generate(
        **inputs, max_new_tokens=512, do_sample=False, pad_token_id=tokenizer.eos_token_id
    )
    # Drop the prompt tokens; everything after them is the model's reply.
    prompt_length = inputs["input_ids"].shape[1]
    return tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True).strip()


def _parse_tool_call(payload):
    """Decode a tool-call JSON payload into ``(tool_name, tool_args)``.

    Raises ValueError (json.JSONDecodeError is a subclass) when the payload
    is not valid JSON or is not a JSON object.
    """
    call = json.loads(payload)
    if not isinstance(call, dict):
        raise ValueError(f"tool call must be a JSON object, got {type(call).__name__}")
    return call.get("name"), call.get("arguments", {})


def generate_response(prompt):
    """Answer *prompt* with the Qwen model, running at most one tool call.

    Flow:
      1. Generate a reply to the system prompt plus the user prompt.
      2. If the reply contains an ``<|action_start|>...<|action_end|>`` block,
         parse it, execute the named tool, append the result to the
         conversation and generate a second, final reply.
      3. Otherwise (or if the tool call cannot be parsed) return the first
         reply unchanged.

    Returns the final text, or an error string when the model/tokenizer
    globals have not been loaded yet. Progress is printed to stdout.
    """
    # Compare against None explicitly rather than relying on the truthiness
    # of the model / tokenizer objects.
    if model is None or tokenizer is None:
        return "ERROR: Model not loaded. Please check the setup."

    messages = [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user", "content": prompt},
    ]

    # Pass 1: the model decides whether a tool is needed.
    response_text = _generate_text(messages)

    tool_call_match = _ACTION_BLOCK_RE.search(response_text)
    if tool_call_match is None:
        return response_text

    print("\n[--- Tool Call Detected ---]")
    try:
        tool_name, tool_args = _parse_tool_call(tool_call_match.group(1))
    except ValueError as e:
        print(f"[Warning: Failed to parse tool call. Error: {e}]")
        # Fall back to the raw first reply when the call cannot be parsed.
        return response_text

    print(f"   Tool: {tool_name}, Args: {tool_args}")
    tool_output = execute_tool_call(tool_name, tool_args)
    print(f"   Tool Result: {tool_output}")

    # Pass 2: feed the action and its observation back to the model.
    messages.append({"role": "assistant", "content": response_text})
    # The observation is not something the assistant said. It is sent as a
    # user turn so the conversation keeps alternating roles before the
    # generation prompt, instead of stacking two assistant turns.
    messages.append({
        "role": "user",
        "content": f"The result of calling {tool_name} with arguments {tool_args} is: {tool_output}",
    })

    print("[--- Rerunning model to generate final answer ---]")
    final_response_text = _generate_text(messages)

    # Strip any tool-call markers the model may have repeated in its final answer.
    return _ACTION_BLOCK_RE.sub("", final_response_text).strip()

In [None]:
if __name__ == "__main__":

    # Load the model and tokenizer before starting the prompt loop;
    # generate_response() only returns an error string while they are unset.
    load_qwen_model()

    print("\nQwen Assistant with Math Solver Tool Ready. Type 'exit' to quit.")

    while True:
        try:
            # Strip surrounding whitespace so inputs like "exit " still quit.
            user_input = input("\nUser >>> ").strip()
            if user_input.lower() in ('exit', 'quit'):
                break
            if not user_input:
                # Nothing to send to the model; prompt again.
                continue

            # Call the generation function
            response = generate_response(user_input)

            # Display the final output
            print(f"Qwen <<< {response}")

        except (KeyboardInterrupt, EOFError):
            # Ctrl-C or end of stdin: leave the loop quietly.
            print("\nExiting...")
            break
        except Exception as e:
            print(f"\nAn unexpected error occurred: {e}")
            break

Loading Qwen model: Qwen/Qwen2.5-Coder-3B-Instruct...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/661 [00:00<?, ?B/s]

`torch_dtype` is deprecated! Use `dtype` instead!


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Fetching 2 files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/1.21G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.96G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [None]:
# Smoke test: send a bare arithmetic expression through generate_response()
# and print whatever text comes back.
prompt = "5 * 6 * 7"
response = generate_response(prompt)
print("Response:", response)



[--- Tool Call Detected ---]
   Tool: solve_equation, Args: {'equation_str': 'x**2 - 4 = 0'}
   Tool Result: {'success': True, 'symbolic': ['-2', '2'], 'numeric': ['-2.00000000000000', '2.00000000000000'], 'error': None}
[--- Rerunning model to generate final answer ---]
Response: Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Comey
 Com