In [None]:
!pip install transformers



In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype="auto",
    device_map="auto"
)
tokenizer = AutoTokenizer.from_pretrained(model_name)
# device_map="auto" decides where the weights end up, so read the device back
# from the model instead of assuming a GPU is present.
device = model.device

prompt = "Find the value of $x$ that satisfies the equation $4x+5 = 6x+7$."

# Two alternative system prompts: "CoT" asks for step-by-step reasoning,
# "TIR" asks for reasoning combined with programs. Exactly one is sent.
system_prompts = {
    "CoT": "Please reason step by step, and put your final answer within \\boxed{}.",
    "TIR": "Please integrate natural language reasoning with programs to solve the problem above, and put your final answer within \\boxed{}.",
}
mode = "TIR"  # switch to "CoT" for the plain step-by-step prompt

messages = [
    {"role": "system", "content": system_prompts[mode]},
    {"role": "user", "content": prompt}
]

# Render the conversation as one prompt string, ending with the marker that
# tells the model it is its turn to answer.
text = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(device)

generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=512
)
# Each output sequence starts with the prompt tokens; slice them off so only
# the newly generated answer is decoded.
generated_ids = [
    output_ids[len(input_ids):] for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
]

response = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)[0]


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/656 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.32k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

In [None]:
# Show the decoded answer stored in `response`.
print(response)

To solve the equation \(4x + 5 = 6x + 7\), we need to isolate \(x\). Let's go through the steps systematically:

1. Subtract \(4x\) from both sides of the equation to get the \(x\) terms on one side:
\[5 = 2x + 7\]

2. Subtract 7 from both sides to isolate the term with \(x\):
\[-2 = 2x\]

3. Divide both sides by 2 to solve for \(x\):
\[x = -1\]

Let's verify this solution by substituting \(x = -1\) back into the original equation to ensure it holds true. We'll use Python to confirm our result.
```python
# Define the value of x
x = -1

# Left side of the equation
left_side = 4 * x + 5

# Right side of the equation
right_side = 6 * x + 7

# Check if both sides are equal
print((left_side, right_side, left_side == right_side))
```
```output
(1, 1, True)
```
The left side of the equation evaluates to \(1\) and the right side also evaluates to \(1\), confirming that the equation holds true when \(x = -1\).

Thus, the value of \(x\) that satisfies the equation \(4x + 5 = 6x + 7\) is \(\boxed

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer
!pip install transformers

model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"

# Load the checkpoint and its tokenizer; nothing below can run without them.
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    # `exit()` is the interactive-shell helper and reports success (status 0).
    # Raising SystemExit directly stops execution with a failing status and
    # keeps the original error chained for debugging.
    raise SystemExit(1) from e

# device_map="auto" chooses the placement, so take the device for the inputs
# from the loaded model instead of assuming "cuda".
device = model.device


def chatbot(prompt):
    """Answer one math question with the loaded model.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        prompt: The user's question as plain text.

    Returns:
        The decoded answer with the prompt tokens removed. A blank or missing
        ``prompt`` yields a short hint instead of a generation. Any failure
        while formatting, generating or decoding is reported as the string
        ``"An error occurred: <message>"`` rather than raised.
    """
    # Nothing to solve: skip the expensive generation for blank input.
    if not prompt or not prompt.strip():
        return "Please enter a math question."

    messages = [
        {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
        {"role": "user", "content": prompt}
    ]

    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        # Send the inputs to wherever the model actually lives instead of a
        # hard-coded "cuda", so the function also works on a CPU-only runtime.
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512
        )
        # Each output sequence starts with the prompt tokens; keep only the
        # newly generated part.
        answer_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        return tokenizer.batch_decode(answer_ids, skip_special_tokens=True)[0]
    except Exception as e:
        # Caller-facing boundary: hand the failure back as text so the chat
        # loop keeps running.
        return f"An error occurred: {e}"

# Console chat: keep answering until the user types an exit word, interrupts
# the session, or stdin is closed.
try:
    while True:
        user_input = input("You: ")
        # Ignore case and surrounding whitespace so " Exit " also ends the chat.
        if user_input.strip().lower() in ("exit", "quit", "bye"):
            break
        response = chatbot(user_input)
        print("Chatbot:", response)
except (KeyboardInterrupt, EOFError):
    # Interrupting the prompt (or a running generation) is a normal way to
    # stop; end the line and leave without a traceback.
    print()



The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/656 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.32k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

You: integral of 2x^2+5
Chatbot: To find the integral of the function \(2x^2 + 5\), we will integrate each term separately. The integral of a sum is the sum of the integrals.

The integral of \(2x^2\) can be found using the power rule for integration, which states that the integral of \(x^n\) is \(\frac{x^{n+1}}{n+1}\) plus a constant. Here, \(n = 2\), so we have:

\[
\int 2x^2 \, dx = 2 \int x^2 \, dx = 2 \left( \frac{x^{2+1}}{2+1} \right) = 2 \left( \frac{x^3}{3} \right) = \frac{2x^3}{3}
\]

Next, we integrate the constant term \(5\). The integral of a constant \(a\) is \(ax\) plus a constant. Here, \(a = 5\), so we have:

\[
\int 5 \, dx = 5x
\]

Combining these results, we get:

\[
\int (2x^2 + 5) \, dx = \frac{2x^3}{3} + 5x + C
\]

where \(C\) is the constant of integration. Therefore, the final answer is:

\[
\boxed{\frac{2x^3}{3} + 5x + C}
\]
You: integral of 2x+3 with limits 1 to -1
Chatbot: To find the integral of the function \(2x + 3\) from \(1\) to \(-1\), we will follow th

KeyboardInterrupt: Interrupted by user

In [None]:
!pip install transformers gradio



In [None]:
# prompt: connect this chatbot with gradio and give me code

from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr


model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"

# Load the checkpoint and its tokenizer; the interface is useless without them.
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    # `exit()` is the interactive-shell helper and reports success (status 0).
    # Raising SystemExit directly stops execution with a failing status and
    # keeps the original error chained for debugging.
    raise SystemExit(1) from e

# device_map="auto" chooses the placement, so take the device for the inputs
# from the loaded model instead of assuming "cuda".
device = model.device


def chatbot(prompt):
    """Answer one math question with the loaded model.

    Uses the module-level ``model`` and ``tokenizer``.

    Args:
        prompt: The user's question as plain text.

    Returns:
        The decoded answer with the prompt tokens removed. A blank or missing
        ``prompt`` yields a short hint instead of a generation. Any failure
        while formatting, generating or decoding is reported as the string
        ``"An error occurred: <message>"`` rather than raised.
    """
    # Nothing to solve: skip the expensive generation for blank input.
    if not prompt or not prompt.strip():
        return "Please enter a math question."

    messages = [
        {"role": "system", "content": "Please reason step by step, and put your final answer within \\boxed{}."},
        {"role": "user", "content": prompt}
    ]

    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        # Send the inputs to wherever the model actually lives instead of a
        # hard-coded "cuda", so the function also works on a CPU-only runtime.
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512
        )
        # Each output sequence starts with the prompt tokens; keep only the
        # newly generated part.
        answer_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        return tokenizer.batch_decode(answer_ids, skip_special_tokens=True)[0]
    except Exception as e:
        # Caller-facing boundary: hand the failure back as text so the web
        # callback shows a message rather than failing.
        return f"An error occurred: {e}"

# Web front end: a two-line text box for the question, plain text for the answer.
_question_box = gr.Textbox(lines=2, placeholder="Enter your prompt here...")

iface = gr.Interface(
    fn=chatbot,
    inputs=_question_box,
    outputs="text",
    title="Qwen2.5-Math-1.5B-Instruct Chatbot",
    description="Ask me a math question!",
)

# Start serving the interface.
iface.launch()

model.safetensors:  55%|#####5    | 1.71G/3.09G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/160 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/7.32k [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://0505b38b3d07430aaa.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
# prompt: remove the extra int and boxed from the code

from transformers import AutoModelForCausalLM, AutoTokenizer
import gradio as gr

!pip install transformers gradio

model_name = "Qwen/Qwen2.5-Math-1.5B-Instruct"

# Load the checkpoint and its tokenizer; the interface is useless without them.
try:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype="auto",
        device_map="auto"
    )
    tokenizer = AutoTokenizer.from_pretrained(model_name)
except Exception as e:
    print(f"Error loading model or tokenizer: {e}")
    # `exit()` is the interactive-shell helper and reports success (status 0).
    # Raising SystemExit directly stops execution with a failing status and
    # keeps the original error chained for debugging.
    raise SystemExit(1) from e

# device_map="auto" chooses the placement, so take the device for the inputs
# from the loaded model instead of assuming "cuda".
device = model.device


def chatbot(prompt):
    """Answer one math question with the loaded model.

    Uses the module-level ``model`` and ``tokenizer``. Unlike a prompt that
    requests a ``\\boxed{}`` result, this system prompt only asks for
    step-by-step reasoning and a final answer.

    Args:
        prompt: The user's question as plain text.

    Returns:
        The decoded answer with the prompt tokens removed. A blank or missing
        ``prompt`` yields a short hint instead of a generation. Any failure
        while formatting, generating or decoding is reported as the string
        ``"An error occurred: <message>"`` rather than raised.
    """
    # Nothing to solve: skip the expensive generation for blank input.
    if not prompt or not prompt.strip():
        return "Please enter a math question."

    messages = [
        {"role": "system", "content": "Please reason step by step, and provide your final answer."},
        {"role": "user", "content": prompt}
    ]

    try:
        text = tokenizer.apply_chat_template(
            messages,
            tokenize=False,
            add_generation_prompt=True
        )
        # Send the inputs to wherever the model actually lives instead of a
        # hard-coded "cuda", so the function also works on a CPU-only runtime.
        model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=512
        )
        # Each output sequence starts with the prompt tokens; keep only the
        # newly generated part.
        answer_ids = [
            output_ids[len(input_ids):]
            for input_ids, output_ids in zip(model_inputs.input_ids, generated_ids)
        ]

        return tokenizer.batch_decode(answer_ids, skip_special_tokens=True)[0]
    except Exception as e:
        # Caller-facing boundary: hand the failure back as text so the web
        # callback shows a message rather than failing.
        return f"An error occurred: {e}"

# Describe the web UI in one place, then build and serve it.
_interface_settings = {
    "fn": chatbot,
    "inputs": gr.Textbox(lines=2, placeholder="Enter your prompt here..."),
    "outputs": "text",
    "title": "Qwen2.5-Math-1.5B-Instruct Chatbot",
    "description": "Ask me a math question!",
}
iface = gr.Interface(**_interface_settings)

iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://cad037b5e75f550ccf.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


