In [2]:
%%capture
!pip install unsloth
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes

In [1]:
from unsloth import FastLanguageModel
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset

# 1. Load Model
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/Qwen3-4b",
    max_seq_length = 2048,
    load_in_4bit = True,
    load_in_8bit = False,
    full_finetuning = False
)


ðŸ¦¥ Unsloth: Will patch your computer to enable 2x faster free finetuning.
ðŸ¦¥ Unsloth Zoo will now patch everything to make training faster!


  import trl.experimental.openenv.utils as openenv_utils


==((====))==  Unsloth 2025.12.4: Fast Qwen3 patching. Transformers: 4.57.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.9.1+cu128. CUDA: 7.5. CUDA Toolkit: 12.8. Triton: 3.5.1
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.33.post2. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


In [2]:
# 2. Add LoRA
model = FastLanguageModel.get_peft_model(
    model,
    r = 16,
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 32,
    lora_dropout = 0.10,
    bias = "none",
    use_gradient_checkpointing = True,
    use_rslora = False,
    loftq_config = None,
)

Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2025.12.4 patched 36 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


In [3]:
# 3. Data
reasoning_dataset = load_dataset("unsloth/OpenMathReasoning-mini", split = "cot")
non_reasoning_dataset = load_dataset("mlabonne/FineTome-100k", split = "train")

In [4]:
from pprint import pprint
pprint(reasoning_dataset[0])

{'expected_answer': '14',
 'generated_solution': '<think>\n'
                       "Okay, let's see. I need to solve the equation âˆš(xÂ² + "
                       '165) - âˆš(xÂ² - 52) = 7, and find all positive values of '
                       'x. Hmm, radicals can be tricky, but maybe if I can '
                       'eliminate the square roots by squaring both sides. Let '
                       'me try that.\n'
                       '\n'
                       'First, let me write down the equation again to make '
                       'sure I have it right:\n'
                       '\n'
                       'âˆš(xÂ² + 165) - âˆš(xÂ² - 52) = 7.\n'
                       '\n'
                       'Okay, so the idea is to isolate one of the radicals '
                       'and then square both sides. Let me try moving the '
                       'second radical to the other side:\n'
                       '\n'
                       'âˆš(xÂ² + 165) = 7 + âˆš(xÂ² - 52)

In [5]:
pprint(non_reasoning_dataset[0])

{'conversations': [{'from': 'human',
                    'value': 'Explain what boolean operators are, what they '
                             'do, and provide examples of how they can be used '
                             'in programming. Additionally, describe the '
                             'concept of operator precedence and provide '
                             'examples of how it affects the evaluation of '
                             'boolean expressions. Discuss the difference '
                             'between short-circuit evaluation and normal '
                             'evaluation in boolean expressions and '
                             'demonstrate their usage in code. \n'
                             '\n'
                             'Furthermore, add the requirement that the code '
                             'must be written in a language that does not '
                             'support short-circuit evaluation natively, '
                        

In [6]:
def generate_conversation(examples):
    problems  = examples["problem"]
    solutions = examples["generated_solution"]
    conversations = []
    for problem, solution in zip(problems, solutions):
        conversations.append([
            {"role" : "user",      "content" : problem},
            {"role" : "assistant", "content" : solution},
        ])
    return { "conversations": conversations, }

In [7]:
reasoning_conversations = [tokenizer.apply_chat_template(
    conversation,
    tokenize = False,
) for conversation in reasoning_dataset.map(generate_conversation, batched = True)["conversations"]]

In [8]:
print(reasoning_conversations[0])

<|im_start|>user
Given $\sqrt{x^2+165}-\sqrt{x^2-52}=7$ and $x$ is positive, find all possible values of $x$.<|im_end|>
<|im_start|>assistant
<think>
Okay, let's see. I need to solve the equation âˆš(xÂ² + 165) - âˆš(xÂ² - 52) = 7, and find all positive values of x. Hmm, radicals can be tricky, but maybe if I can eliminate the square roots by squaring both sides. Let me try that.

First, let me write down the equation again to make sure I have it right:

âˆš(xÂ² + 165) - âˆš(xÂ² - 52) = 7.

Okay, so the idea is to isolate one of the radicals and then square both sides. Let me try moving the second radical to the other side:

âˆš(xÂ² + 165) = 7 + âˆš(xÂ² - 52).

Now, if I square both sides, maybe I can get rid of the square roots. Let's do that:

(âˆš(xÂ² + 165))Â² = (7 + âˆš(xÂ² - 52))Â².

Simplifying the left side:

xÂ² + 165 = 49 + 14âˆš(xÂ² - 52) + (âˆš(xÂ² - 52))Â².

The right side is expanded using the formula (a + b)Â² = aÂ² + 2ab + bÂ². So the right side becomes 7Â² + 2*7*âˆš(xÂ

In [9]:
from unsloth.chat_templates import standardize_sharegpt
dataset = standardize_sharegpt(non_reasoning_dataset)

non_reasoning_conversations = [tokenizer.apply_chat_template(
    conversation,
    tokenize = False,
) for conversation in dataset["conversations"]]

In [10]:
print(non_reasoning_conversations[0])

<|im_start|>user
Explain what boolean operators are, what they do, and provide examples of how they can be used in programming. Additionally, describe the concept of operator precedence and provide examples of how it affects the evaluation of boolean expressions. Discuss the difference between short-circuit evaluation and normal evaluation in boolean expressions and demonstrate their usage in code. 

Furthermore, add the requirement that the code must be written in a language that does not support short-circuit evaluation natively, forcing the test taker to implement their own logic for short-circuit evaluation.

Finally, delve into the concept of truthiness and falsiness in programming languages, explaining how it affects the evaluation of boolean expressions. Add the constraint that the test taker must write code that handles cases where truthiness and falsiness are implemented differently across different programming languages.<|im_end|>
<|im_start|>assistant
<think>

</think>

Bool

In [11]:
print(len(reasoning_conversations))
print(len(non_reasoning_conversations))

19252
100000


In [12]:
chat_percentage = 0.75

In [13]:
import pandas as pd
non_reasoning_subset = pd.Series(non_reasoning_conversations)
non_reasoning_subset = non_reasoning_subset.sample(
    int(len(reasoning_conversations) * (1.0 - chat_percentage)),
    random_state = 2407,
)

In [14]:
data = pd.concat([
    pd.Series(reasoning_conversations),
    pd.Series(non_reasoning_subset)
])
data.name = "text"

from datasets import Dataset
combined_dataset = Dataset.from_pandas(pd.DataFrame(data))
combined_dataset = combined_dataset.shuffle(seed = 3407)

In [15]:
len(combined_dataset)

24065

In [16]:
from trl import SFTTrainer, SFTConfig
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = combined_dataset,
    eval_dataset = None,
    args = SFTConfig(
        dataset_text_field = "text",
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        max_steps = 30,
        learning_rate = 2e-4,
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        report_to = "none",
    )
)

Unsloth: Tokenizing ["text"] (num_proc=6):   0%|          | 0/24065 [00:00<?, ? examples/s]

ðŸ¦¥ Unsloth: Padding-free auto-enabled, enabling faster training.


In [17]:
trainer_stats = trainer.train()

The model is already on multiple devices. Skipping the move to device specified in `args`.
==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 24,065 | Num Epochs = 1 | Total steps = 30
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 4 x 1) = 8
 "-____-"     Trainable parameters = 33,030,144 of 4,055,498,240 (0.81% trained)


Unsloth: Will smartly offload gradients to save VRAM!


Step,Training Loss
1,0.6153
2,0.6792
3,0.8577
4,0.6976
5,0.5822
6,0.5506
7,0.5508
8,0.5094
9,0.4687
10,0.5765


In [18]:
messages = [
    {"role" : "user", "content" : "Solve (x^2 + 5x + 6 = 0)."}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt = True,
    enable_thinking = False,
)

from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    max_new_tokens = 2048,
    temperature = 0.7, top_p = 0.8, top_k = 20,
    streamer = TextStreamer(tokenizer, skip_prompt = True),
)

To solve the quadratic equation \( x^2 + 5x + 6 = 0 \), we can factor it. Let's see if the quadratic can be factored into two binomials:

We need two numbers that multiply to 6 (the constant term) and add up to 5 (the coefficient of the x-term). Let's list the factor pairs of 6:

1. 1 and 6 (1 + 6 = 7)
2. 2 and 3 (2 + 3 = 5)

So, the correct factorization is:

\( (x + 2)(x + 3) = 0 \)

Now, we can set each factor equal to zero and solve for x:

1. \( x + 2 = 0 \)
2. \( x + 3 = 0 \)

Solving each equation:

1. \( x + 2 = 0 \)
   Subtract 2 from both sides:
   \( x = -2 \)

2. \( x + 3 = 0 \)
   Subtract 3 from both sides:
   \( x = -3 \)

Therefore, the solutions to the equation \( x^2 + 5x + 6 = 0 \) are \( x = -2 \) and \( x = -3 \).<|im_end|>


In [19]:
messages = [
    {"role" : "user", "content" : "Solve (x^2 + 5x + 6 = 0)."}
]
text = tokenizer.apply_chat_template(
    messages,
    tokenize = False,
    add_generation_prompt = True,
    enable_thinking = True,
)

from transformers import TextStreamer
_ = model.generate(
    **tokenizer(text, return_tensors = "pt").to("cuda"),
    max_new_tokens = 2048,
    temperature = 0.7, top_p = 0.8, top_k = 20,
    streamer = TextStreamer(tokenizer, skip_prompt = True),
)

<think>
Okay, let's see. I need to solve the quadratic equation xÂ² + 5x + 6 = 0. Hmm, quadratic equations usually have two solutions, right? I remember there are a few methods to solve them: factoring, completing the square, or using the quadratic formula. Let me try factoring first because if it factors nicely, that might be the quickest way.

So, the equation is xÂ² + 5x + 6 = 0. To factor this, I need two numbers that multiply to 6 (the constant term) and add up to 5 (the coefficient of the x term). Let me think... 2 and 3. Yes, 2 times 3 is 6, and 2 plus 3 is 5. Perfect!

So, I can rewrite the middle term as 2x + 3x. That gives me xÂ² + 2x + 3x + 6 = 0. Now, let's group the terms: (xÂ² + 2x) + (3x + 6) = 0. Factoring out the common factors from each group, I get x(x + 2) + 3(x + 2) = 0. Now, I can factor out the common binomial factor (x + 2): (x + 2)(x + 3) = 0.

So, the factored form is (x + 2)(x + 3) = 0. According to the zero product property, if the product of two factors is 

In [None]:
model.save_pretrained("qwen3_lora_model")
tokenizer.save_pretrained("qwen3_lora_model")

('qwen3_lora_model/tokenizer_config.json',
 'qwen3_lora_model/special_tokens_map.json',
 'qwen3_lora_model/chat_template.jinja',
 'qwen3_lora_model/vocab.json',
 'qwen3_lora_model/merges.txt',
 'qwen3_lora_model/added_tokens.json',
 'qwen3_lora_model/tokenizer.json')

In [None]:
%%writefile app.py
import streamlit as st
import torch
import os
from unsloth import FastLanguageModel
from transformers import TextStreamer, TextIteratorStreamer
import threading
import base64


# =========================
#   Sidebar Controls
# =========================
st.sidebar.header("Model Settings")
model_name = st.sidebar.text_input("Model Name", value="qwen3_lora_model")
max_new_tokens = st.sidebar.number_input("Max New Tokens", min_value=1, max_value=4096, value=2048)
thinking_mode = st.sidebar.toggle("Enable Thinking Mode", value=True)

# =========================
#   Model Loading
# =========================
@st.cache_resource(show_spinner=True)
def load_model_and_tokenizer(model_name):
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=2048,
        load_in_4bit=True,
    )
    return model, tokenizer

model, tokenizer = load_model_and_tokenizer(model_name)

# =========================
#   Chat State
# =========================
if "messages" not in st.session_state:
    st.session_state.messages = []

# =========================
#   Main Chat Interface
# =========================
st.title("ðŸ§  Qwen3 Micro-Reasoner")
st.caption(f"Loaded from: {model_name}")


# Render chat history
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.markdown(message["content"])

prompt = st.chat_input("Ask a question...")

if prompt:
    # Add user message to history
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.markdown(prompt)

    # Prepare messages for model
    messages = st.session_state.messages[-10:]  # last 10 for context
    # Only keep role/content
    chat_messages = [{"role": m["role"], "content": m["content"]} for m in messages]

    # Tokenize with/without thinking
    text = tokenizer.apply_chat_template(
        chat_messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=thinking_mode,
    )

    # Stream output
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        streamer = TextIteratorStreamer(tokenizer, skip_prompt=True)
        inputs = tokenizer(text, return_tensors="pt").to(model.device)

        # Run generation in a separate thread
        generation_kwargs = dict(
            **inputs,
            max_new_tokens=max_new_tokens,
            temperature=0.7, top_p=0.8, top_k=20,
            streamer=streamer,
        )
        thread = threading.Thread(target=model.generate, kwargs=generation_kwargs)
        thread.start()

        # Stream output as it is generated
        for new_text in streamer:
            full_response += new_text
            message_placeholder.markdown(full_response + "â–Œ")

        message_placeholder.markdown(full_response)
        st.session_state.messages.append({"role": "assistant", "content": full_response})

In [None]:
from pyngrok import ngrok

ngrok.set_auth_token("PASTE_YOUR_AUTH_TOKEN_HERE")

!streamlit run app.py --server.port 8501 --server.address 0.0.0.0 &>/content/logs.txt &

from pyngrok import ngrok

public_url = ngrok.connect(8501)
print("Streamlit URL:", public_url)


Streamlit URL: NgrokTunnel: "https://eleanora-moony-vanda.ngrok-free.dev" -> "http://localhost:8501"


In [21]:
from huggingface_hub import login
from unsloth import FastLanguageModel

login()

model.push_to_hub_merged(
    "Ashish-kharde1/Qwen3-Micro-Reasoner",
    tokenizer,
    save_method = "lora",
    token = True
)


VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.svâ€¦

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...o-Reasoner/tokenizer.json: 100%|##########| 11.4MB / 11.4MB            

Found HuggingFace hub cache directory: /root/.cache/huggingface/hub


model.safetensors.index.json: 0.00B [00:00, ?B/s]

Checking cache directory for required files...
Cache check failed: model-00001-of-00002.safetensors not found in local cache.
Not all required files found in cache. Will proceed with downloading.
Checking cache directory for required files...
Cache check failed: tokenizer.model not found in local cache.
Not all required files found in cache. Will proceed with downloading.


Unsloth: Preparing safetensor model files:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files:  50%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆ     | 1/2 [04:40<04:40, 280.80s/it]

model-00002-of-00002.safetensors:   0%|          | 0.00/3.08G [00:00<?, ?B/s]

Unsloth: Preparing safetensor model files: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [06:43<00:00, 201.84s/it]


Note: tokenizer.model not found (this is OK for non-SentencePiece models)


Unsloth: Merging weights into 16bit:   0%|          | 0/2 [00:00<?, ?it/s]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...0001-of-00002.safetensors:   1%|          | 33.5MB / 4.97GB            

Unsloth: Merging weights into 16bit:  50%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆ     | 1/2 [03:44<03:44, 224.80s/it]

Processing Files (0 / 0)      : |          |  0.00B /  0.00B            

New Data Upload               : |          |  0.00B /  0.00B            

  ...0002-of-00002.safetensors:   1%|1         | 33.5MB / 3.08GB            

Unsloth: Merging weights into 16bit: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [05:49<00:00, 174.77s/it]


Unsloth: Merge process complete. Saved to `/content/Ashish-kharde1/Qwen3-Micro-Reasoner`
