# Installation & Model Loading

In [None]:
%%capture

# Step 1: install the released unsloth package from PyPI (with its dependencies).
# NOTE(review): no version is pinned here, so the installed release depends on
# when the notebook is run.
!pip install unsloth # install unsloth
# Step 2: reinstall only the unsloth package itself (--no-deps) from the GitHub
# repository's default branch, ignoring pip's cache (--no-cache-dir).
# NOTE(review): this tracks the moving head of the repository, not a fixed commit.
!pip install --force-reinstall --no-cache-dir --no-deps git+https://github.com/unslothai/unsloth.git

In [None]:
%%capture

import wandb
from google.colab import userdata
from huggingface_hub import login
# Authenticate with the Hugging Face Hub. The token is read from the Colab
# "secrets" entry named HF_TOKEN.
login(token=userdata.get("HF_TOKEN"))

# Authenticate with Weights & Biases. The API key is read from the Colab
# "secrets" entry named WANDB_TOKEN.
wandb.login(key=userdata.get("WANDB_TOKEN"))

# Open the W&B run for this fine-tuning session; `run` holds the run handle.
wandb_run_options = {
    "project": "Fine-tune-DeepSeek-R1-Distill-Llama-8B on Model 0",
    "job_type": "training",
    "anonymous": "allow",
}
run = wandb.init(**wandb_run_options)

In [None]:
from unsloth import FastLanguageModel

# Maximum sequence length handed to the loader (and later to the trainer):
# 10 * 1024 = 10240.
max_seq_length = 10 * 1024
# dtype is left unset (None) — presumably this lets Unsloth choose; confirm
# against the Unsloth documentation.
dtype = None
# Request 4-bit loading of the base model weights.
load_in_4bit = True

# Gather the loader options first, then load the model and its tokenizer in a
# single call. The Hub token comes from the Colab "secrets" entry HF_TOKEN.
base_model_options = {
    "model_name": "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
    "max_seq_length": max_seq_length,
    "dtype": dtype,
    "load_in_4bit": load_in_4bit,
    "token": userdata.get("HF_TOKEN"),
}
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
🦥 Unsloth Zoo will now patch everything to make training faster!
==((====))==  Unsloth 2025.3.18: Fast Llama patching. Transformers: 4.49.0.
   \\   /|    NVIDIA A100-SXM4-40GB. Num GPUs = 1. Max memory: 39.557 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 8.0. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/5.96G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/236 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/53.0k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/483 [00:00<?, ?B/s]

In [None]:
# Projection layers that receive LoRA adapters, grouped by role. The
# concatenation below keeps the attention names first and the MLP names second.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

# Wrap the loaded base model with LoRA adapters (rank 16, alpha 16, no dropout,
# no bias terms), using Unsloth's gradient-checkpointing option and a fixed
# random_state.
model_lora = FastLanguageModel.get_peft_model(
    model,
    r=16,
    target_modules=attention_projections + mlp_projections,
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    use_rslora=False,
    loftq_config=None,
)

Unsloth 2025.3.18 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


# Formatting Dataset

In [None]:
# Prompt template shared by training and inference. It is filled with
# str.format and contains exactly two positional "{}" slots, both at the end:
#   1. the XML recording (under "### XML Input:")
#   2. the expected grouping (under "### Output:"); at inference time this slot
#      is filled with an empty string so the model writes the answer itself.
# Because str.format is used, any additional literal "{" or "}" added to this
# text must be doubled ("{{" / "}}").
train_prompt_style = """You are given an XML file that represents an asciinema terminal session. The XML file contains a root <recording> tag and several child tags, each representing an event. These child tags can be either <user_input> (representing user commands) or <system_output> (representing system responses). Each tag includes a timestamp id (in seconds) that indicates when the event occurred.

Your task is to output a numbered file from processing the XML file and group the sequential events so that each group represents a single high-level goal. The groups should contain one or more consecutive events (both <user_input> and <system_output>) that together form a logical unit of work. At the end of each grouping, you must output a "0" on a new line as a separator, and the entire file must end with a "0".

**Important Notes:**
1. **End-of-Group Separator:** After each group of events corresponding to a high-level goal, output a single "0" on a new line. Also, ensure that the overall output ends with a "0".
2. **Keystroke-level Input:** The parsed XML file is generated by recording every keystroke. This means that a complete command may be split across multiple consecutive <user_input> tags. Your grouping should take this into account and combine these keystroke events into a single logical command or goal.

Input:
An XML file formatted as follows (the line number corresponds to the order of events):

Example snippet (note that user input events may represent only a single keystroke):

  <system_output timestamp="0.096022">[?2004h]0;demo@boxtop: ~demo@boxtop:~$ </system_output>
  <user_input timestamp="9.163614">s</user_input>
  <system_output timestamp="9.164051">s</system_output>
  <user_input timestamp="9.365744">s</user_input>
  <system_output timestamp="9.366263">s</system_output>
  <user_input timestamp="9.589844">h</user_input>
  <system_output timestamp="9.59026">h</system_output>
  <user_input timestamp="9.708352"> </user_input>
  <system_output timestamp="9.708844"> </system_output>
  <user_input timestamp="10.1118">1</user_input>
  <system_output timestamp="10.112236">1</system_output>
  <user_input timestamp="10.270878">0</user_input>
  <system_output timestamp="10.271223">0</system_output>
  <user_input timestamp="10.471565">.</user_input>
  <system_output timestamp="10.471898">.</system_output>
  <user_input timestamp="10.594981">0</user_input>
  <system_output timestamp="10.595383">0</system_output>
  <user_input timestamp="10.757499">.</user_input>
  <system_output timestamp="10.757882">.</system_output>
  <user_input timestamp="11.140897">7</user_input>
  <system_output timestamp="11.14119">7</system_output>
  <user_input timestamp="11.603706">.</user_input>
  <system_output timestamp="11.604019">.</system_output>
  <user_input timestamp="12.330584">1</user_input>
  <system_output timestamp="12.331455">1</system_output>
  <user_input timestamp="12.632256">3</user_input>
  <system_output timestamp="12.633323">3</system_output>
  <user_input timestamp="13.446626">8</user_input>
  <system_output timestamp="13.447562">8</system_output>
  <user_input timestamp="14.510021"> (End of input marker or newline)</user_input>

Assume that each tag is assigned a line number based on its position in the XML file (i.e. line 1 for the first event, line 2 for the second, and so on).

Expected Output Format:
For each group of events that collectively accomplish a high-level goal, output the corresponding line numbers in sequential order (each on a new line). At the end of each group, output a single "0" on a new line to mark the end of that grouping. The entire output must also end with a "0".

For example, if the first high-level goal is accomplished by events on lines 1 through 3, and the next goal is accomplished by events on lines 4 through 8, you will output:
1
2
3
0
4
5
6
7
8
0

Task Summary:
1. Parse the XML file.
2. Group the events (<user_input> and <system_output>) into sequential blocks, where each block represents a complete high-level goal.
    - Remember that <user_input> events may be recorded for each keystroke. Therefore, consecutive keystroke events that form a complete command should be grouped together.
3. Output the line numbers of each event in each group, printing "0" on a new line after each group.
4. Ensure that the overall output ends with a "0".

Example:
Given an XML file with the following events and assuming a logical grouping is determined as follows:

- **Group 1:** Events 1–3 (e.g., initial terminal prompt or command display).
- **Group 2:** Events 4–8 (e.g., a command input that spans multiple keystroke events and its associated outputs).

Then your output should be:
1
2
3
0
4
5
6
7
8
0

Additional Notes:
- The grouping must be sequential; no events should be skipped.
- All events in a group must be consecutive in the XML file.
- Use the provided dataset with correct input and corresponding grouping output examples to learn the grouping patterns.
- The model should intelligently group keystroke-level <user_input> events that together form a complete command.

### XML Input:
{}

### Output:
{}"""


In [None]:
from google.colab import drive
import os

# Mount Google Drive so the training files stored under MyDrive are reachable.
drive.mount('/content/drive')

txt_folder = "/content/drive/MyDrive/model_0_data"  # set to your google drive folder

# Collect the file contents of the data folder:
#   * files ending in ".xml" are model inputs (recordings),
#   * every other regular file is treated as an expected response.
# Input i is paired with response i, so both lists must line up when ordered by
# filename. Iterating over the sorted directory listing keeps each list in
# filename order.
input_data = []
response_data = []

for filename in sorted(os.listdir(txt_folder)):
    path = os.path.join(txt_folder, filename)
    # Skip sub-directories and hidden entries (e.g. ".ipynb_checkpoints",
    # ".DS_Store"): opening them as UTF-8 text would either crash or slip a
    # bogus "response" into the dataset and shift every pairing after it.
    if filename.startswith(".") or not os.path.isfile(path):
        continue
    with open(path, "r", encoding="utf-8") as f:
        content = f.read()
    if filename.endswith(".xml"):
        input_data.append(content)
    else:
        response_data.append(content)

# Fail early with a readable message instead of training on nothing or on
# misaligned pairs.
if not input_data or len(input_data) != len(response_data):
    raise ValueError(
        f"Expected one response file per XML input in {txt_folder!r}, but found "
        f"{len(input_data)} XML input(s) and {len(response_data)} response file(s)."
    )

Mounted at /content/drive


In [None]:
# Sanity check: print the text of the first loaded response to confirm the
# data was read as expected.
print(response_data[0]) # check data

In [None]:
from datasets import load_dataset, Dataset

# Build a two-column dataset in which row i pairs the i-th XML input with the
# i-th response text.
dataset_columns = {
    "Input": input_data,
    "Response": response_data,
}
dataset = Dataset.from_dict(dataset_columns)
# Final expression of the cell: displays the dataset summary.
dataset

Dataset({
    features: ['Input', 'Response'],
    num_rows: 6
})

In [None]:
# End-of-sequence token string taken from the loaded tokenizer; it is appended
# to every formatted training text.
EOS_TOKEN = tokenizer.eos_token

def formatting_prompts_func(examples):
    """Render a batch of examples into full training texts.

    Args:
        examples: Mapping with parallel sequences under the keys "Input"
            (XML recordings) and "Response" (expected groupings).

    Returns:
        A dict with a single key, "text", holding one string per zipped
        (input, response) pair: the prompt template filled with that pair,
        followed by the end-of-sequence token.
    """
    pairs = zip(examples["Input"], examples["Response"])
    rendered = [
        train_prompt_style.format(xml_input, expected_output) + EOS_TOKEN
        for xml_input, expected_output in pairs
    ]
    return {"text": rendered}

In [None]:
# Apply the prompt formatter in batched mode; its returned "text" key becomes
# the column the trainer reads.
dataset_finetune = dataset.map(formatting_prompts_func, batched = True)

Map:   0%|          | 0/6 [00:00<?, ? examples/s]

# Setting Training Arguments & Training

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Supervised fine-tuning of the LoRA-wrapped model on the formatted "text"
# column of the dataset.
trainer = SFTTrainer(
    model=model_lora,
    tokenizer=tokenizer,
    train_dataset=dataset_finetune,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,

    args=TrainingArguments(
        per_device_train_batch_size=2,
        gradient_accumulation_steps=1,
        # Training length is controlled by max_steps alone. num_train_epochs is
        # deliberately not set: a positive max_steps overrides it, so the two
        # values would only contradict each other (60 steps at batch size 2
        # over 6 examples is 20 passes over the data).
        max_steps=60,
        warmup_steps=5,
        learning_rate=2e-4,
        # Use bf16 when the GPU supports it, otherwise fall back to fp16.
        fp16=not is_bfloat16_supported(),
        bf16=is_bfloat16_supported(),
        logging_steps=10,
        optim="adamw_8bit",
        weight_decay=0.01,
        lr_scheduler_type="linear",
        seed=3407,
        output_dir="outputs",
        # NOTE(review): a W&B run is opened earlier in this notebook, but
        # "none" disables all Trainer reporting, so that run receives no
        # training metrics. Switch to "wandb" if metrics should be tracked.
        report_to = "none"
    ),
)

Unsloth: Tokenizing ["text"] (num_proc=2):   0%|          | 0/6 [00:00<?, ? examples/s]

In [None]:
# Run the fine-tuning loop and keep the object returned by train() for later
# inspection.
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 6 | Num Epochs = 20 | Total steps = 60
O^O/ \_/ \    Batch size per device = 2 | Gradient accumulation steps = 1
\        /    Data Parallel GPUs = 1 | Total batch size (2 x 1 x 1) = 2
 "-____-"     Trainable parameters = 41,943,040/8,000,000,000 (0.52% trained)


Step,Training Loss
10,0.9812
20,0.7166
30,0.5099
40,0.3753
50,0.2622
60,0.2103


Unsloth: Will smartly offload gradients to save VRAM!


In [None]:
# Close the Weights & Biases run that was opened with wandb.init earlier in
# the notebook.
wandb.finish()

# Testing

In [None]:
# Sample asciinema recording (XML) that is fed to the fine-tuned model in the
# inference cell that follows.
# NOTE(review): the recording contains the passwords typed during the session —
# presumably demo credentials; confirm before sharing this notebook.
question = """<?xml version="1.0" ?>
<recording version="2" width="319" height="92" timestamp="1727009557">
  <system_output timestamp="0.071459">[?2004h]0;demo@boxtop: ~demo@boxtop:~$ </system_output>
  <user_input timestamp="3.724374">s</user_input>
  <system_output timestamp="3.725312">s</system_output>
  <user_input timestamp="3.944484">s</user_input>
  <system_output timestamp="3.945402">s</system_output>
  <user_input timestamp="4.166899">h</user_input>
  <system_output timestamp="4.167862">h</system_output>
  <user_input timestamp="4.3286"> </user_input>
  <system_output timestamp="4.329581"> </system_output>
  <user_input timestamp="4.648216">1</user_input>
  <system_output timestamp="4.64918">1</system_output>
  <user_input timestamp="4.828329">0</user_input>
  <system_output timestamp="4.829196">0</system_output>
  <user_input timestamp="5.090241">.</user_input>
  <system_output timestamp="5.091185">.</system_output>
  <user_input timestamp="5.250818">0</user_input>
  <system_output timestamp="5.251636">0</system_output>
  <user_input timestamp="5.431616">.</user_input>
  <system_output timestamp="5.432483">.</system_output>
  <user_input timestamp="5.85454">7</user_input>
  <system_output timestamp="5.855438">7</system_output>
  <user_input timestamp="6.074859">.</user_input>
  <system_output timestamp="6.075734">.</system_output>
  <user_input timestamp="6.435266">1</user_input>
  <system_output timestamp="6.43612">1</system_output>
  <user_input timestamp="6.778539">3</user_input>
  <system_output timestamp="6.779415">3</system_output>
  <user_input timestamp="7.178519">7</user_input>
  <system_output timestamp="7.179402">7</system_output>
  <user_input timestamp="7.663922"></user_input>
  <system_output timestamp="7.664737">[K</system_output>
  <user_input timestamp="7.887423">8</user_input>
  <system_output timestamp="7.888286">8</system_output>
  <user_input timestamp="9.000601">
</user_input>
  <system_output timestamp="9.001515">
</system_output>
  <system_output timestamp="9.001629">[?2004l
</system_output>
  <system_output timestamp="9.586503">
demo@10.0.7.138's password: </system_output>
  <user_input timestamp="9.958319">1</user_input>
  <user_input timestamp="10.038434">M</user_input>
  <user_input timestamp="10.198845">3</user_input>
  <user_input timestamp="10.298877">T</user_input>
  <user_input timestamp="10.480309">5</user_input>
  <user_input timestamp="10.579129">6</user_input>
  <user_input timestamp="10.920891">7</user_input>
  <user_input timestamp="11.103528">!</user_input>
  <user_input timestamp="11.241265">
</user_input>
  <system_output timestamp="11.242199">
</system_output>
  <system_output timestamp="11.518481">Linux boxtop 6.6.13-amd64 #1 SMP PREEMPT_DYNAMIC Debian 6.6.13-1 (2024-01-20) x86_64


Plan your installation, and FAI installs your plan.

Last login: Sun Sep 22 12:50:30 2024 from 10.0.7.1

</system_output>
  <system_output timestamp="11.587303">[?2004h]0;demo@boxtop: ~demo@boxtop:~$ </system_output>
  <user_input timestamp="11.985103">p</user_input>
  <system_output timestamp="11.988199">p</system_output>
  <user_input timestamp="12.146566">a</user_input>
  <system_output timestamp="12.158429">a</system_output>
  <user_input timestamp="12.308408">s</user_input>
  <system_output timestamp="12.323088">s</system_output>
  <user_input timestamp="12.490203">s</user_input>
  <system_output timestamp="12.513615">s</system_output>
  <user_input timestamp="12.748143">w</user_input>
  <system_output timestamp="12.770986">w</system_output>
  <user_input timestamp="13.071223">d</user_input>
  <system_output timestamp="13.079019">d</system_output>
  <user_input timestamp="13.352127">
</user_input>
  <system_output timestamp="13.375242">
[?2004l
</system_output>
  <system_output timestamp="14.956765">Changing password for demo.
Current password: </system_output>
  <user_input timestamp="15.932853">1</user_input>
  <user_input timestamp="16.033733">M</user_input>
  <user_input timestamp="16.195366">3</user_input>
  <user_input timestamp="16.317828">T</user_input>
  <user_input timestamp="16.517665">5</user_input>
  <user_input timestamp="16.57855">6</user_input>
  <user_input timestamp="16.937213">7</user_input>
  <user_input timestamp="17.118217">!</user_input>
  <user_input timestamp="17.297791">
</user_input>
  <system_output timestamp="17.32098">
</system_output>
  <system_output timestamp="17.416283">New password: </system_output>
  <user_input timestamp="19.239669">O</user_input>
  <user_input timestamp="19.462807">p</user_input>
  <user_input timestamp="19.663239">e</user_input>
  <user_input timestamp="19.823472">n</user_input>
  <user_input timestamp="20.165214">Y</user_input>
  <user_input timestamp="20.328604">o</user_input>
  <user_input timestamp="20.467273">u</user_input>
  <user_input timestamp="20.684951">r</user_input>
  <user_input timestamp="21.066578">H</user_input>
  <user_input timestamp="21.224623">e</user_input>
  <user_input timestamp="21.324622">a</user_input>
  <user_input timestamp="21.46358">r</user_input>
  <user_input timestamp="21.725808">t</user_input>
  <user_input timestamp="22.108267">G</user_input>
  <user_input timestamp="22.204951">P</user_input>
  <user_input timestamp="22.423659">T</user_input>
  <user_input timestamp="22.764709">
</user_input>
  <system_output timestamp="22.788938">
Retype new password: </system_output>
  <user_input timestamp="23.582787">O</user_input>
  <user_input timestamp="23.804832">p</user_input>
  <user_input timestamp="24.002847">e</user_input>
  <user_input timestamp="24.184193">n</user_input>
  <user_input timestamp="24.486491">Y</user_input>
  <user_input timestamp="24.665441">o</user_input>
  <user_input timestamp="24.804858">u</user_input>
  <user_input timestamp="25.024414">r</user_input>
  <user_input timestamp="25.449668">H</user_input>
  <user_input timestamp="25.652843">e</user_input>
  <user_input timestamp="25.77554">a</user_input>
  <user_input timestamp="25.996544">r</user_input>
  <user_input timestamp="26.261924">t</user_input>
  <user_input timestamp="26.706184">G</user_input>
  <user_input timestamp="26.765363">P</user_input>
  <user_input timestamp="27.001545">T</user_input>
  <user_input timestamp="27.340847">
</user_input>
  <system_output timestamp="27.350003">
</system_output>
  <system_output timestamp="29.339981">passwd: password updated successfully
[?2004h]0;demo@boxtop: ~demo@boxtop:~$ </system_output>
  <user_input timestamp="31.580684"/>
  <system_output timestamp="31.585122">[?2004l

logout
</system_output>
  <system_output timestamp="31.590318">Connection to 10.0.7.138 closed.

</system_output>
  <system_output timestamp="31.591579">[?2004h</system_output>
  <system_output timestamp="31.591914">]0;demo@boxtop: ~demo@boxtop:~$ </system_output>
  <user_input timestamp="33.647017"/>
  <system_output timestamp="33.647764">[?2004l

exit
</system_output>
</recording>
"""
# Echo the sample so it is visible in the notebook next to the model's answer.
print(question)

In [None]:
# Put the LoRA model into Unsloth's inference configuration
# (FastLanguageModel.for_inference) before generating.
FastLanguageModel.for_inference(model_lora)

# Build the prompt with the same template used for training, leaving the
# answer slot empty so the model produces it.
inputs = tokenizer([train_prompt_style.format(question, "")], return_tensors="pt").to("cuda")

outputs = model_lora.generate(
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1200,
    use_cache=True,
)

# Full decoded sequences (prompt + completion, special tokens included), kept
# under the original name for inspection.
response = tokenizer.batch_decode(outputs)

# For this decoder-only model, generate() returns the prompt tokens followed by
# the newly generated ones. Slicing at the prompt length isolates the answer
# without searching the text for "### Output:" (which breaks if that marker
# ever appears in the input or in the completion), and skip_special_tokens
# keeps the end-of-sequence marker out of the printed answer.
prompt_length = inputs.input_ids.shape[1]
completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

print(completion)


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
0
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
0
139
140
141
142
143
144
145
146
147
148
0
149
150
151
152
153
154
0
<｜end▁of▁sentence｜>


# Saving to Hugging Face

In [None]:
# Write the LoRA adapters (save_method="lora") and the tokenizer to the local
# "model_lora" directory.
model_lora.save_pretrained_merged(
    "model_lora",
    tokenizer,
    save_method="lora",
)

# Upload the same artefacts to the Hugging Face Hub repository, authenticating
# with the HF_TOKEN entry from Colab "secrets".
model_lora.push_to_hub_merged(
    "bria7801/Model-0",
    tokenizer,
    save_method="lora",
    token=userdata.get("HF_TOKEN"),
)

Unsloth: Saving tokenizer... Done.
Unsloth: Saving model... Done.
Unsloth: Saving LoRA adapters. Please wait...


README.md:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/168M [00:00<?, ?B/s]

README.md:   0%|          | 0.00/49.0 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.2M [00:00<?, ?B/s]

Saved lora model to https://huggingface.co/bria7801/Model-0
