# **Networking - SFT**

In [None]:
# Install the fine-tuning stack with uv.
# Step 1: GPU/training libraries, installed with --no-deps so that only the packages
# named on the command line are installed (xformers and trl are pinned to exact versions).
!uv pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
# Step 2: tokenizer / dataset / Hugging Face Hub helpers, with normal dependency resolution.
!uv pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
# Step 3: pin transformers to an exact release.
!uv pip install transformers==4.51.3
# Step 4: Unsloth itself, again without its dependencies.
# NOTE(review): unsloth and unsloth_zoo are unpinned while transformers is pinned; the
# recorded output of the model-loading cell ends in
# "ModuleNotFoundError: No module named 'transformers.models.bitnet'", which may be a
# version mismatch between these packages (or a kernel that was not restarted after the
# install) - confirm and pin matching versions.
!uv pip install --no-deps unsloth

[2mUsing Python 3.11.12 environment at: /usr[0m
[37m⠋[0m [2mResolving dependencies...                                                     [0m[2K[37m⠋[0m [2mResolving dependencies...                                                     [0m[2K[37m⠋[0m [2mxformers==0.0.29.post3                                                        [0m[2K[37m⠋[0m [2mtrl==0.15.2                                                                   [0m[2K[37m⠋[0m [2mbitsandbytes==0.46.0                                                          [0m[2K[37m⠋[0m [2maccelerate==1.7.0                                                             [0m[2K[37m⠋[0m [2mpeft==0.15.2                                                                  [0m[2K[37m⠋[0m [2mtriton==3.2.0                                                                 [0m[2K[37m⠋[0m [2mcut-cross-entropy==25.1.1                                                     [0m[2K[37m⠋[0m [2munsloth-zoo==2025

In [None]:
from unsloth import FastLanguageModel
import torch


# Loading options for the base model. They are kept as module-level names
# because they are passed by name to from_pretrained() below.
max_seq_length = 4096
load_in_4bit = True
dtype = None  # None for auto detection. Float16 for Tesla T4, V100, Bfloat16 for Ampere+

base_model_options = dict(
    model_name = "unsloth/Qwen2.5-0.5B",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
)
model, tokenizer = FastLanguageModel.from_pretrained(**base_model_options)

# Attention and MLP projection layers that receive LoRA adapters.
lora_target_modules = [
    "q_proj", "k_proj", "v_proj", "o_proj",
    "gate_proj", "up_proj", "down_proj",
]

lora_options = dict(
    r = 16,                                  # LoRA rank; any value > 0 (8-128 are the suggested choices)
    target_modules = lora_target_modules,
    lora_alpha = 16,
    lora_dropout = 0,                        # any value is accepted; 0 is the optimized setting
    bias = "none",                           # any value is accepted; "none" is the optimized setting
    use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,                      # rank-stabilized LoRA is available but not used
    loftq_config = None,                     # LoftQ is available but not used
)

# Wrap the loaded base model with the LoRA adapters configured above.
model = FastLanguageModel.get_peft_model(model, **lora_options)

==((====))==  Unsloth 2025.5.9: Fast Qwen2 patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


ModuleNotFoundError: No module named 'transformers.models.bitnet'

In [None]:
from datasets import Dataset


def load_my_text_dataset(file_path):
    """Read a UTF-8 text file into a ``{"text": [...]}`` mapping.

    Each line is stripped of leading/trailing whitespace, and lines that are
    empty after stripping are dropped. Every remaining line becomes one entry.

    Args:
        file_path: Path of the text file to read.

    Returns:
        dict: A single key ``"text"`` mapped to the list of cleaned lines,
        in file order.
    """
    samples = []
    with open(file_path, "r", encoding="utf-8") as handle:
        for raw_line in handle:
            cleaned = raw_line.strip()
            if cleaned:
                samples.append(cleaned)
    return {"text": samples}


# Read both vendor corpora (Cisco first, then Juniper); each result is a
# {"text": [...]} dict with one entry per non-empty line.
cisco_data, juniper_data = (
    load_my_text_dataset(corpus_file) for corpus_file in ("cisco.txt", "juniper.txt")
)

# Per-vendor datasets.
cisco_dataset, juniper_dataset = map(Dataset.from_dict, (cisco_data, juniper_data))

# Single training set: all Cisco lines followed by all Juniper lines.
all_lines = [*cisco_data["text"], *juniper_data["text"]]
combined_dataset = Dataset.from_dict({"text": all_lines})

# Quick sanity check of the schema and the sample count.
print(combined_dataset)
print("Total samples:", len(combined_dataset))


Dataset({
    features: ['text'],
    num_rows: 924
})
Total samples: 924


In [None]:
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from trl import SFTTrainer

# Choose the mixed-precision mode once: bf16 when the GPU supports it, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir = "outputs",
    report_to = "none",  # Use "wandb" if you want logging
    seed = 3407,
    # Batching: 8 samples per device, accumulated over 4 steps per optimizer update.
    per_device_train_batch_size = 8,
    gradient_accumulation_steps = 4,
    # Schedule: training length is set in epochs (max_steps is deliberately not used).
    num_train_epochs = 20,
    warmup_steps = 25,
    learning_rate = 2e-4,
    lr_scheduler_type = "linear",
    # Optimizer
    optim = "adamw_8bit",
    weight_decay = 0.01,
    # Precision: exactly one of fp16 / bf16 is enabled.
    fp16 = not use_bf16,
    bf16 = use_bf16,
    # Report the training loss every 100 steps.
    logging_steps = 100,
)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = combined_dataset,
    dataset_text_field = "text",  # column of combined_dataset that holds the plain text
    max_seq_length = 1024,
    dataset_num_proc = 2,
    packing = False,              # set True for many short sequences
    args = training_args,
)

# Run the fine-tuning; as the last expression, the returned TrainOutput is displayed.
trainer.train()

Unsloth: Tokenizing ["text"]:   0%|          | 0/924 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs used = 1
   \\   /|    Num examples = 924 | Num Epochs = 20 | Total steps = 580
O^O/ \_/ \    Batch size per device = 8 | Gradient accumulation steps = 4
\        /    Data Parallel GPUs = 1 | Total batch size (8 x 4 x 1) = 32
 "-____-"     Trainable parameters = 8,798,208/5,000,000,000 (0.18% trained)


Step,Training Loss
100,2.1489
200,0.6952
300,0.3022
400,0.2198
500,0.1811


TrainOutput(global_step=580, training_loss=0.6336820240678458, metrics={'train_runtime': 846.1358, 'train_samples_per_second': 21.84, 'train_steps_per_second': 0.685, 'total_flos': 1464702444902400.0, 'train_loss': 0.6336820240678458})

In [None]:
# Recursively archive the ./outputs/ training directory into outputs.zip.
!zip -r outputs.zip ./outputs/

  adding: outputs/ (stored 0%)
  adding: outputs/checkpoint-580/ (stored 0%)
  adding: outputs/checkpoint-580/special_tokens_map.json (deflated 68%)
  adding: outputs/checkpoint-580/optimizer.pt (deflated 10%)
  adding: outputs/checkpoint-580/adapter_model.safetensors (deflated 7%)
  adding: outputs/checkpoint-580/rng_state.pth (deflated 25%)
  adding: outputs/checkpoint-580/tokenizer_config.json (deflated 89%)
  adding: outputs/checkpoint-580/scaler.pt (deflated 60%)
  adding: outputs/checkpoint-580/merges.txt (deflated 57%)
  adding: outputs/checkpoint-580/scheduler.pt (deflated 55%)
  adding: outputs/checkpoint-580/training_args.bin (deflated 51%)
  adding: outputs/checkpoint-580/vocab.json (deflated 61%)
  adding: outputs/checkpoint-580/added_tokens.json (deflated 67%)
  adding: outputs/checkpoint-580/trainer_state.json (deflated 64%)
  adding: outputs/checkpoint-580/adapter_config.json (deflated 56%)
  adding: outputs/checkpoint-580/tokenizer.json (deflated 81%)
  adding: outputs/

In [None]:
# Extract outputNetworkSFT.zip into ./outputs. The archive's own top-level folder is
# "outputs/", so the checkpoint files end up under outputs/outputs/checkpoint-*/
# (see the extraction listing in this cell's output).
!unzip outputNetworkSFT.zip -d outputs

Archive:  outputNetworkSFT.zip
   creating: outputs/outputs/
   creating: outputs/outputs/checkpoint-580/
  inflating: outputs/outputs/checkpoint-580/special_tokens_map.json  
  inflating: outputs/outputs/checkpoint-580/optimizer.pt  
  inflating: outputs/outputs/checkpoint-580/adapter_model.safetensors  
  inflating: outputs/outputs/checkpoint-580/rng_state.pth  
  inflating: outputs/outputs/checkpoint-580/tokenizer_config.json  
  inflating: outputs/outputs/checkpoint-580/scaler.pt  
  inflating: outputs/outputs/checkpoint-580/merges.txt  
  inflating: outputs/outputs/checkpoint-580/scheduler.pt  
  inflating: outputs/outputs/checkpoint-580/training_args.bin  
  inflating: outputs/outputs/checkpoint-580/vocab.json  
  inflating: outputs/outputs/checkpoint-580/added_tokens.json  
  inflating: outputs/outputs/checkpoint-580/trainer_state.json  
  inflating: outputs/outputs/checkpoint-580/adapter_config.json  
  inflating: outputs/outputs/checkpoint-580/tokenizer.json  
  inflating: out

In [None]:
from transformers import AutoTokenizer, AutoModelForCausalLM

# Point at the extracted checkpoint directory rather than at "outputs" itself.
# "outputs" only contains the nested outputs/ folder from the archive and has no
# config.json, which is what raises "ValueError: Unrecognized model in outputs".
model_path = "outputs/outputs/checkpoint-580"

# The tokenizer files (tokenizer.json, vocab.json, merges.txt, ...) are stored
# alongside the adapter in the checkpoint directory.
tokenizer = AutoTokenizer.from_pretrained(model_path)

# The checkpoint holds a LoRA adapter (adapter_config.json + adapter_model.safetensors),
# not full model weights. With peft installed, transformers reads the base model name
# from adapter_config.json, loads that base model and attaches the adapter to it.
model = AutoModelForCausalLM.from_pretrained(model_path, torch_dtype="auto", device_map="auto")

# Switch to inference mode; as the last expression, the model is also displayed.
model.eval()

ValueError: Unrecognized model in outputs. Should have a `model_type` key in its config.json, or contain one of the following strings in its name: albert, align, altclip, aria, aria_text, audio-spectrogram-transformer, autoformer, aya_vision, bamba, bark, bart, beit, bert, bert-generation, big_bird, bigbird_pegasus, biogpt, bit, bitnet, blenderbot, blenderbot-small, blip, blip-2, blip_2_qformer, bloom, bridgetower, bros, camembert, canine, chameleon, chinese_clip, chinese_clip_vision_model, clap, clip, clip_text_model, clip_vision_model, clipseg, clvp, code_llama, codegen, cohere, cohere2, colpali, conditional_detr, convbert, convnext, convnextv2, cpmant, csm, ctrl, cvt, d_fine, dab-detr, dac, data2vec-audio, data2vec-text, data2vec-vision, dbrx, deberta, deberta-v2, decision_transformer, deepseek_v3, deformable_detr, deit, depth_anything, depth_pro, deta, detr, diffllama, dinat, dinov2, dinov2_with_registers, distilbert, donut-swin, dpr, dpt, efficientformer, efficientnet, electra, emu3, encodec, encoder-decoder, ernie, ernie_m, esm, falcon, falcon_mamba, fastspeech2_conformer, flaubert, flava, fnet, focalnet, fsmt, funnel, fuyu, gemma, gemma2, gemma3, gemma3_text, git, glm, glm4, glpn, got_ocr2, gpt-sw3, gpt2, gpt_bigcode, gpt_neo, gpt_neox, gpt_neox_japanese, gptj, gptsan-japanese, granite, granite_speech, granitemoe, granitemoehybrid, granitemoeshared, granitevision, graphormer, grounding-dino, groupvit, helium, hgnet_v2, hiera, hubert, ibert, idefics, idefics2, idefics3, idefics3_vision, ijepa, imagegpt, informer, instructblip, instructblipvideo, internvl, internvl_vision, jamba, janus, jetmoe, jukebox, kosmos-2, layoutlm, layoutlmv2, layoutlmv3, led, levit, lilt, llama, llama4, llama4_text, llava, llava_next, llava_next_video, llava_onevision, longformer, longt5, luke, lxmert, m2m_100, mamba, mamba2, marian, markuplm, mask2former, maskformer, maskformer-swin, mbart, mctct, mega, megatron-bert, mgp-str, mimi, mistral, mistral3, mixtral, mlcd, mllama, 
mobilebert, mobilenet_v1, mobilenet_v2, mobilevit, mobilevitv2, modernbert, moonshine, moshi, mpnet, mpt, mra, mt5, musicgen, musicgen_melody, mvp, nat, nemotron, nezha, nllb-moe, nougat, nystromformer, olmo, olmo2, olmoe, omdet-turbo, oneformer, open-llama, openai-gpt, opt, owlv2, owlvit, paligemma, patchtsmixer, patchtst, pegasus, pegasus_x, perceiver, persimmon, phi, phi3, phi4_multimodal, phimoe, pix2struct, pixtral, plbart, poolformer, pop2piano, prompt_depth_anything, prophetnet, pvt, pvt_v2, qdqbert, qwen2, qwen2_5_omni, qwen2_5_vl, qwen2_5_vl_text, qwen2_audio, qwen2_audio_encoder, qwen2_moe, qwen2_vl, qwen2_vl_text, qwen3, qwen3_moe, rag, realm, recurrent_gemma, reformer, regnet, rembert, resnet, retribert, roberta, roberta-prelayernorm, roc_bert, roformer, rt_detr, rt_detr_resnet, rt_detr_v2, rwkv, sam, sam_hq, sam_hq_vision_model, sam_vision_model, seamless_m4t, seamless_m4t_v2, segformer, seggpt, sew, sew-d, shieldgemma2, siglip, siglip2, siglip_vision_model, smolvlm, smolvlm_vision, speech-encoder-decoder, speech_to_text, speech_to_text_2, speecht5, splinter, squeezebert, stablelm, starcoder2, superglue, superpoint, swiftformer, swin, swin2sr, swinv2, switch_transformers, t5, table-transformer, tapas, textnet, time_series_transformer, timesfm, timesformer, timm_backbone, timm_wrapper, trajectory_transformer, transfo-xl, trocr, tvlt, tvp, udop, umt5, unispeech, unispeech-sat, univnet, upernet, van, video_llava, videomae, vilt, vipllava, vision-encoder-decoder, vision-text-dual-encoder, visual_bert, vit, vit_hybrid, vit_mae, vit_msn, vitdet, vitmatte, vitpose, vitpose_backbone, vits, vivit, wav2vec2, wav2vec2-bert, wav2vec2-conformer, wavlm, whisper, xclip, xglm, xlm, xlm-prophetnet, xlm-roberta, xlm-roberta-xl, xlnet, xmod, yolos, yoso, zamba, zamba2, zoedepth

In [None]:
# List the contents of the current working directory.
!ls

outputNetworkSFT.zip  sample_data


In [None]:
from unsloth import FastLanguageModel
from transformers import TextStreamer

# Loading options; these must be defined before the model is loaded below.
max_seq_length = 4096
dtype = None         # None lets the loader pick; set torch.float16 to be explicit
load_in_4bit = True

# Alpaca-style prompt template with three positional slots:
# instruction, input, and response (left empty at inference time).
slt_prompt = """
Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}
"""

# Load the fine-tuned adapter and its tokenizer from the final training checkpoint.
# Training finished at global_step=580 and checkpoint-580 is the checkpoint listed in
# the extracted archive, so it is used here instead of the intermediate checkpoint-500.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="outputs/outputs/checkpoint-580",  # extracted fine-tuned checkpoint
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Enable Unsloth's faster inference mode
FastLanguageModel.for_inference(model)

# Fill the prompt template: instruction, input (the user's question), and an empty
# response slot for the model to complete.
# Keep the question free of invisible characters such as a trailing zero-width space
# (U+200B) picked up by copy-paste; they are tokenized like any other text and end
# up in the model's input.
prompt = slt_prompt.format(
    "You are a networking expert",  # instruction
    "Tell me about routers in cisco, Catalyst 8300-2N2S-6T Give me information about this only. if not hve say no",  # input
    "",  # output left empty for generation
)

# Tokenize as a batch of one and move the tensors to the model's device
# (model.device instead of a hardcoded "cuda").
inputs = tokenizer(
    [prompt],
    return_tensors="pt",
).to(model.device)

# Set up the streamer (skip_prompt=True keeps the prompt out of the printed output).
# TextStreamer's constructor is (tokenizer, skip_prompt=False, **decode_kwargs): every
# extra keyword is forwarded to tokenizer.decode(). Decode options are therefore passed
# flat; wrapping them in a `decode_kwargs={...}` argument would hand decode() a single
# unknown keyword named "decode_kwargs" and the options inside it would not be applied.
text_streamer = TextStreamer(
    tokenizer,
    skip_prompt=True,
    skip_special_tokens=True,
    clean_up_tokenization_spaces=True,
)

# Sampling settings for the demo generation.
generation_options = dict(
    max_new_tokens=300,  # hard cap on generated tokens; longer answers are cut off here
    do_sample=True,
    temperature=0.1,
    top_p=0.1,
)

# Generate with streaming: text_streamer prints the text as it is produced, so the
# returned token tensor is not needed and is discarded.
_ = model.generate(**inputs, streamer=text_streamer, **generation_options)


==((====))==  Unsloth 2025.5.9: Fast Qwen2 patching. Transformers: 4.51.3.
   \\   /|    Tesla T4. Num GPUs = 1. Max memory: 14.741 GB. Platform: Linux.
O^O/ \_/ \    Torch: 2.6.0+cu124. CUDA: 7.5. CUDA Toolkit: 12.4. Triton: 3.2.0
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.29.post3. FA2 = False]
 "-____-"     Free license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!
To answer this, I would need to gather information about the specific model of Catalyst 8300-2N2S-6T router that is in question. This can be done through various means, including reviewing the product description, contacting the manufacturer, or using online databases and archives. Once I have that information, I can then provide a comprehensive answer that includes details on the router's performance, security features, and more. Let me know if you need any further assistance with this, and I'll do my best to provide the information that