<a href="https://colab.research.google.com/github/RiverGumSecurity/AILabs/blob/dev-joff/notebooks/bhisblogs.ipynb" target="new"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import torch
import warnings
import os
import sys
import pathlib
try:
    import unsloth
except:
    if 'google.colab' in sys.modules:
        !pip install unsloth

# suppresses some noisey warnings which are just annoying
warnings.filterwarnings('ignore')
max_seq_length = 4096

# Setup Hugging Face Credentials.
HF_APIKEY = ''
if 'google.colab' in sys.modules:
    from google.colab import userdata
    HF_APIKEY = userdata.get('HF_APIKEY')
else:
    with open(pathlib.Path.home() / '.hfkey') as hf:
        HF_APIKEY = hf.read().strip()
if not HF_APIKEY:
    print('[-] ERROR: Cannot continue without Hugging Face API Key')
    sys.exit(0)
os.environ['HF_TOKEN'] = HF_APIKEY

model, tokenizer = unsloth.FastLanguageModel.from_pretrained(
    model_name = "unsloth/Meta-Llama-3.1-8B",
    max_seq_length = max_seq_length,
    dtype = None, load_in_4bit = True
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.10.7: Fast Llama patching. Transformers = 4.47.0.dev0.
   \\   /|    GPU: NVIDIA GeForce RTX 4090. Max memory: 23.988 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu124. CUDA = 8.9. CUDA Toolkit = 12.4.
\        /    Bfloat16 = TRUE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth


In [3]:
# PEFT MODEL
model = unsloth.FastLanguageModel.get_peft_model(
    model,
    r = 16, # Suggested choice of 8, 16, 32, 64, or 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
    lora_alpha = 16,
    lora_dropout = 0, # 0 is optimized
    bias = "none",    # "none" is optimized
    use_gradient_checkpointing = "unsloth", # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None, # And LoftQ
)

Unsloth 2024.10.7 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [4]:
prompt = """
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
{}

### Input:
{}

### Response:
{}"""

def format_prompts(p):
    # these are provided as lists
    instructions = p["instruction"]
    inputs       = p["input"]
    outputs      = p["output"]
    texts = []
    for ins, inp, outp in zip(instructions, inputs, outputs):
        text = prompt.format(ins, inp, outp) + tokenizer.eos_token
        texts.append(text)
    return { "text" : texts }


In [5]:
import re
import json
import datasets
import requests
from bs4 import BeautifulSoup

# get dataset
r = requests.get('https://raw.githubusercontent.com/RiverGumSecurity/Datasets/refs/heads/main/BHIS/bhis-blogs.json')
data = json.loads(r.text)

# re-format dataset
newds = []
for i, item in enumerate(data):
    s = BeautifulSoup(item['content'], "html.parser")
    content = s.get_text()
    content = re.sub(r'\s+', ' ', content)
    if len(content) < 128:
        continue
    elif len(content) > max_seq_length:
        for line in content.split('\n'):
            if line:
                newds.append({'input': '', 'instruction': '', 'output': f"{item['title']} {' '.join(item['taxonomies'])} {line}"})
    else:
        newds.append({'input': '', 'instruction': '', 'output': f"{item['title']} {' '.join(item['taxonomies'])} {content}"})

ds = datasets.Dataset.from_list(newds)
ds = ds.map(format_prompts, batched=True)

Map:   0%|          | 0/741 [00:00<?, ? examples/s]

In [6]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

newmodel = model
trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = ds,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = False,
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        #num_train_epochs = 1, # Set this for 1 full training run.
        max_steps = 50,
        learning_rate = 2e-4,
        # Floating Point 16 (2 bytes memory use)
        fp16 = not is_bfloat16_supported(),
        # Brain Float 16 (2 bytes memory use but more efficient)
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.02,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
        report_to = "none", # Use this for WandB etc
    ),
)

Map (num_proc=2):   0%|          | 0/741 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [7]:
trainer_stats = trainer.train()

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 741 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 2 | Gradient Accumulation steps = 4
\        /    Total batch size = 8 | Total steps = 50
 "-____-"     Number of trainable parameters = 41,943,040


Step,Training Loss
1,0.6048
2,0.481
3,0.6326
4,0.5857
5,0.6068
6,0.5716
7,0.5722
8,0.5674
9,0.5392
10,0.5847


In [8]:
gpu_stats = torch.cuda.get_device_properties(0)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

#@title Show final memory and time stats
used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3)
used_memory_for_lora = round(used_memory - start_gpu_memory, 3)
used_percentage = round(used_memory/max_memory*100, 3)
lora_percentage = round(used_memory_for_lora/max_memory*100, 3)

print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.")
print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.")
print(f"Peak reserved memory = {used_memory} GB.")
print(f"Peak reserved memory for training = {used_memory_for_lora} GB.")
print(f"Peak reserved memory % of max memory = {used_percentage} %.")
print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.")

GPU = NVIDIA GeForce RTX 4090. Max memory = 23.988 GB.
12.428 GB of memory reserved.
404.2051 seconds used for training.
6.74 minutes used for training.
Peak reserved memory = 12.428 GB.
Peak reserved memory for training = 0.0 GB.
Peak reserved memory % of max memory = 51.809 %.
Peak reserved memory for training % of max memory = 0.0 %.


In [None]:
model.push_to_hub('bhisblogs-llama3')
tokenizer.push_to_hub('bhisblogs-llama3')

In [15]:
from transformers import TextStreamer

unsloth.FastLanguageModel.for_inference(model)
inputs = tokenizer([prompt.format('''Tell me all about password hashes''', "", "")
], return_tensors = "pt").to("cuda")

text_streamer = TextStreamer(tokenizer)
_ = model.generate(**inputs, streamer = text_streamer, max_new_tokens = 8192)

<|begin_of_text|>
Below is an instruction that describes a task, paired with an input that provides further context.
Write a response that appropriately completes the request.

### Instruction:
Tell me all about password hashes

### Input:


### Response:
Tell me all about password hashes Author John Strand John Strand // The password hash is one of the most important things you will ever use in security. It is the reason why you can use the same password on every single website you visit. It is the reason why you can use the same password for your banking, your email, your social media, and your Amazon account. It is the reason why you can use the same password for your work account and your home account. It is the reason why you can use the same password for your router and your Wi-Fi. It is the reason why you can use the same password for your VPN and your home computer. It is the reason why you can use the same password for your credit card and your bank account. It is the reason w

KeyboardInterrupt: 