In [None]:
# Attach Google Drive to this Colab runtime at /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

In [None]:
%%capture
# Installs Unsloth (Colab build, straight from GitHub), Xformers (Flash Attention) and all other packages.
# The %%capture cell magic above silences the pip output of this cell.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Pick the xformers requirement from the installed Torch version:
# Torch older than 2.4.0 gets the pinned "xformers==0.0.27"; otherwise the unpinned "xformers" is requested.
from torch import __version__; from packaging.version import Version as V
xformers = "xformers==0.0.27" if V(__version__) < V("2.4.0") else "xformers"
# --no-deps: install only the listed packages; pip does not resolve or install their dependencies.
!pip install --no-deps {xformers} trl peft accelerate bitsandbytes triton

In [None]:
from unsloth import FastLanguageModel
import torch

# --- Loading configuration -------------------------------------------------
# Maximum sequence length; any value may be chosen (RoPE scaling is handled internally).
max_seq_length = 2048
# None lets the dtype be auto-detected (Float16 for Tesla T4/V100, Bfloat16 for Ampere+).
dtype = None
# 4-bit quantization reduces memory usage; may be set to False.
load_in_4bit = True

# --- Load the base model and its tokenizer ---------------------------------
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/Phi-3.5-mini-instruct",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
    # token="hf_...",  # needed only for gated models such as meta-llama/Llama-2-7b-hf
)

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.
==((====))==  Unsloth 2024.8: Fast Llama patching. Transformers = 4.44.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.4.0+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.27.post2. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.26G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/140 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/3.37k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/500k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/293 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.84M [00:00<?, ?B/s]

In [None]:
# Wrap the loaded model with LoRA adapters on the attention and MLP projections.
model = FastLanguageModel.get_peft_model(
    model,
    # LoRA rank: any number > 0 (suggested 8, 16, 32, 64, 128).
    r=16,
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ],
    lora_alpha=16,
    # Any dropout value is supported, but 0 is the optimized setting.
    lora_dropout=0,
    # Any bias mode is supported, but "none" is the optimized setting.
    bias="none",
    # True or "unsloth" for very long context; "unsloth" uses 30% less VRAM
    # and fits 2x larger batch sizes.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    # Rank-stabilized LoRA is supported but disabled here.
    use_rslora=False,
    # LoftQ is supported but not configured here.
    loftq_config=None,
)

Unsloth 2024.8 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
from unsloth.chat_templates import get_chat_template

# Configure the tokenizer with the Phi-3 chat template. The mapping translates
# ShareGPT-style keys ("from"/"value", "human"/"gpt") to the template's
# role/content/user/assistant fields.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="phi-3",
    mapping={
        "role": "from",
        "content": "value",
        "user": "human",
        "assistant": "gpt",
    },
)

def formatting_prompts_func(examples):
    """Render each conversation of a batch into a single chat-formatted string.

    Args:
        examples: A batch mapping whose "conversations" entry is an iterable
            of conversations; each conversation is passed as-is to
            ``tokenizer.apply_chat_template``.

    Returns:
        A dict with one key, "text", holding the rendered strings in the same
        order as the input conversations.
    """
    rendered = []
    for conversation in examples["conversations"]:
        # tokenize=False returns text rather than token ids; no generation
        # prompt is appended.
        rendered.append(
            tokenizer.apply_chat_template(
                conversation,
                tokenize=False,
                add_generation_prompt=False,
            )
        )
    return {"text": rendered}

In [None]:
from datasets import load_dataset
from sklearn.model_selection import train_test_split

# Read the JSONL file as a single 'train' split.
file_path = '/content/train_test_network.jsonl'
dataset = load_dataset('json', data_files=file_path, split='train')

# First cut: hold out 20% of all rows as the test set.
split_dataset = dataset.train_test_split(test_size=0.2, seed=42)

# Second cut: hold out 20% of the remaining training rows for validation.
train_split = split_dataset['train'].train_test_split(test_size=0.2, seed=42)

# Add the chat-formatted "text" column to every subset (train, then eval, then test).
train_dataset, eval_dataset, test_dataset = (
    subset.map(formatting_prompts_func, batched=True)
    for subset in (train_split['train'], train_split['test'], split_dataset['test'])
)

Generating train split: 0 examples [00:00, ? examples/s]

Map:   0%|          | 0/135067 [00:00<?, ? examples/s]

Map:   0%|          | 0/33767 [00:00<?, ? examples/s]

Map:   0%|          | 0/42209 [00:00<?, ? examples/s]

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from transformers.utils import logging

# Raise the transformers log level to INFO.
logging.set_verbosity_info()

# Use bf16 when the GPU supports it, otherwise fall back to fp16.
bf16_available = torch.cuda.is_bf16_supported()

training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    # Train for one full epoch (a fixed step budget is left disabled).
    num_train_epochs=1,
    # max_steps=80,
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    learning_rate=2e-5,
    lr_scheduler_type="linear",
    warmup_steps=5,
    weight_decay=0.1,
    optim="adamw_8bit",
    fp16=not bf16_available,
    bf16=bf16_available,
    logging_steps=1,
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    # Packs short sequences together to save time.
    packing=True,
    args=training_args,
)

PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
PyTorch: setting up devices


Map (num_proc=2):   0%|          | 0/135067 [00:00<?, ? examples/s]

In [None]:
# Run training with the trainer configured above; the value returned by train() is kept in trainer_stats.
trainer_stats = trainer.train()

In [None]:
import time
import sys

# Re-apply the Phi-3 chat template with the same ShareGPT-style key mapping
# that was used when the training text was rendered.
tokenizer = get_chat_template(
    tokenizer,
    chat_template="phi-3",
    mapping={"role": "from", "content": "value", "user": "human", "assistant": "gpt"},
)

# Enable native 2x faster inference
FastLanguageModel.for_inference(model)

# Per-example predictions and labels, plus running counters for live accuracy.
predictions = []
ground_truths = []
start_time = time.time()
correct = 0
incorrect = 0
accuracy = 0

# Fixed: Python has no printf(); the original call raised NameError before the loop started.
print("Prompt: Given the following network data, classify the traffic as either 'attack' or 'normal' with a single word:\n")

# Loop over each item in the test dataset
for item in test_dataset:

    # The first turn of the conversation is the human input.
    human_input = item['conversations'][0]['value']

    # NOTE(review): the instruction is prepended here, whereas
    # formatting_prompts_func renders the stored conversation unchanged for
    # training -- confirm the stored human turn does not already contain it.
    message = [{"from": "human", "value": f"Given the following network data, classify the traffic as either 'attack' or 'normal' with a single word:\n\n{human_input}"}]

    # Tokenize the input
    inputs = tokenizer.apply_chat_template(
        message,
        tokenize=True,
        add_generation_prompt=True,  # Must add for generation
        return_tensors="pt",
    ).to("cuda")

    # NOTE(review): only one new token is generated; this assumes each label
    # is a single token for this tokenizer -- confirm.
    outputs = model.generate(input_ids=inputs, max_new_tokens=1, use_cache=True)

    # Decode only the newly generated tokens. The returned sequence starts
    # with the prompt, so decoding everything would mix prompt text and
    # special tokens into the extracted word.
    generated_tokens = outputs[:, inputs.shape[1]:]
    answer = tokenizer.batch_decode(generated_tokens, skip_special_tokens=True)

    # Take the last word of the generated text; fall back to an empty string
    # when nothing printable was generated (e.g. only a special token).
    words = answer[0].split()
    pred = words[-1] if words else ""
    # The second turn of the conversation is the ground-truth label.
    truth = item['conversations'][1]['value']

    predictions.append(pred)
    ground_truths.append(truth)

    if pred == truth:
        correct += 1
        color_code = "\033[32m"  # Green
    else:
        incorrect += 1
        color_code = "\033[31m"  # Red

    elapsed_time = time.time() - start_time
    # At least one example has been counted here, so the denominator is never zero.
    accuracy = correct / (correct+incorrect)

    # "\r" rewrites the same console line on every iteration.
    sys.stdout.write(f"\rElapsed Time: {elapsed_time:.2f} seconds, Accuracy: {accuracy}, Prediction: {color_code}{pred}\033[0m, Ground Truth: {color_code}{truth}\033[0m")
    sys.stdout.flush()
