# Install pip libraries, log in to wandb and Hugging Face, Google login

In [None]:
# Install/upgrade the libraries this notebook relies on (IPython shell escapes).
# NOTE(review): `google` and `trl` are not imported anywhere in the visible part
# of this notebook - confirm they are still needed.
!pip3 install -U huggingface_hub 
!pip3 install -U peft bitsandbytes transformers accelerate
!pip3 install google
!pip3 install -q trl

import torch
import transformers
import accelerate

# Print the versions actually imported so a run can be reproduced later.
print(f"torch: {torch.__version__}")
print(f"transformers: {transformers.__version__}")
print(f"accelerate: {accelerate.__version__}")

In [None]:
# Show the installed accelerate package metadata next to the version of the
# module object; relies on `accelerate` having been imported by an earlier cell.
!pip show accelerate
print(f"accelerate: {accelerate.__version__}")

In [None]:
# Install the Weights & Biases client and log in from the shell.
!pip3 install wandb
# NOTE(review): `token_here` appears to be a placeholder for the API key; a real
# key passed on the command line would be stored in the notebook - confirm.
!wandb login --relogin token_here

# NOTE(review): these two variables are not read anywhere in the visible notebook.
wand_ai = ""
api_key2 = ""

In [None]:
# NOTE(review): `tk` is not read anywhere in the visible notebook.
tk=""
# Log in to Hugging Face through the CLI.
!huggingface-cli login

# Load Model


In [None]:
import os
from random import randrange
from functools import partial
import pandas as pd
import torch
from transformers import (
    AutoModelForCausalLM, TFAutoModel,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    DataCollatorForLanguageModeling,
    EarlyStoppingCallback,
    pipeline,
    logging,
    set_seed
)

import bitsandbytes as bnb

from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training, PeftModel, AutoPeftModelForCausalLM

In [None]:
# Checkpoint identifier handed to load_model().
model_name='meta-llama/Llama-2-7b-hf'

In [None]:
# Quantization settings; forwarded unchanged to BitsAndBytesConfig by create_bnb_config().
load_in_4bit = True
bnb_4bit_use_double_quant = True
bnb_4bit_quant_type = 'nf4'
bnb_4bit_compute_dtype = torch.bfloat16

def create_bnb_config(load_in_4bit, bnb_4bit_use_double_quant, bnb_4bit_quant_type, bnb_4bit_compute_dtype):
    """
    Build the bitsandbytes quantization configuration.

    Each argument is forwarded to ``BitsAndBytesConfig`` under the same keyword name.

    :param load_in_4bit: Value for the ``load_in_4bit`` option
    :param bnb_4bit_use_double_quant: Value for the ``bnb_4bit_use_double_quant`` option
    :param bnb_4bit_quant_type: Value for the ``bnb_4bit_quant_type`` option
    :param bnb_4bit_compute_dtype: Value for the ``bnb_4bit_compute_dtype`` option
    :return: The constructed ``BitsAndBytesConfig``
    """
    quant_options = {
        "load_in_4bit": load_in_4bit,
        "bnb_4bit_use_double_quant": bnb_4bit_use_double_quant,
        "bnb_4bit_quant_type": bnb_4bit_quant_type,
        "bnb_4bit_compute_dtype": bnb_4bit_compute_dtype,
    }
    return BitsAndBytesConfig(**quant_options)

# Built once at module level; load_model() reads this global name.
bnb_config = create_bnb_config(load_in_4bit, bnb_4bit_use_double_quant, bnb_4bit_quant_type, bnb_4bit_compute_dtype)

In [None]:
def load_model(model_name):
    """
    Load the quantized base model and its tokenizer.

    The quantization settings come from the module-level ``bnb_config``.

    NOTE(review): the notebook later sets ``task_type = "SEQ_CLS"`` and reads
    class logits from the model; confirm that ``AutoModelForCausalLM`` is the
    intended model class.

    :param model_name: Checkpoint identifier passed to ``from_pretrained``
    :return: Tuple ``(model, tokenizer)``
    """
    n_gpus = torch.cuda.device_count()
    max_memory = f'{51200}MB'

    device = "cuda" if torch.cuda.is_available() else "cpu"
    print(device)

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        quantization_config = bnb_config,
        device_map = 'auto',
        max_memory = {i: max_memory for i in range(n_gpus)},
    )
    # No explicit model.to("cuda"): device_map='auto' has already placed the
    # weights, moving a bitsandbytes-quantized model with .to() can be rejected
    # by transformers, and a hard-coded "cuda" fails on a CPU-only host.

    # Load tokenizer (`token` replaces the deprecated `use_auth_token` argument)
    tokenizer = AutoTokenizer.from_pretrained(model_name, token = True)

    # Set padding token as EOS token and keep the model config in sync so
    # padded batches can be handled consistently
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.pad_token_id
    return model, tokenizer


# Loaded once at module level; later cells read the global `model` and `tokenizer`.
model, tokenizer = load_model(model_name)

# Utility Methods

In [None]:
def get_max_length(model):
    """
    Look up the maximum sequence length in the model configuration.

    Checks ``n_positions``, ``max_position_embeddings`` and ``seq_length`` on
    ``model.config`` in that order and returns the first truthy value. When none
    of them is set, 1024 is used.

    :param model: Hugging Face model (any object exposing a ``config`` attribute)
    :return: Maximum sequence length
    """
    config = model.config
    candidate_attrs = ("n_positions", "max_position_embeddings", "seq_length")

    for attr_name in candidate_attrs:
        found = getattr(config, attr_name, None)
        if found:
            print(f"Found max lenth: {found}")
            return found

    # None of the known settings is present (or all are falsy): use the default
    fallback = 1024
    print(f"Using default max length: {fallback}")
    return fallback

In [None]:
def create_peft_config(r, lora_alpha, target_modules, lora_dropout, bias, task_type):
    """
    Build the LoRA configuration used for parameter-efficient fine-tuning.

    :param r: LoRA attention dimension
    :param lora_alpha: Alpha parameter for LoRA scaling
    :param target_modules: Names of the modules to apply LoRA to
    :param lora_dropout: Dropout probability for LoRA layers
    :param bias: Specifies if the bias parameters should be trained
    :param task_type: Task type forwarded to ``LoraConfig``
    :return: The constructed ``LoraConfig``
    """
    lora_options = {
        "r": r,
        "lora_alpha": lora_alpha,
        "target_modules": target_modules,
        "lora_dropout": lora_dropout,
        "bias": bias,
        "task_type": task_type,
        # No extra modules are requested for saving
        "modules_to_save": None,
    }
    return LoraConfig(**lora_options)

In [None]:
def find_all_linear_names(model):
    """
    Collect the names of the modules LoRA should be applied to.

    Every sub-module that is a ``bnb.nn.Linear4bit`` contributes the last
    component of its dotted name; ``lm_head`` is excluded from the result.

    :param model: Model whose ``named_modules()`` are inspected
    :return: List of unique module names
    """
    linear_4bit = bnb.nn.Linear4bit

    leaf_names = {
        qualified_name.split('.')[-1]
        for qualified_name, submodule in model.named_modules()
        if isinstance(submodule, linear_4bit)
    }
    leaf_names.discard('lm_head')

    module_names = list(leaf_names)
    print(f"LoRA module names: {module_names}")
    return module_names

In [None]:
def print_trainable_parameters(model, use_4bit = False):
    """
    Prints the number of trainable parameters in the model.

    :param model: PEFT model (any object exposing ``named_parameters()``)
    :param use_4bit: When True, the trainable count is halved before printing
    """
    trainable_params = 0
    all_param = 0

    for _, param in model.named_parameters():
        num_params = param.numel()
        all_param += num_params
        if param.requires_grad:
            trainable_params += num_params

    if use_4bit:
        # Floor division keeps the count an int; true division produced a float,
        # which the ",d" format spec below rejects with ValueError
        trainable_params //= 2

    # Avoid ZeroDivisionError for a model without parameters
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0

    print(
        f"All Parameters: {all_param:,d} || Trainable Parameters: {trainable_params:,d} || Trainable Parameters %: {trainable_pct:.2f}%"
    )

# Data Preprocessing

In [None]:
!pip install datasets
from datasets import load_dataset

dataset = load_dataset("Prasasthy/trainforother")

print(f'Number of prompts: {len(dataset)}')
print(f'Column names are: {dataset.column_names}')

In [None]:
def clean_dataset(data, seed, tokenizer, max_length=None):
    """
    Tokenize and shuffle a dataset so it is ready for fine-tuning.

    The "text" column is tokenized with truncation, the "text" and "label_name"
    columns are dropped, "label" is renamed to "labels", and the result is shuffled.

    :param data: Hugging Face dataset with "text", "label" and "label_name" columns
    :param seed: Seed used for shuffling
    :param tokenizer: Model tokenizer
    :param max_length: Truncation length in tokens; when None it is derived from
        the module-level ``model`` via ``get_max_length``
    :return: The tokenized, shuffled dataset
    """
    if max_length is None:
        # Default behaviour: take the limit from the globally loaded model
        max_length = get_max_length(model)

    def tokenize_batch(batch):
        return tokenizer(
            batch["text"],
            max_length=max_length,
            truncation=True,
        )

    # Tokenize the dataset
    tokenized_dataset = data.map(tokenize_batch, batched=True, remove_columns=["text", "label_name"])
    tokenized_dataset = tokenized_dataset.rename_column("label", "labels")

    # Shuffle dataset for better training
    return tokenized_dataset.shuffle(seed=seed)

seed = 42
# Final dataset ready for fine-tuning
preprocessed_dataset = clean_dataset(dataset, seed, tokenizer)
# Hold out 20% of the "train" split for validation; reuse `seed` so a single
# value controls both the shuffle and the split
train_test = preprocessed_dataset['train'].train_test_split(test_size=0.2, seed=seed)
train_dataset = train_test["train"]
valid_dataset = train_test["test"]


print(preprocessed_dataset)

In [None]:
print(dataset)
print(dataset['train'][0])
# Sample a random row: the index must range over the rows of the "train" split;
# len(dataset) is the number of splits and always sampled the first row
print(dataset['train'][randrange(len(dataset['train']))])

In [None]:
train_dataset

In [None]:
valid_dataset

# Fine-tuning the model


In [None]:
################################################################################
# QLoRA parameters
################################################################################

# LoRA attention dimension
lora_r = 64

# Alpha parameter for LoRA scaling
lora_alpha = 128

# Dropout probability for LoRA layers
lora_dropout = 0.05

# Bias
bias = "none"

# Task type
task_type = "SEQ_CLS"

################################################################################
# TrainingArguments parameters
################################################################################

# Output directory where the model predictions and checkpoints will be stored
output_dir = "./results"

# Batch size per GPU for training
per_device_train_batch_size = 32

# Number of update steps to accumulate the gradients for
gradient_accumulation_steps = 1

# Initial learning rate (AdamW optimizer)
learning_rate = 5e-5

# Optimizer to use
optim = "adamw_torch"

# Number of training steps (overrides num_train_epochs)
max_steps = 10000

# Linear warmup steps from 0 to learning_rate
warmup_steps = 100

# Enable fp16/bf16 training (set bf16 to True with an A100)
# The two flags are mutually exclusive in TrainingArguments, so only one may be
# True at a time; fine_tune() currently forwards only fp16.
fp16 = True
bf16 = False

# Log every X updates steps
logging_steps = 100

# Save checkpoints every X steps
save_steps = 1000  # Saves progress every 1000 steps

# Evaluation strategy (Evaluate every few steps)
# NOTE: fine_tune() hardcodes the strategy to "steps"; this variable is not read there
evaluation_strategy = "steps"
eval_steps = 1000  # Evaluate every 1000 steps


In [None]:
def fine_tune(model,
        tokenizer,
        dataset,
        valid_dataset,
        lora_r,
        lora_alpha,
        lora_dropout,
        bias,
        task_type,
        per_device_train_batch_size,
        gradient_accumulation_steps,
        warmup_steps,
        max_steps,
        learning_rate,
        fp16, bf16,
        logging_steps,
        output_dir,
        optim):
    """
    Prepares the pre-trained model for LoRA training, fine-tunes it and saves it.

    NOTE(review): ``eval_steps`` and ``save_steps`` are read from module-level
    globals rather than passed in as arguments.

    :param model: Pre-trained Hugging Face model
    :param tokenizer: Model tokenizer (used to build the padding data collator)
    :param dataset: Preprocessed training dataset
    :param valid_dataset: Preprocessed evaluation dataset
    :param lora_r: LoRA attention dimension
    :param lora_alpha: Alpha parameter for LoRA scaling
    :param lora_dropout: Dropout probability for LoRA layers
    :param bias: Specifies if the bias parameters should be trained
    :param task_type: Task type forwarded to the LoRA configuration
    :param per_device_train_batch_size: Training batch size per device
    :param gradient_accumulation_steps: Update steps to accumulate gradients for
    :param warmup_steps: Number of warmup steps
    :param max_steps: Total number of training steps
    :param learning_rate: Initial learning rate
    :param fp16: Whether to enable fp16 training
    :param bf16: Accepted for interface compatibility; not forwarded to TrainingArguments
    :param logging_steps: Log every this many update steps
    :param output_dir: Directory for checkpoints and the saved model
    :param optim: Optimizer name
    :return: None
    """
    import inspect

    from transformers import DataCollatorWithPadding

    # Enable gradient checkpointing to reduce memory usage during fine-tuning
    model.gradient_checkpointing_enable()

    # Prepare the model for training
    model = prepare_model_for_kbit_training(model)

    # Get LoRA module names
    target_modules = find_all_linear_names(model)

    # Create PEFT configuration for these modules and wrap the model to PEFT
    peft_config = create_peft_config(lora_r, lora_alpha, target_modules, lora_dropout, bias, task_type)
    model = get_peft_model(model, peft_config)

    # Print information about the percentage of trainable parameters
    print_trainable_parameters(model)

    data_collator = DataCollatorWithPadding(tokenizer=tokenizer)

    # Training parameters
    training_kwargs = dict(
        per_device_train_batch_size = per_device_train_batch_size,
        per_device_eval_batch_size = 32,
        gradient_accumulation_steps = gradient_accumulation_steps,
        warmup_steps = warmup_steps,
        max_steps = max_steps,
        learning_rate = learning_rate,
        fp16 = fp16,
        logging_steps = logging_steps,
        output_dir = output_dir,
        optim = optim,
        eval_steps = eval_steps,
        save_steps = save_steps,
        save_total_limit = 2,  # Keep only last 2 checkpoints
        load_best_model_at_end = True,
    )
    # `evaluation_strategy` was renamed to `eval_strategy` in newer transformers
    # releases; pass whichever keyword the installed version accepts
    accepted_args = inspect.signature(TrainingArguments.__init__).parameters
    strategy_key = "eval_strategy" if "eval_strategy" in accepted_args else "evaluation_strategy"
    training_kwargs[strategy_key] = "steps"

    trainer = Trainer(
        model = model,
        train_dataset = dataset,
        eval_dataset = valid_dataset,
        args = TrainingArguments(**training_kwargs),
        data_collator = data_collator
    )

    model.config.use_cache = False

    # Launch training and log metrics
    print("Training...")

    train_result = trainer.train()
    print(f"✅ Training complete...!! \n {train_result}")
    metrics = train_result.metrics
    trainer.log_metrics("train", metrics)
    trainer.save_metrics("train", metrics)
    trainer.save_state()
    print(metrics)
    print(f"✅ Verification Performance: \n {trainer.evaluate()}")

    # Save model
    print("Saving last checkpoint of the model...")
    os.makedirs(output_dir, exist_ok = True)

    # Save using both PEFT and base model
    trainer.model.save_pretrained(output_dir, safe_serialization=True)
    trainer.model.base_model.save_pretrained(os.path.join(output_dir, "base_model"))

    # Drop this function's references and release cached GPU memory; objects
    # still referenced by the caller are not affected by these `del`s
    del model
    del trainer
    torch.cuda.empty_cache()

In [None]:
import torch
# Report whether CUDA is usable and, if so, the name of device 0.
print("CUDA Available:", torch.cuda.is_available())
print("GPU Device Name:", torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU detected")

CUDA Available: True
GPU Device Name: Tesla T4


# Training

In [None]:
# Run fine-tuning with the hyperparameters defined in the cells above.
# fine_tune() has no return statement, so this call evaluates to None.
fine_tune(model,
          tokenizer,
          train_dataset,
          valid_dataset,
          lora_r,
          lora_alpha,
          lora_dropout,
          bias,
          task_type,
          per_device_train_batch_size,
          gradient_accumulation_steps,
          warmup_steps,
          max_steps,
          learning_rate,
          fp16,bf16,
          logging_steps,
          output_dir,
          optim
          )

In [None]:
type(model)

# Save and Download

In [None]:
import os

from huggingface_hub import login
from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Log in to Hugging Face
# Take the token from the HF_TOKEN environment variable instead of a literal in
# the notebook; when it is unset, login() falls back to its interactive prompt.
login(token=os.environ.get("HF_TOKEN"))

# Push to Hugging Face Hub (model and tokenizer go to the same repository)
hub_repo_id = "Dumi2025/log-anomaly-detection-model-llama"
model.push_to_hub(hub_repo_id)
tokenizer.push_to_hub(hub_repo_id)

# Testing

In [None]:
import evaluate

accuracy = evaluate.load("accuracy")
f1_metric = evaluate.load("f1")

def compute_metrics(eval_pred):
    """
    Compute accuracy and weighted F1 from evaluation predictions.

    :param eval_pred: Pair ``(logits, labels)``
    :return: Dict with the keys "accuracy" and "f1_score"
    """
    scores, references = eval_pred
    # The predicted class is the index of the largest logit along the last axis
    predicted = scores.argmax(axis=-1)
    shared_args = {"predictions": predicted, "references": references}

    accuracy_result = accuracy.compute(**shared_args)
    f1_result = f1_metric.compute(average="weighted", **shared_args)

    return {"accuracy": accuracy_result["accuracy"], "f1_score": f1_result["f1"]}

In [None]:
# Classify a single log line with the global model and tokenizer.
input_text = "Receiving block blk_-3544583377289625738 src: /10.250.19.102:39325 dest: /10.250.19.102:50010"

# NOTE(review): the model was loaded with a bitsandbytes quantization config;
# confirm that moving it to the CPU with .to() is supported in this setup.
model.to("cpu")
model.eval()


inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding=True)

# Perform classification
with torch.no_grad():
    outputs = model(**inputs)
    logits = outputs.logits
    # .item() requires the argmax result to hold exactly one element
    predicted_class = torch.argmax(logits, dim=-1).item()

print(f"Predicted Class: {predicted_class}")


Predicted Class: 0


In [None]:
# Re-run the forward pass requesting attention weights and print the raw output.
# Note: unlike the neighbouring cells, this call is not wrapped in torch.no_grad().
outputs = model(**inputs, output_attentions=True)
print(outputs)

# BertViz

In [None]:
! pip3 install bertviz

In [None]:
from bertviz import model_view, head_view

# Forward pass without gradient tracking, keeping the attention weights.
with torch.no_grad():
  outputs = model(**inputs, output_attentions=True)
  attentions = outputs.attentions

# Token strings for the first (only) sequence in the batch, used as labels.
tokens = tokenizer.convert_ids_to_tokens(inputs["input_ids"][0])

model_view(attentions, tokens)

In [None]:
type(tokens)

In [None]:
head_view(attentions, tokens)