In [None]:
%%capture
# Install Unsloth and its dependencies; the %%capture cell magic keeps pip's output out of the notebook.
import os
# Colab is detected by searching for "COLAB_" in the concatenated names of all environment variables.
if "COLAB_" not in "".join(os.environ.keys()):
    !pip install unsloth
else:
    # Do this only in Colab notebooks! Otherwise use pip install unsloth
    # First and last installs use --no-deps, so pip does not resolve or install their dependencies;
    # xformers and trl are pinned to exact versions.
    !pip install --no-deps bitsandbytes accelerate xformers==0.0.29.post3 peft trl==0.15.2 triton cut_cross_entropy unsloth_zoo
    !pip install sentencepiece protobuf "datasets>=3.4.1" huggingface_hub hf_transfer
    !pip install --no-deps unsloth

In [None]:
from unsloth import FastLanguageModel
import torch
# Pre-quantized 4-bit checkpoints published under the `unsloth` namespace.
# Reference list only: the load call below names its checkpoint directly.
# More models at https://huggingface.co/unsloth
fourbit_models = [
    # Llama 3.1 (base / instruct, 8B up to 405B)
    "unsloth/Meta-Llama-3.1-8B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-8B-Instruct-bnb-4bit",
    "unsloth/Meta-Llama-3.1-70B-bnb-4bit",
    "unsloth/Meta-Llama-3.1-405B-bnb-4bit",
    # Mistral Nemo 12B and Mistral 7B v0.3
    "unsloth/Mistral-Nemo-Base-2407-bnb-4bit",
    "unsloth/Mistral-Nemo-Instruct-2407-bnb-4bit",
    "unsloth/mistral-7b-v0.3-bnb-4bit",
    "unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    # Phi 3 / 3.5
    "unsloth/Phi-3.5-mini-instruct",
    "unsloth/Phi-3-medium-4k-instruct",
    # Gemma 2
    "unsloth/gemma-2-9b-bnb-4bit",
    "unsloth/gemma-2-27b-bnb-4bit",
]

# Loading options; `max_seq_length` is also passed to the trainer later in the notebook.
max_seq_length = 2048
# None asks the loader to auto-detect; Float16 suits Tesla T4 / V100, Bfloat16 suits Ampere+.
dtype = None
# 4-bit quantization reduces memory usage; may be set to False.
load_in_4bit = True

# For gated models (e.g. meta-llama/Llama-2-7b-hf) additionally pass token="hf_...".
# A previously fine-tuned checkpoint such as "mrbmaryam/SFT_F3" can be loaded the same way.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-v0.3-bnb-4bit",
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

In [None]:
# LoRA adapter settings, gathered in one mapping and unpacked into the call below.
lora_settings = dict(
    # Adapter rank: any number > 0 (suggested 8, 16, 32, 64, 128).
    r=16,
    lora_alpha=16,
    # Attention projections followed by the MLP projections.
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj",
    ],
    # Any value is supported for the next two, but 0 / "none" are the optimized settings.
    lora_dropout=0,
    bias="none",
    # True or "unsloth"; "unsloth" is the lower-VRAM option intended for very long context.
    use_gradient_checkpointing="unsloth",
    random_state=3407,
    # Rank-stabilized LoRA and LoftQ are both left disabled.
    use_rslora=False,
    loftq_config=None,
)

# Rebinds `model` to the adapter-wrapped model.
model = FastLanguageModel.get_peft_model(model, **lora_settings)

Unsloth 2025.6.5 patched 32 layers with 32 QKV layers, 32 O layers and 32 MLP layers.


In [None]:
# Training prompt template. It has two positional `{}` slots: formatting_prompts_func fills the
# first with the "Chunk" column value and the second with the "GT" column value.
# The task instructions, the <start>...</end> output format and the "### Input:" / "### Response:"
# section headers are all part of the literal text seen during training.
alpaca_prompt = """You are an advanced summarization and log analysis assistant. Your task is to analyze the given log chunk, summarize critical log events and provide a concise interpretation in the specified format.

Summarization and Interpretation Task:
Analyze the provided log chunk and summarize critical log events indicating errors, failures, malfunctions, or potential issues. Exclude routine events and deduplicate redundant entries. In case of redundancy, only return one of them. For each critical event, provide a severity rating (0-5), brief root cause, potential issues, system impact, and solution. Use this format for results:

<start>[event: <log event>] [severity: <0-5>] [root cause: <brief explanation>] [potential issues: <brief description>] [impact: <brief system impact>] [solution: <brief resolution>]</end>

If no critical events are found, return: <start>normal</end>.

### Input:
Log chunk:
{}

### Response:
Interpretation:
{}
"""

# End-of-sequence token of the loaded tokenizer; formatting_prompts_func appends it to every training text.
EOS_TOKEN = tokenizer.eos_token # Must add EOS_TOKEN
def formatting_prompts_func(examples):
    """Render a batch of dataset rows into training texts.

    Each entry of ``examples["Chunk"]`` is paired with the entry of
    ``examples["GT"]`` at the same position; the pair is substituted into the
    module-level ``alpaca_prompt`` template and ``EOS_TOKEN`` is appended.

    Args:
        examples: Mapping with "Chunk" and "GT" keys, each holding a sequence
            of values for the batch.

    Returns:
        A dict with the single key "text" mapped to the list of rendered
        strings, one per (Chunk, GT) pair.
    """
    chunk_gt_pairs = zip(examples["Chunk"], examples["GT"])
    # The EOS token must close every example, otherwise generation will go on forever.
    rendered = [
        alpaca_prompt.format(log_chunk, reference) + EOS_TOKEN
        for log_chunk, reference in chunk_gt_pairs
    ]
    return {"text": rendered}

from datasets import load_dataset
# Load the "train" split and apply formatting_prompts_func to it in batched mode,
# as one chained expression.
dataset = load_dataset("mrbmaryam/Train_F3", split="train").map(
    formatting_prompts_func,
    batched=True,
)

In [None]:
# Spot-check: display the formatted "text" field of the example at index 5.
dataset[5]["text"]

In [None]:
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

# Probe bf16 support once: bf16 is used when available, fp16 otherwise.
use_bf16 = is_bfloat16_supported()

training_args = TrainingArguments(
    output_dir="outputs",
    seed=3407,
    # Batching
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    # Schedule: training is capped by max_steps; set num_train_epochs = 1 instead
    # for one full training run.
    max_steps=60,
    warmup_steps=5,
    learning_rate=2e-4,
    lr_scheduler_type="linear",
    # Optimizer
    optim="adamw_8bit",
    weight_decay=0.01,
    # Precision
    fp16=not use_bf16,
    bf16=use_bf16,
    # Logging
    logging_steps=1,
    report_to="none",  # Use this for WandB etc
)

trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=dataset,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=2,
    packing=False,  # Can make training 5x faster for short sequences.
    args=training_args,
)

In [None]:
# Run fine-tuning; the value returned by train() is kept in `trainer_stats`.
trainer_stats = trainer.train()


# Inference

In [None]:
# Inference prompt template, filled by get_response.
# NOTE(review): this rebinds the same name `alpaca_prompt` that the training template uses, so
# re-running the dataset-formatting cell after this one would format with this template instead.
# NOTE(review): unlike the training template, this text has a single `{}` slot, adds a
# "Now, analyze the following log chunk..." sentence, and does not end with a "### Response:"
# header — confirm that the difference between training and inference prompts is intended.
alpaca_prompt = """You are an advanced summarization and log analysis assistant. Your task is to analyze the given log chunk, summarize critical log events and provide a concise interpretation in the specified format.

Summarization and Interpretation Task:
Analyze the provided log chunk and summarize critical log events indicating errors, failures, malfunctions, or potential issues. Exclude routine events and deduplicate redundant entries. In case of redundancy, only return one of them. For each critical event, provide a severity rating (0-5), brief root cause, potential issues, system impact, and solution. Use this format for results:

<start>[event: <log event>] [severity: <0-5>] [root cause: <brief explanation>] [potential issues: <brief description>] [impact: <brief system impact>] [solution: <brief resolution>]</end>

If no critical events are found, return: <start>normal</end>.

Now, analyze the following log chunk and provide its summary and interpretation:
### Input:
Log chunk:
{}
"""

In [None]:
import re
# Put the model in inference mode before the helpers below call model.generate().
FastLanguageModel.for_inference(model) # Enable native 2x faster inference

def get_summ(response):
    """Extract the text that follows the response header in a decoded generation.

    Two header spellings are recognised, each on its own line directly after a
    "### Response:" line:

    * "Interpretation:" — the header used by the training prompt template in
      this notebook, and therefore the one a fine-tuned model reproduces;
    * "Summarization and Interpretation:" — the header the previous version of
      this function looked for, still accepted for backward compatibility.

    Args:
        response: The full decoded model output (prompt plus generation).

    Returns:
        Everything after the first recognised header up to the end of
        ``response`` (newlines included), or ``None`` when ``response`` is
        empty/None or contains no recognised header.
    """
    if not response:
        return None

    # A capturing group is used instead of a look-behind because Python look-behinds
    # must be fixed-width and the two accepted headers differ in length.
    header_then_body = r"### Response:\n(?:Summarization and )?Interpretation:\n(.*)"

    # DOTALL lets `.` span newlines so multi-line summaries are captured whole.
    match = re.search(header_then_body, response, re.DOTALL)
    if match is None:
        return None
    return match.group(1)


def get_response(input):
    """Generate a model response for one log chunk and return the extracted summary.

    The chunk is substituted into the module-level ``alpaca_prompt`` template,
    tokenized, moved to the "cuda" device and passed to ``model.generate`` with
    up to 800 new tokens. The decoded output is printed in full and its first
    sequence is handed to ``get_summ``.

    Args:
        input: The log chunk to place in the prompt.

    Returns:
        Whatever ``get_summ`` returns for the first decoded sequence.
    """
    # The empty second argument is kept as-is; whether it is consumed depends on
    # how many `{}` slots the current `alpaca_prompt` has.
    prompt_text = alpaca_prompt.format(input, "")

    encoded = tokenizer([prompt_text], return_tensors="pt").to("cuda")
    generated = model.generate(**encoded, max_new_tokens=800, use_cache=True)
    full_response = tokenizer.batch_decode(generated)

    print(f"Full response: {full_response}" )

    return get_summ(full_response[0])

In [None]:
import pandas as pd
import os
import time  # Import time module

# Also called in the cell that defines get_response; repeated here ahead of the batch run.
FastLanguageModel.for_inference(model)

# CSV locations under /content; all three are passed to process_file at the end of this cell.
input_file = "/content/OpenStack_test_f3.csv"
output_file = "/content/Test_OpenStack_F3_SFT.csv"
checkpoint_file = "/content/Test_OpenStack_F3_SFT_checkpoint.csv"  # Temp checkpoint file

def _count_processed_rows(results):
    """Return how many leading rows are treated as already processed.

    A cell counts as filled when it is neither missing (NaN/None) nor an empty
    string. The result is the position of the last filled cell plus one, so
    rows whose stored summary was empty/None but which are followed by a filled
    row are still treated as processed; 0 is returned when nothing is filled.
    """
    filled = results.notna() & (results.astype(str) != "")
    filled_positions = [pos for pos, is_filled in enumerate(filled) if is_filled]
    return filled_positions[-1] + 1 if filled_positions else 0


def process_file(input_file, output_file, checkpoint_file,
                 result_column="Mistral_sft_f3", checkpoint_every=100):
    """Summarize every 'Chunk' of a CSV with get_response, with resumable checkpoints.

    Rows are processed in file order. Each summary is written to
    ``result_column`` of a working copy of the input; that copy is saved to
    ``checkpoint_file`` every ``checkpoint_every`` rows and to ``output_file``
    at the end, after which the checkpoint file is deleted.

    When ``checkpoint_file`` already exists, processing resumes after the last
    row that has a non-empty result in it. (Empty cells are read back from CSV
    as NaN, so they must be treated as "not processed" rather than compared to
    the empty string.)

    Args:
        input_file: Path of the input CSV; it must contain a 'Chunk' column.
        output_file: Path the final CSV is written to.
        checkpoint_file: Path of the temporary progress CSV.
        result_column: Name of the column that receives the summaries.
        checkpoint_every: Save a checkpoint each time this many rows have been
            handled; a falsy value disables intermediate checkpoints.

    Raises:
        ValueError: If the input CSV has no 'Chunk' column.
    """
    # Timed locally so the function does not depend on a global set by the caller.
    run_started = time.time()

    df = pd.read_csv(input_file)

    # Ensure required columns exist
    if 'Chunk' not in df.columns:
        raise ValueError("The input CSV must contain 'Chunk'.")

    # Check if there is a checkpoint file to resume progress
    if os.path.exists(checkpoint_file):
        df_checkpoint = pd.read_csv(checkpoint_file)
        processed_count = _count_processed_rows(df_checkpoint[result_column])
        print(f"Resuming from checkpoint... {processed_count} rows already processed.")
    else:
        df_checkpoint = df.copy()
        df_checkpoint[result_column] = ""  # Initialize column
        processed_count = 0  # Start from the beginning

    # Force object dtype: a column read back as all-NaN is numeric, and string
    # summaries (or None) must be storable in it.
    df_checkpoint[result_column] = df_checkpoint[result_column].astype(object)

    counter = processed_count

    for index, row in df.iloc[processed_count:].iterrows():  # Resume from last processed row
        counter += 1
        log_entry = row['Chunk']

        print(f"Counter: {counter}")
        print(f"Log Chunk: {log_entry}")

        # Best effort: one failing row is recorded as "ERROR" instead of aborting the run.
        try:
            summary = get_response(log_entry)
        except Exception as e:
            print(f"Error processing log entry: {e}")
            summary = "ERROR"

        print(f"Extracted Summary: {summary}")
        df_checkpoint.at[index, result_column] = summary  # Store result
        print("--------------------------------------------------------\n")

        # Save progress periodically
        if checkpoint_every and counter % checkpoint_every == 0:
            df_checkpoint.to_csv(checkpoint_file, index=False)
            print(f"Checkpoint saved at row {counter}")
            elapsed = time.time() - run_started
            mins, secs = divmod(elapsed, 60)
            print(f"Cumulative execution time: {int(mins)} min {int(secs)} sec")

    # Final save after all processing
    df_checkpoint.to_csv(output_file, index=False)
    print("Final results saved!")

    # Remove checkpoint file after completion
    if os.path.exists(checkpoint_file):
        os.remove(checkpoint_file)

# --- Timing Starts Here ---
# Wall-clock timing around the whole batch run.
# NOTE(review): `start_time` is module-level state; keep this assignment ahead of the
# process_file call.
start_time = time.time()

process_file(input_file, output_file, checkpoint_file)

end_time = time.time()
execution_time = end_time - start_time

print(f"\nTotal execution time: {execution_time:.2f} seconds")


# Save the Model

In [None]:
import getpass
import os

# Prompt for the Hugging Face token with getpass (input is not echoed) and store it in this
# process's environment instead of writing it into the notebook.
os.environ["HUGGING_FACE_HUB_TOKEN"] = getpass.getpass("Token: ")
# Fail fast when nothing was entered: an empty string is falsy.
assert os.environ["HUGGING_FACE_HUB_TOKEN"]

Token: ··········


In [None]:
from transformers import AutoModel, AutoTokenizer

# Requires the trained `model` and its `tokenizer` from the cells above.
# `model_name` is the Hub repository id to publish to (placeholder value).
model_name = "xxx"
# Placeholder only: `token` is not passed to either push_to_hub call in this cell.
# Do not paste a real token here.
token = "xxx"

# Upload the model first, then the tokenizer, to the same repository.
for hub_artifact in (model, tokenizer):
    hub_artifact.push_to_hub(model_name)

print(f"Model and tokenizer successfully pushed to {model_name} on Hugging Face Hub.")

"""**Evaluation**"""