In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'sentiment-analysis-for-mental-health:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F5338273%2F8870083%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240929%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240929T110046Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Dc5271537d21a65a89e26ab448184afb4edede4faa3b2e67d28236a0c3adad5f7b2d3c131bebd6c4290df626bf1791b1049e1342559c074cd54b7b31dad00ff65f39417f24ade4c2877645de0c4f37a9a6bd4e2297a506b77f27b12232ca8e26bb94635eed81a3da3abf335f01ece23af48e39042eb91cb20870d91a8e84ed1e787e572071b677b6e5ea3cdeeeb1e5866a579f6292dec55403c7107a8a9be8a0fc014dfd16c86b08d7a9cbbe3d720c62751d5985841c27ee93f1329802662c942279cfa8dfe9424d21c9542902e4dd6a7e9b90a5f0b8107685469ed92650026893dd21f616a39c27c705a156a68502bc3c1a68ed27324bb3a30522af971a491a3,combine-data:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F5786818%2F9507493%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240929%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240929T110046Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3Dbfae2f75875bda73fd268b05cd96e9089990048fd454d677facf9da8128d2fc34840ec7c7f37165c5dc8ff407a5f29eff1781564d05597fc0fa1b742bcfcc460b50d28cad8d7f7941fcad7693261e357958c3e87dab85d3ae3c1f5653721b1d4bd4bcc549afebc734c95dd80fa6362851d955bfa33a54c284e6bd9f0f4647153ead4bd200ab24f7a6fa99eb8240d0934afbc3097b4c4026cd0f2773dffe28de17c10f0789b92572e2fcd85bf670b2c0df2c33522e7d38f2b705e3fd3acb1f413ec86d84bb9d7f2bd86ea047f9940a79ef2106804b707dbc9a1fe7a064d96eb2c57bd1762ac54d042cb08782d3b0384b700521ebda9498699bba12641b2939718'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    directory, download_url_encoded = data_source_mapping.split(':')
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = int(50 * dl / int(total_length))
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            if filename.endswith('.zip'):
              with ZipFile(tfile) as zfile:
                zfile.extractall(destination_path)
            else:
              with tarfile.open(tfile.name) as tarfile:
                tarfile.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError as e:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError as e:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading sentiment-analysis-for-mental-health, 11587194 bytes compressed
Downloaded and uncompressed: sentiment-analysis-for-mental-health
Downloading combine-data, 1233851 bytes compressed
Downloaded and uncompressed: combine-data
Data source import complete.


## Setting up

In [None]:
%%capture
%pip install -U bitsandbytes
%pip install -U transformers
%pip install -U accelerate
%pip install -U peft
%pip install -U trl wandb

In [None]:
import wandb
from google.colab import userdata
userdata.get('w_b_scerts')
# 0b9ef567374cf5ac9ad70183a7a95a810f76ed5d
wandb.login()
run = wandb.init(
    project='Fine-tune llama-3.1-8b-it on Sentiment Analysis Dataset',
    job_type="training",
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mahmed-mostafa22200028[0m ([33mcrime[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [None]:
import numpy as np
import pandas as pd
import os
from tqdm import tqdm
import bitsandbytes as bnb
import torch
import torch.nn as nn
import transformers
from datasets import Dataset
from peft import LoraConfig, PeftConfig
from trl import SFTTrainer
from trl import setup_chat_format
from transformers import (AutoModelForCausalLM,
                          AutoTokenizer,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          pipeline,
                          logging)
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from sklearn.model_selection import train_test_split

## Loading and processing the dataset

In [None]:
# df = pd.read_csv("/kaggle/input/sentiment-analysis-for-mental-health/Combined Data.csv",index_col = "Unnamed: 0")
df = pd.read_excel("/kaggle/input/combine-data/combine_claass_df_balnce.xlsx", index_col="Unnamed: 0")
# df.loc[:,'status'] = df.loc[:,'status'].str.replace('Bi-Polar','Bipolar')
# df = df[(df.status != "Personality disorder") & (df.status != "Stress") & (df.status != "Suicidal")]
df.head()

Unnamed: 0,text,label
0,وليد عبدالله العتيبي مستشفي عفيف العام شركه ا...,Computer Issues
1,تنبيه باخلاء المسءوليه هذه الرساله ومرفقاتها ...,Peripheral Device Issues
2,الاخوه الزملاء السلام عليكم ورحمه الله وبركاته...,Security Alerts
3,تنبيه باخلاء المسءوليه هذه الرساله ومرفقاتها ...,Uncategorized
4,تنبيه باخلاء المسءوليه هذه الرساله ومرفقاتها ...,Printer Issues


In [None]:
# Shuffle the DataFrame and select only 3000 rows
df = df.sample(frac=1, random_state=85).reset_index(drop=True).head(3000)

# Split the DataFrame
train_size = 0.8
eval_size = 0.1

# Calculate sizes
train_end = int(train_size * len(df))
eval_end = train_end + int(eval_size * len(df))

# Split the data
X_train = df[:train_end]
X_eval = df[train_end:eval_end]
X_test = df[eval_end:]

# Define the prompt generation functions
def generate_prompt(data_point):
    return f"""
            Classify the text into "Computer Issues", "Peripheral Device Issues", "Security Alerts"	, "Uncategorized", "Printer Issues", "Job Orders", "Network Issues","Miscellaneous", "Maintenance Requests", "General Hardware Issues" and return the answer as the corresponding mental health disorder label.
text: {data_point["text"]}
label: {data_point["label"]}""".strip()

def generate_test_prompt(data_point):
    return f"""
            Classify the text into "Computer Issues", "Peripheral Device Issues", "Security Alerts"	, "Uncategorized", "Printer Issues", "Job Orders", "Network Issues","Miscellaneous", "Maintenance Requests", "General Hardware Issues" and return the answer as the corresponding mental health disorder label.
text: {data_point["text"]}
label: """.strip()

# Generate prompts for training and evaluation data
X_train.loc[:,'text'] = X_train.apply(generate_prompt, axis=1)
X_eval.loc[:,'text'] = X_eval.apply(generate_prompt, axis=1)

# Generate test prompts and extract true labels
y_true = X_test.loc[:,'label']
X_test = pd.DataFrame(X_test.apply(generate_test_prompt, axis=1), columns=["text"])

In [None]:
X_train.label.value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
Security Alerts,223
Printer Issues,221
Maintenance Requests,220
Computer Issues,217
Peripheral Device Issues,216
Network Issues,216
General Hardware Issues,214
Job Orders,213
Uncategorized,205
Miscellaneous,199


In [None]:
y_true.value_counts()

Unnamed: 0_level_0,count
label,Unnamed: 1_level_1
Uncategorized,33
Miscellaneous,33
Network Issues,31
General Hardware Issues,28
Maintenance Requests,26
Peripheral Device Issues,26
Computer Issues,26
Security Alerts,24
Job Orders,21
Printer Issues,20


In [None]:
# Convert to datasets
train_data = Dataset.from_pandas(X_train[["text"]])
eval_data = Dataset.from_pandas(X_eval[["text"]])

In [None]:
train_data['text'][3]

'Classify the text into "Computer Issues", "Peripheral Device Issues", "Security Alerts"\t, "Uncategorized", "Printer Issues", "Job Orders", "Network Issues","Miscellaneous", "Maintenance Requests", "General Hardware Issues" and return the answer as the corresponding mental health disorder label.\ntext: السلام عليكم ورحمه الله وبركاته لدينا مشكله في ربط جهاز مع شبكه المستشفي علي تنبيه باخلاء المسءوليه هذه الرساله ومرفقاتها معده لاستخدام المرسل اليه المقصود بالرساله فقط و قد تحتوي علي معلومات سريه او محميه قانونيا ان لم تكن الشخص المقصود فانه يمنع منعا باتا اي عرض او نشر او استخدام غير مصرح به للمحتوي نرجو اخطار المرسل عن طريق الرد علي هذا البريد الالكتروني واتلاف جميع النسخ الموجوده لديك تعد التصريحات و الاراء المذكوره في الرساله خاصه بالمرسل و لا تمثل وزاره الصحه كما لا تتحمل الوزاره مسءوليه الاضرار الناتجه عن اي فيروسات قد تحملها هذه الرساله  ملاحظة التاريخ صباحا المؤلف نوع التكنولوجيا الوقت معدل إعادة تعيين إلى المهندس عمر مختار ملاحظة ملاحظة التاريخ مساءًا المؤلف عمر مختار نوع التك

## Loading the model and tokenizer

In [None]:
# !pip install llama-stack

In [None]:
!llama model list

+----------------------------------+------------------------------------------+----------------+
[1m[97m| Model Descriptor                 | HuggingFace Repo                         | Context Length |[0m
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-8B                      | meta-llama/Llama-3.1-8B                  | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-70B                     | meta-llama/Llama-3.1-70B                 | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B:bf16-mp8           | meta-llama/Llama-3.1-405B                | 128K           |
+----------------------------------+------------------------------------------+----------------+
| Llama3.1-405B                    | meta-llama/Llama-3.1-405B-FP8            | 128K           |
+----------------

In [None]:
# !llama model download --source meta --model-id  MODEL_ID

In [None]:
!huggingface-cli login


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    A token is already saved on your machine. Run `huggingface-cli whoami` to get more information or `huggingface-cli logout` if you want to log out.
    Setting a new token will erase the existing one.
    To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) Y
Token is valid (permission: fineGr

In [None]:
# base_model_name = "meta-llama/Meta-Llama-3-8B"
base_model_name = "meta-llama/Llama-3.2-1B-Instruct"
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype="float16",
)

model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    device_map="auto",
    torch_dtype="float16",
    quantization_config=bnb_config,
)

model.config.use_cache = False
model.config.pretraining_tp = 1

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


In [None]:
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

tokenizer.pad_token_id = tokenizer.eos_token_id

## Model evalution before fine-tuning

In [None]:
def predict(test, model, tokenizer):
    y_pred = []
    categories = ["Computer Issues", "Peripheral Device Issues",
                  "Security Alerts"	, "Uncategorized",
                  "Printer Issues", "Job Orders"	,
                  "Network Issues"	,"Miscellaneous",
                  "Maintenance Requests","General Hardware Issues"]

    for i in tqdm(range(len(test))):
        prompt = test.iloc[i]["text"]
        pipe = pipeline(task="text-generation",
                        model=model,
                        tokenizer=tokenizer,
                        max_new_tokens=2,
                        temperature=0.1)

        result = pipe(prompt)
        answer = result[0]['generated_text'].split("label:")[-1].strip()

        # Determine the predicted category
        for category in categories:
            if category.lower() in answer.lower():
                y_pred.append(category)
                break
        else:
            y_pred.append("none")

    return y_pred

In [None]:
y_pred = predict(X_test, model, tokenizer)

  0%|          | 0/268 [00:00<?, ?it/s]Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)
100%|██████████| 268/268 [00:35<00:00,  7.62it/s]


In [None]:
def evaluate(y_true, y_pred):
    labels = ["Computer Issues", "Peripheral Device Issues",
              "Security Alerts"	, "Uncategorized",
              "Printer Issues", "Job Orders"	,
              "Network Issues"	,"Miscellaneous",
              "Maintenance Requests", "General Hardware Issues"	,]
    mapping = {label: idx for idx, label in enumerate(labels)}

    def map_func(x):
        return mapping.get(x, -1)  # Map to -1 if not found, but should not occur with correct data

    y_true_mapped = np.vectorize(map_func)(y_true)
    y_pred_mapped = np.vectorize(map_func)(y_pred)

    # Calculate accuracy
    accuracy = accuracy_score(y_true=y_true_mapped, y_pred=y_pred_mapped)
    print(f'Accuracy: {accuracy:.3f}')

    # Generate accuracy report
    unique_labels = set(y_true_mapped)  # Get unique labels

    for label in unique_labels:
        label_indices = [i for i in range(len(y_true_mapped)) if y_true_mapped[i] == label]
        label_y_true = [y_true_mapped[i] for i in label_indices]
        label_y_pred = [y_pred_mapped[i] for i in label_indices]
        label_accuracy = accuracy_score(label_y_true, label_y_pred)
        print(f'Accuracy for label {labels[label]}: {label_accuracy:.3f}')

    # Generate classification report
    class_report = classification_report(y_true=y_true_mapped, y_pred=y_pred_mapped, target_names=labels, labels=list(range(len(labels))))
    print('\nClassification Report:')
    print(class_report)

    # Generate confusion matrix
    conf_matrix = confusion_matrix(y_true=y_true_mapped, y_pred=y_pred_mapped, labels=list(range(len(labels))))
    print('\nConfusion Matrix:')
    print(conf_matrix)

In [None]:
evaluate(y_true, y_pred)

Accuracy: 0.060
Accuracy for label Computer Issues: 0.269
Accuracy for label Peripheral Device Issues: 0.000
Accuracy for label Security Alerts: 0.000
Accuracy for label Uncategorized: 0.000
Accuracy for label Printer Issues: 0.000
Accuracy for label Job Orders: 0.000
Accuracy for label Network Issues: 0.000
Accuracy for label Miscellaneous: 0.273
Accuracy for label Maintenance Requests: 0.000
Accuracy for label General Hardware Issues: 0.000

Classification Report:
                          precision    recall  f1-score   support

         Computer Issues       0.09      0.27      0.13        26
Peripheral Device Issues       0.00      0.00      0.00        26
         Security Alerts       0.00      0.00      0.00        24
           Uncategorized       0.00      0.00      0.00        33
          Printer Issues       0.00      0.00      0.00        20
              Job Orders       0.00      0.00      0.00        21
          Network Issues       0.00      0.00      0.00        31


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Extracting the linear modules names

In [None]:
import bitsandbytes as bnb
def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

In [None]:
modules = find_all_linear_names(model)
modules

['q_proj', 'k_proj', 'up_proj', 'down_proj', 'v_proj', 'gate_proj', 'o_proj']

## Setting up the model

In [None]:
n_classes = len(y_true.value_counts())
n_classes

10

In [None]:
# !pip install -q -U torchmetrics
import torch
from torchmetrics import F1Score, Precision, Recall, Accuracy
n_classes = len(y_true.value_counts())
# Convert NumPy arrays to PyTorch tensors and calculate metrics
def compute_metrics(p):
  preds = torch.from_numpy(p.predictions)
  labels = torch.from_numpy(p.label_ids)
  preds = torch.argmax(preds, axis=1)

  f1_score = F1Score(task="multiclass", num_classes=n_classes, average='macro')
  macro_f1 = f1_score(preds, labels)

  precision = Precision(task="multiclass", num_classes=n_classes, average='macro')
  macro_precision = precision(preds, labels)

  recall = Recall(task="multiclass", num_classes=n_classes, average='macro')
  macro_recall = recall(preds, labels)

  accuracy = Accuracy(task="multiclass", num_classes=n_classes)
  acc = accuracy(preds, labels)

  return {'macro_f1' : macro_f1, 'macro_precision': macro_precision, 'macro_recall': macro_recall, 'accuracy': acc}

In [None]:
output_dir="llama-3.2-instruct-fine-tuned-model"

peft_config = LoraConfig(
    lora_alpha=16,
    lora_dropout=0,
    r=64,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules,
)

training_arguments = TrainingArguments(
    output_dir=output_dir,                    # directory to save and repository id
    num_train_epochs=1,                       # number of training epochs
    per_device_train_batch_size=1,            # batch size per device during training
    gradient_accumulation_steps=16,            # number of steps before performing a backward/update pass
    gradient_checkpointing=True,
    optim="paged_adamw_32bit",
    logging_steps=3,
    learning_rate=2e-5,                       # learning rate, based on QLoRA paper
    weight_decay=0.001,
    fp16=True,
    bf16=False,
    max_grad_norm=0.3,                        # max gradient norm based on QLoRA paper
    max_steps=-1,
    warmup_ratio=0.03,                        # warmup ratio based on QLoRA paper
    group_by_length=False,
    lr_scheduler_type="cosine",               # use cosine learning rate scheduler
    report_to="wandb",                  # report metrics to w&b
    eval_strategy="steps",              # save checkpoint every epoch
    eval_steps = 0.2
)

trainer = SFTTrainer(
    model=model,
    args=training_arguments,
    train_dataset=train_data,
    eval_dataset=eval_data,
    peft_config=peft_config,
    dataset_text_field="text",
    tokenizer=tokenizer,
    max_seq_length=128,
    packing=False,
    # compute_metrics=compute_metrics,
    dataset_kwargs={
    "add_special_tokens": False,
    "append_concat_token": False,
    }
)


Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/2144 [00:00<?, ? examples/s]

Map:   0%|          | 0/268 [00:00<?, ? examples/s]

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


## Model Training

In [None]:
# Train model
trainer.train()

  return fn(*args, **kwargs)
  with torch.enable_grad(), device_autocast_ctx, torch.cpu.amp.autocast(**ctx.cpu_autocast_kwargs):  # type: ignore[attr-defined]


Step,Training Loss,Validation Loss
27,3.7431,3.654872
54,2.4653,2.449952
81,1.9353,1.98087
108,1.7762,1.833629


TrainOutput(global_step=134, training_loss=2.609753494832053, metrics={'train_runtime': 677.3124, 'train_samples_per_second': 3.165, 'train_steps_per_second': 0.198, 'total_flos': 1676299718246400.0, 'train_loss': 2.609753494832053, 'epoch': 1.0})

In [None]:
wandb.finish()
model.config.use_cache = True

VBox(children=(Label(value='0.024 MB of 0.024 MB uploaded\r'), FloatProgress(value=1.0, max=1.0)))

0,1
eval/loss,█▃▂▁
eval/runtime,█▂▁▂
eval/samples_per_second,▁▇█▇
eval/steps_per_second,▁▇█▇
train/epoch,▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
train/global_step,▁▁▁▁▂▂▂▂▂▂▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▅▆▆▆▆▇▇▇▇▇▇███
train/grad_norm,▄▃▄▄▄▄▅▅▅▅▅▅▆▇▇█▆▅▄▄▃▂▃▂▂▂▁▁▁▂▂▁▂▁▂▁▁▁▂▂
train/learning_rate,▅████████▇▇▇▇▆▆▆▆▆▅▅▅▄▄▄▄▃▃▃▃▂▂▂▂▂▁▁▁▁▁▁
train/loss,████▇▇▇▆▆▆▅▅▅▄▄▃▃▃▃▂▂▂▂▂▂▂▂▂▂▂▂▂▁▂▁▂▁▁▁▁

0,1
eval/loss,1.83363
eval/runtime,8.3773
eval/samples_per_second,31.991
eval/steps_per_second,4.059
total_flos,1676299718246400.0
train/epoch,1.0
train/global_step,134.0
train/grad_norm,0.63046
train/learning_rate,0.0
train/loss,1.7066


## Saving the model and tokenizer

In [None]:
# Save trained model and tokenizer
trainer.save_model(output_dir)
tokenizer.save_pretrained(output_dir)

('llama-3.2-instruct-fine-tuned-model/tokenizer_config.json',
 'llama-3.2-instruct-fine-tuned-model/special_tokens_map.json',
 'llama-3.2-instruct-fine-tuned-model/tokenizer.json')

## Testing model after fine-tuning

In [None]:
y_pred = predict(X_test, model, tokenizer)
evaluate(y_true, y_pred)

  return fn(*args, **kwargs)
100%|██████████| 268/268 [01:19<00:00,  3.35it/s]

Accuracy: 0.168
Accuracy for label Computer Issues: 0.000
Accuracy for label Peripheral Device Issues: 0.000
Accuracy for label Security Alerts: 0.000
Accuracy for label Uncategorized: 0.848
Accuracy for label Printer Issues: 0.000
Accuracy for label Job Orders: 0.000
Accuracy for label Network Issues: 0.000
Accuracy for label Miscellaneous: 0.515
Accuracy for label Maintenance Requests: 0.000
Accuracy for label General Hardware Issues: 0.000

Classification Report:
                          precision    recall  f1-score   support

         Computer Issues       0.00      0.00      0.00        26
Peripheral Device Issues       0.00      0.00      0.00        26
         Security Alerts       0.00      0.00      0.00        24
           Uncategorized       0.16      0.85      0.27        33
          Printer Issues       0.00      0.00      0.00        20
              Job Orders       0.00      0.00      0.00        21
          Network Issues       0.00      0.00      0.00        31



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [None]:
# Save trained model and tokenizer
# !pip install -q -U rouge_score
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer

# Assuming 'preds' and 'real_values' are your model's predictions and true labels
# Convert predictions and true labels to lists of strings if they are not already
predicted_labels = [str(p) for p in y_pred] # removed .item() method as it's not needed for strings
true_labels = [str(r) for r in y_true] # removed .item() method as it's not needed for elements in a series


# Calculate ROUGE scores
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
rouge_scores = []
for pred_label, true_label in zip(predicted_labels, true_labels):
  scores = scorer.score(true_label, pred_label)
  rouge_scores.append(scores)

# Calculate average ROUGE scores
avg_rouge1 = sum([s['rouge1'].fmeasure for s in rouge_scores]) / len(rouge_scores)
avg_rouge2 = sum([s['rouge2'].fmeasure for s in rouge_scores]) / len(rouge_scores)
avg_rougeL = sum([s['rougeL'].fmeasure for s in rouge_scores]) / len(rouge_scores)


# Calculate BLEU scores
bleu_scores = []
for pred_label, true_label in zip(predicted_labels, true_labels):
    reference = [true_label.split()]
    candidate = pred_label.split()
    bleu_scores.append(sentence_bleu(reference, candidate))

# Calculate average BLEU scores
avg_bleu_score = sum(bleu_scores) / len(bleu_scores)

print(f"Average ROUGE-1: {avg_rouge1}")
print(f"Average ROUGE-2: {avg_rouge2}")
print(f"Average ROUGE-L: {avg_rougeL}")
print(f"Average BLEU Score: {avg_bleu_score}")

Average ROUGE-1: 0.16417910447761194
Average ROUGE-2: 0.0
Average ROUGE-L: 0.16417910447761194
Average BLEU Score: 2.9910674453580224e-232


The hypothesis contains 0 counts of 2-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()
