In [None]:
!pip install transformers
!pip install peft
!pip install 'accelerate>=0.26.0'
!pip install -U bitsandbytes
!pip install huggingface-hub
!pip install datasets
!pip install wandb

In [1]:
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          Trainer,
                          pipeline,
                          DataCollatorForLanguageModeling,
                          PreTrainedTokenizer)
from peft import LoraConfig, get_peft_model
import huggingface_hub
import os
import logging
from tqdm import tqdm
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from typing import List
import wandb
from lora_llm import finetuning, evaluate, inference

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  import pynvml  # type: ignore[import]


In [2]:
# --- Experiment-tracking / hub authentication -------------------------------
# Notebook name reported to Weights & Biases for this session.
os.environ['WANDB_NOTEBOOK_NAME'] = 'lora_llm'
# NOTE(review): presumably keeps W&B logging enabled inside the training
# integration; confirm against the consumer of WANDB_DISABLED.
os.environ["WANDB_DISABLED"] = "false"

# Load the API keys from secrets.json. Only the parsing happens while the
# file is open; the network logins run after the handle has been closed.
with open('secrets.json', 'r') as file:
    secrets = json.load(file)
huggingface_hub.login(secrets.get('HF_KEY'))
wandb.login(key=secrets.get('WANDB_KEY'))

# --- Run configuration -------------------------------------------------------
lr = 2e-4
epochs = 2
class_names = []  # not referenced anywhere else in this cell
base_model_name = 'google/gemma-2-9b-it'
label_name = "evasion_label"
# Directory handed to both finetuning() and evaluate() below.
fine_tuned_model_path = f"./gemma_9b_{epochs}ep"

# Wandb configuration
run = wandb.init(entity="kontilenia-national-technical-university-of-athens",
                 project='political-speech-clarity',
                 job_type="training",
                 name=f"{epochs} epoch Gemma",
                 # Track hyperparameters and run metadata
                 config={
                    "learning_rate": lr,
                    "architecture": base_model_name,
                    "dataset": "qevasion_dataset_preproccessed",
                    "epochs": epochs,
                 })

# --- Fine-tune ---------------------------------------------------------------
model, tokenizer = finetuning(base_model_name,
                              fine_tuned_model_path,
                              label_name,
                              lr,
                              epochs)

# --- Evaluate ----------------------------------------------------------------
# Reuse `label_name` instead of repeating the literal so the label evaluated
# is always the one the model was just fine-tuned on.
# Left as the last expression on purpose: the value returned by evaluate() is
# displayed as the cell output.
evaluate(base_model_name,
         fine_tuned_model_path,
         label_name,
         "clarity_label",
         "preprocessed_data/test_set.csv",
         False,  # NOTE(review): presumably `added_name_summary`; confirm in lora_llm.evaluate
         model,
         tokenizer,
         run)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ec2-user/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkontilenia[0m ([33mkontilenia-national-technical-university-of-athens[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


  import pynvml  # type: ignore[import]


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

trainable params: 8945664 || all params: 5088853504
        || trainable%: 0.17578937953251011
Found 3109 instances for training and
    339 instances for validation.
Training...


Step,Training Loss,Validation Loss
129,1.8103,1.801431
258,1.9169,1.773488
387,1.7342,1.744111
516,1.7509,1.75411
645,1.6155,1.725847
774,1.624,1.718775


Device set to use cuda:0
  3%|▎         | 1/35 [00:05<03:12,  5.66s/it]

Right label: dodging
Right label: general
Right label: deflection
Right label: dodging
Right label: dodging
Right label: deflection
Right label: dodging
Right label: dodging


  6%|▌         | 2/35 [00:11<03:02,  5.52s/it]

Right label: general
Right label: deflection
Right label: implicit
Right label: general
Right label: dodging
Right label: general
Right label: explicit
Right label: dodging


  9%|▊         | 3/35 [00:16<02:57,  5.53s/it]

Right label: dodging
Right label: dodging
Right label: dodging
Right label: implicit
Right label: explicit
Right label: general
Right label: deflection
Right label: dodging


 11%|█▏        | 4/35 [00:22<02:58,  5.75s/it]

Right label: deflection
Right label: dodging
Right label: general
Right label: dodging
Right label: general
Right label: general
Right label: dodging
Right label: dodging


 14%|█▍        | 5/35 [00:28<02:57,  5.90s/it]

Right label: explicit
Right label: dodging
Right label: deflection
Right label: deflection
Right label: implicit
Right label: implicit
Right label: dodging
Right label: deflection


 17%|█▋        | 6/35 [00:34<02:46,  5.75s/it]

Right label: dodging
Right label: dodging
Right label: general
Right label: general
Right label: explicit
Right label: general
Right label: claims ignorance
Right label: dodging


 20%|██        | 7/35 [00:39<02:33,  5.47s/it]

Right label: general
Right label: general
Right label: general
Right label: implicit
Right label: dodging
Right label: dodging
Right label: general
Right label: dodging


 23%|██▎       | 8/35 [00:44<02:27,  5.45s/it]

Right label: explicit
Right label: dodging
Right label: dodging
Right label: dodging
Right label: implicit
Right label: explicit
Right label: general
Right label: general


 26%|██▌       | 9/35 [00:49<02:20,  5.40s/it]

Right label: dodging
Right label: dodging
Right label: clarification
Right label: explicit
Right label: general
Right label: deflection
Right label: implicit
Right label: dodging


 29%|██▊       | 10/35 [00:55<02:17,  5.50s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Right label: general
Right label: dodging
Right label: dodging
Right label: dodging
Right label: dodging
Right label: dodging
Right label: explicit
Right label: dodging


 31%|███▏      | 11/35 [01:01<02:14,  5.60s/it]

Right label: implicit
Right label: general
Right label: deflection
Right label: deflection
Right label: deflection
Right label: explicit
Right label: deflection
Right label: deflection


 34%|███▍      | 12/35 [01:07<02:10,  5.68s/it]

Right label: general
Right label: dodging
Right label: dodging
Right label: general
Right label: implicit
Right label: general
Right label: deflection
Right label: implicit


 37%|███▋      | 13/35 [01:13<02:07,  5.80s/it]

Right label: implicit
Right label: general
Right label: implicit
Right label: explicit
Right label: dodging
Right label: deflection
Right label: dodging
Right label: implicit


 40%|████      | 14/35 [01:19<02:01,  5.76s/it]

Right label: general
Right label: dodging
Right label: deflection
Right label: dodging
Right label: implicit
Right label: explicit
Right label: explicit
Right label: general


 43%|████▎     | 15/35 [01:25<01:57,  5.89s/it]

Right label: explicit
Right label: general
Right label: dodging
Right label: dodging
Right label: dodging
Right label: dodging
Right label: dodging
Right label: explicit


 46%|████▌     | 16/35 [01:30<01:50,  5.79s/it]

Right label: implicit
Right label: explicit
Right label: deflection
Right label: dodging
Right label: explicit
Right label: general
Right label: deflection
Right label: dodging


 49%|████▊     | 17/35 [01:36<01:43,  5.73s/it]

Right label: dodging
Right label: general
Right label: explicit
Right label: explicit
Right label: deflection
Right label: explicit
Right label: explicit
Right label: deflection


 51%|█████▏    | 18/35 [01:41<01:36,  5.65s/it]

Right label: implicit
Right label: dodging
Right label: dodging
Right label: general
Right label: dodging
Right label: dodging
Right label: dodging
Right label: general


 54%|█████▍    | 19/35 [01:47<01:28,  5.52s/it]

Right label: general
Right label: deflection
Right label: explicit
Right label: deflection
Right label: general
Right label: dodging
Right label: dodging
Right label: general


 57%|█████▋    | 20/35 [01:52<01:22,  5.49s/it]

Right label: dodging
Right label: deflection
Right label: general
Right label: general
Right label: general
Right label: dodging
Right label: deflection
Right label: general


 60%|██████    | 21/35 [01:58<01:17,  5.55s/it]

Right label: dodging
Right label: deflection
Right label: implicit
Right label: deflection
Right label: dodging
Right label: explicit
Right label: deflection
Right label: dodging


 63%|██████▎   | 22/35 [02:03<01:12,  5.58s/it]

Right label: implicit
Right label: dodging
Right label: implicit
Right label: deflection
Right label: implicit
Right label: dodging
Right label: general
Right label: general


 66%|██████▌   | 23/35 [02:09<01:07,  5.64s/it]

Right label: general
Right label: dodging
Right label: deflection
Right label: general
Right label: implicit
Right label: implicit
Right label: dodging
Right label: implicit


 69%|██████▊   | 24/35 [02:15<01:01,  5.59s/it]

Right label: deflection
Right label: deflection
Right label: dodging
Right label: explicit
Right label: explicit
Right label: general
Right label: implicit
Right label: explicit


 71%|███████▏  | 25/35 [02:21<00:57,  5.74s/it]

Right label: dodging
Right label: dodging
Right label: implicit
Right label: general
Right label: dodging
Right label: dodging
Right label: deflection
Right label: explicit


 74%|███████▍  | 26/35 [02:26<00:51,  5.74s/it]

Right label: deflection
Right label: explicit
Right label: general
Right label: implicit
Right label: implicit
Right label: implicit
Right label: deflection
Right label: dodging


 77%|███████▋  | 27/35 [02:32<00:45,  5.69s/it]

Right label: deflection
Right label: deflection
Right label: deflection
Right label: general
Right label: deflection
Right label: explicit
Right label: clarification
Right label: explicit


 80%|████████  | 28/35 [02:37<00:39,  5.61s/it]

Right label: dodging
Right label: implicit
Right label: explicit
Right label: implicit
Right label: implicit
Right label: deflection
Right label: dodging
Right label: explicit


 83%|████████▎ | 29/35 [02:43<00:33,  5.61s/it]

Right label: dodging
Right label: explicit
Right label: dodging
Right label: dodging
Right label: general
Right label: dodging
Right label: implicit
Right label: deflection


 86%|████████▌ | 30/35 [02:48<00:27,  5.52s/it]

Right label: general
Right label: claims ignorance
Right label: explicit
Right label: dodging
Right label: implicit
Right label: dodging
Right label: implicit
Right label: dodging


 89%|████████▊ | 31/35 [02:54<00:22,  5.68s/it]

Right label: dodging
Right label: explicit
Right label: implicit
Right label: deflection
Right label: implicit
Right label: deflection
Right label: implicit
Right label: dodging


 91%|█████████▏| 32/35 [03:00<00:17,  5.67s/it]

Right label: deflection
Right label: implicit
Right label: dodging
Right label: general
Right label: dodging
Right label: dodging
Right label: dodging
Right label: explicit


 94%|█████████▍| 33/35 [03:06<00:11,  5.68s/it]

Right label: dodging
Right label: dodging
Right label: dodging
Right label: deflection
Right label: dodging
Right label: implicit
Right label: general
Right label: dodging


 97%|█████████▋| 34/35 [03:11<00:05,  5.61s/it]

Right label: explicit
Right label: explicit
Right label: dodging
Right label: general
Right label: dodging
Right label: dodging
Right label: deflection
Right label: implicit


100%|██████████| 35/35 [03:14<00:00,  5.55s/it]

Right label: implicit
Right label: deflection
Right label: general
Right label: general
Accuracy: 0.70
Accuracy for label indirect: 0.95
Accuracy for label direct reply: 0.34
Accuracy for label direct non-reply: 0.09

Classification Report:
                  precision    recall  f1-score   support

        indirect       0.70      0.95      0.81       174
    direct_reply       0.73      0.34      0.47        79
direct_non-reply       1.00      0.09      0.16        23

       micro avg       0.71      0.70      0.71       276
       macro avg       0.81      0.46      0.48       276
    weighted avg       0.73      0.70      0.66       276


Confusion Matrix:
[[165   9   0]
 [ 52  27   0]
 [ 18   1   2]]





['Accuracy: 0.70',
 'Accuracy for label indirect: 0.95',
 'Accuracy for label direct reply: 0.34',
 'Accuracy for label direct non-reply: 0.09',
 'Classification Report:                  precision    recall  f1-score   support\n\n        indirect       0.70      0.95      0.81       174\n    direct_reply       0.73      0.34      0.47        79\ndirect_non-reply       1.00      0.09      0.16        23\n\n       micro avg       0.71      0.70      0.71       276\n       macro avg       0.81      0.46      0.48       276\n    weighted avg       0.73      0.70      0.66       276\n',
 'Confusion Matrix:[[165   9   0]\n [ 52  27   0]\n [ 18   1   2]]']

In [2]:
# --- Experiment-tracking / hub authentication -------------------------------
# Notebook name reported to Weights & Biases for this session.
os.environ['WANDB_NOTEBOOK_NAME'] = 'lora_llm'
# NOTE(review): presumably keeps W&B logging enabled; confirm against the
# consumer of WANDB_DISABLED.
os.environ["WANDB_DISABLED"] = "false"

# Load the API keys from secrets.json. Only the parsing happens while the
# file is open; the network logins run after the handle has been closed.
with open('secrets.json', 'r') as file:
    secrets = json.load(file)
huggingface_hub.login(secrets.get('HF_KEY'))
wandb.login(key=secrets.get('WANDB_KEY'))

# --- Evaluation configuration ------------------------------------------------
lr = 2e-4  # not used by this cell; kept so the kernel namespace is unchanged
epochs = 2
class_names = []  # not referenced anywhere else in this cell
# The checkpoint directory below is named for (and, in the training cell of
# this notebook, produced from) the 9B Gemma model, so the base model must be
# the 9B variant as well. Pairing it with 'google/gemma-2-27b-it' would combine
# the saved fine-tuned weights with a different architecture.
base_model_name = 'google/gemma-2-9b-it'
label_name = "evasion_label"
fine_tuned_model_path = f"./gemma_9b_{epochs}ep"

# Evaluate the saved fine-tuned model on the named test set. No model,
# tokenizer or W&B run is passed here.
# Left as the last expression on purpose: the value returned by evaluate() is
# displayed as the cell output.
evaluate(base_model_name,
         fine_tuned_model_path,
         label_name,
         "clarity_label",
         "preprocessed_data/named_test_set_info.csv",
         added_name_summary=False)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ec2-user/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkontilenia[0m ([33mkontilenia-national-technical-university-of-athens[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
  import pynvml  # type: ignore[import]


Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

Device set to use cuda:0
  8%|▊         | 1/13 [00:05<01:09,  5.78s/it]

Right label: implicit
Right label: implicit
Right label: dodging
Right label: dodging
Right label: implicit
Right label: implicit
Right label: implicit
Right label: implicit


 15%|█▌        | 2/13 [00:12<01:06,  6.06s/it]

Right label: implicit
Right label: dodging
Right label: deflection
Right label: implicit
Right label: deflection
Right label: deflection
Right label: implicit
Right label: deflection


 23%|██▎       | 3/13 [00:17<01:00,  6.00s/it]

Right label: dodging
Right label: dodging
Right label: implicit
Right label: implicit
Right label: implicit
Right label: deflection
Right label: declining to answer
Right label: general


 31%|███       | 4/13 [00:23<00:50,  5.64s/it]

Right label: implicit
Right label: implicit
Right label: explicit
Right label: implicit
Right label: implicit
Right label: implicit
Right label: implicit
Right label: implicit


 38%|███▊      | 5/13 [00:28<00:45,  5.65s/it]

Right label: dodging
Right label: implicit
Right label: deflection
Right label: implicit
Right label: dodging
Right label: implicit
Right label: explicit
Right label: dodging


 46%|████▌     | 6/13 [00:34<00:40,  5.79s/it]

Right label: implicit
Right label: implicit
Right label: implicit
Right label: implicit
Right label: deflection
Right label: implicit
Right label: implicit
Right label: implicit


 54%|█████▍    | 7/13 [00:41<00:36,  6.01s/it]

Right label: deflection
Right label: implicit
Right label: implicit
Right label: dodging
Right label: explicit
Right label: explicit
Right label: implicit
Right label: explicit


 62%|██████▏   | 8/13 [00:46<00:28,  5.70s/it]

Right label: implicit
Right label: implicit
Right label: implicit
Right label: explicit
Right label: explicit
Right label: implicit
Right label: dodging
Right label: implicit


 69%|██████▉   | 9/13 [00:51<00:22,  5.65s/it]

Right label: explicit
Right label: implicit
Right label: explicit
Right label: explicit
Right label: explicit
Right label: implicit
Right label: deflection
Right label: implicit


 77%|███████▋  | 10/13 [00:57<00:16,  5.61s/it]You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset


Right label: implicit
Right label: implicit
Right label: implicit
Right label: implicit
Right label: explicit
Right label: implicit
Right label: implicit
Right label: implicit


 85%|████████▍ | 11/13 [01:03<00:11,  5.65s/it]

Right label: implicit
Right label: implicit
Right label: general
Right label: explicit
Right label: explicit
Right label: implicit
Right label: implicit
Right label: explicit


 92%|█████████▏| 12/13 [01:08<00:05,  5.60s/it]

Right label: implicit
Right label: implicit
Right label: implicit
Right label: implicit
Right label: general
Right label: implicit
Right label: implicit
Right label: implicit


100%|██████████| 13/13 [01:09<00:00,  5.32s/it]

Right label: implicit
Accuracy: 0.76
Accuracy for label indirect: 0.95
Accuracy for label direct reply: 0.43
Accuracy for label direct non-reply: 0.00

Classification Report:
                  precision    recall  f1-score   support

        indirect       0.75      0.95      0.84        64
    direct_reply       0.87      0.43      0.58        30
direct_non-reply       0.00      0.00      0.00         3

       micro avg       0.77      0.76      0.77        97
       macro avg       0.54      0.46      0.47        97
    weighted avg       0.76      0.76      0.73        97


Confusion Matrix:
[[61  2  0]
 [17 13  0]
 [ 3  0  0]]



  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])
  _warn_prf(average, modifier, f"{metric.capitalize()} is", result.shape[0])


['Accuracy: 0.76',
 'Accuracy for label indirect: 0.95',
 'Accuracy for label direct reply: 0.43',
 'Accuracy for label direct non-reply: 0.00',
 'Classification Report:                  precision    recall  f1-score   support\n\n        indirect       0.75      0.95      0.84        64\n    direct_reply       0.87      0.43      0.58        30\ndirect_non-reply       0.00      0.00      0.00         3\n\n       micro avg       0.77      0.76      0.77        97\n       macro avg       0.54      0.46      0.47        97\n    weighted avg       0.76      0.76      0.73        97\n',
 'Confusion Matrix:[[61  2  0]\n [17 13  0]\n [ 3  0  0]]']