In [None]:
!pip install transformers
!pip install peft
!pip install 'accelerate>=0.26.0'
!pip install -U bitsandbytes
!pip install huggingface-hub
!pip install datasets
!pip install wandb

In [1]:
import json
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset
from transformers import (AutoTokenizer,
                          AutoModelForCausalLM,
                          BitsAndBytesConfig,
                          TrainingArguments,
                          Trainer,
                          pipeline,
                          DataCollatorForLanguageModeling,
                          PreTrainedTokenizer)
from peft import LoraConfig, get_peft_model
import huggingface_hub
import os
import logging
from tqdm import tqdm
from sklearn.metrics import (accuracy_score,
                             classification_report,
                             confusion_matrix)
from typing import List
import wandb
from lora_llm import finetuning, evaluate, inference

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  import pynvml  # type: ignore[import]


In [2]:
os.environ['WANDB_NOTEBOOK_NAME'] = 'lora_llm'
os.environ["WANDB_DISABLED"] = "true"

# Load the API key from the secret.json file
with open('secrets.json', 'r') as file:
    secrets = json.load(file)
    huggingface_hub.login(secrets.get('HF_KEY'))
    wandb.login(key=secrets.get('WANDB_KEY'))

lr = 2e-4
epochs = 5
class_names = []
base_model_name = 'google/gemma-2-27b-it'
label_name = "evasion_label"
fine_tuned_model_path = f"./ministral_{epochs}ep"

evaluate(base_model_name,
         base_model_name,
         "evasion_label",
         "clarity_label",
         "preprocessed_data/test_set.csv",
         added_name_summary=False)

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /home/ec2-user/.netrc
[34m[1mwandb[0m: Currently logged in as: [33mkontilenia[0m ([33mkontilenia-national-technical-university-of-athens[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
  import pynvml  # type: ignore[import]


Loading checkpoint shards:   0%|          | 0/12 [00:00<?, ?it/s]

  0%|          | 0/138 [00:00<?, ?it/s]The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
  1%|          | 1/138 [00:06<14:56,  6.54s/it]

You will be given a part of an interview.Classify the response to the selected questioninto one of the following categories: Dodging, General, Implicit, Explicit, Deflection, Declining to answer, Partial/half-answer, Claims ignorance, ClarificationOutput ONLY the label, nothing else.
. 

 ### Part of the interview ### 
Intervier: Q. What about the redline, sir? 
Response: Well, the world has made it clear that these tests caused us to come together and work in the United Nations to send a clear message to the North Korean regime. We're bound up together with a common strategy to solve this issue peacefully through diplomatic means.Kevin [Kevin Corke, NBC News]. 

### Selected Question ###
 Inquiring about the status or information regarding the redline. 

Label:
Wrong label: 
You will be given a part of an interview.Classify the response to the selected questioninto one of the following categories: Dodging, General, Implicit, Explicit, Deflection, Declining to answer, Partial/half-answ

  1%|▏         | 2/138 [00:13<15:18,  6.76s/it]

You will be given a part of an interview.Classify the response to the selected questioninto one of the following categories: Dodging, General, Implicit, Explicit, Deflection, Declining to answer, Partial/half-answer, Claims ignorance, ClarificationOutput ONLY the label, nothing else.
. 

 ### Part of the interview ### 
Intervier: Q. Harsh. Mr. President, Japan has dropped the threat of sanctions from its proposed Security Council resolution about North Korea. Why was that necessary? And how do you punish or penalize a country that's already among the poorest and most isolated in the world? 
Response: I think that the purpose of the U.N. Security Council resolution is to send a clear message to the leader of that the world condemns that which he did. Part of our strategy, as you know, has been to have others at the table, is to say as clearly as possible to the n, Get rid of your weapons, and there's a better way forward. In other words, there's a choice for him to make. He can verifiab

  2%|▏         | 3/138 [00:20<15:20,  6.82s/it]

You will be given a part of an interview.Classify the response to the selected questioninto one of the following categories: Dodging, General, Implicit, Explicit, Deflection, Declining to answer, Partial/half-answer, Claims ignorance, ClarificationOutput ONLY the label, nothing else.
. 

 ### Part of the interview ### 
Intervier: Q. Thank you, Mr. President. Back on Iraq, a group of American and Iraqi health officials today released a report saying that 655,000 Iraqis have died since the Iraq war. That figure is 20 times the figure that you cited in December, at 30,000. Do you care to amend or update your figure, and do you consider this a credible report? 
Response: No, I don't consider it a credible report; neither does General Casey and neither do Iraqi officials. I do know that a lot of innocent people have died, and that troubles me, and it grieves me. And I applaud the Iraqis for their courage in the face of violence. I am amazed that this is a society which so wants to be free t

  3%|▎         | 4/138 [00:27<15:56,  7.14s/it]

You will be given a part of an interview.Classify the response to the selected questioninto one of the following categories: Dodging, General, Implicit, Explicit, Deflection, Declining to answer, Partial/half-answer, Claims ignorance, ClarificationOutput ONLY the label, nothing else.
. 

 ### Part of the interview ### 
Intervier: Q. Do you feel in some way that there is some shift going on in terms of the general support for the war in Iraq and your strategy specifically? And do you ever feel like the walls are closing in on you in terms of support for this? 
Response: [] Jim, I understand how hard it is, and I also understand the stakes. And let me go back to Senator Warner. Senator Warner said, If the plan isn't working, adjust. I agree completely. I haven't seen Baker's report yet, but one of the things I remind you of is that I don't hear those people saying, get out before the job is done. They're saying, be flexible. And we are.I believe that you empower your generals to make the

  3%|▎         | 4/138 [00:30<17:02,  7.63s/it]


KeyboardInterrupt: 

In [2]:
cache_dir = "/home/ec2-user/SageMaker"

model = AutoModelForCausalLM.from_pretrained(
    "google/gemma-2-27b-it",
    return_dict=True,
    low_cpu_mem_usage=True,
    quantization_config=BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_compute_dtype=torch.float16
    ),
    # dtype=torch.float16,
    device_map="balanced",
    trust_remote_code=True,
    offload_folder="offload/",
    cache_dir=cache_dir,
    attn_implementation="eager" 
)
tokenizer = AutoTokenizer.from_pretrained("google/gemma-2-27b-it", use_fast=True)

# tokenizer.pad_token = tokenizer.eos_token
# model.config.pad_token_id = tokenizer.pad_token_id
# model.config.eos_token_id = tokenizer.eos_token_id

input_text = (
    "<start_of_turn>user\n"
    "Write me a poem about Machine Learning."
    "<end_of_turn>\n"
    "<start_of_turn>model\n"
)

inputs = tokenizer(input_text, return_tensors="pt").to(model.device)

with torch.inference_mode():
    outputs = model.generate(
        **inputs,
        max_new_tokens=200,
        temperature=0.7,
        top_p=0.9,
        eos_token_id=tokenizer.eos_token_id,
        pad_token_id=tokenizer.pad_token_id
    )

print(tokenizer.decode(outputs[0], skip_special_tokens=True))


  import pynvml  # type: ignore[import]


Loading checkpoint shards:   0%|          | 0/12 [00:00<?, ?it/s]

The following generation flags are not valid and may be ignored: ['temperature', 'top_p']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


user
Write me a poem about Machine Learning.
model



In [2]:
from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
import torch

model_id = "google/gemma-2-27b-it"

quant = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.float16)

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="balanced",
    quantization_config=quant,
    attn_implementation="eager",
    trust_remote_code=True
)

tokenizer = AutoTokenizer.from_pretrained(model_id, use_fast=True)

# Important: set pad/eos tokens
tokenizer.pad_token = tokenizer.eos_token
model.config.pad_token_id = tokenizer.pad_token_id
model.config.eos_token_id = tokenizer.eos_token_id

# Use the chat template
messages = [
    {"role": "user", "content": "Write me a poem about Machine Learning."}
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Build pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    device_map="balanced"
)

# Run inference
outputs = pipe(
    prompt,
    max_new_tokens=200,
    # do_sample=True,
    temperature=0.7,
    top_p=0.9,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id
)

print(outputs[0]["generated_text"])


config.json:   0%|          | 0.00/893 [00:00<?, ?B/s]

  import pynvml  # type: ignore[import]


model.safetensors.index.json:   0%|          | 0.00/42.8k [00:00<?, ?B/s]

Fetching 12 files:   0%|          | 0/12 [00:00<?, ?it/s]



model-00002-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00006-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00001-of-00012.safetensors:   0%|          | 0.00/4.74G [00:00<?, ?B/s]

model-00007-of-00012.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]

model-00003-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00005-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00008-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00004-of-00012.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]



model-00010-of-00012.safetensors:   0%|          | 0.00/4.98G [00:00<?, ?B/s]



model-00009-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00011-of-00012.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

RuntimeError: Data processing error: CAS service error : IO Error: No space left on device (os error 28)