In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)

import pandas as pd
from datasets import Dataset
import torch
from datasets import load_dataset
from transformers import TrainingArguments, TextStreamer
import os, wandb
from trl import SFTConfig, setup_chat_format, SFTTrainer


print(torch.cuda.is_available())
print(torch.cuda.current_device())
print(torch.cuda.get_device_name(0))

# Load the augmented CSV
df = pd.read_csv('augmented.csv')


  from .autonotebook import tqdm as notebook_tqdm


True
0
NVIDIA GeForce RTX 3070 Laptop GPU


In [2]:
# Preprocess the input and output (combine features into text format)
def preprocess_data(row):
    input_string = f"Age: {row['Age']}, CreditScore: {row['CreditScore']}, Income: {row['Income']}, " \
                   f"YearsEmployed: {row['YearsEmployed']}, Gender: {'Male' if row['Gender'] == 1 else 'Female'}, " \
                   f"Married: {'Yes' if row['Married'] == 1 else 'No'}, " \
                   f"Industry: {row['Industry']}, Ethnicity: {row['Ethnicity']}, " \
                   f"PriorDefault: {'Yes' if row['PriorDefault'] == 1 else 'No'}, " \
                   f"Employed: {'Yes' if row['Employed'] == 1 else 'No'}"
    
    # Simplified output format
    output_string = f"{'Yes' if row['Approved'] == 1 else 'No'}, {row['Reason']}"
    
    return {"text": input_string, "label": output_string}

# Apply preprocessing to the dataframe
df_processed = df.apply(preprocess_data, axis=1)
df_final = pd.DataFrame(df_processed.tolist())  # Convert to DataFrame of text and labels

# No need to use np.ravel for the labels as we're already returning a single string
df_final


Unnamed: 0,text,label
0,"Age: 30.83, CreditScore: 1, Income: 0, YearsEm...","Yes, This application was approved due to Inco..."
1,"Age: 58.67, CreditScore: 6, Income: 560, Years...","Yes, This application was approved due to Inco..."
2,"Age: 24.5, CreditScore: 0, Income: 824, YearsE...","Yes, This application was approved due to Inco..."
3,"Age: 27.83, CreditScore: 5, Income: 3, YearsEm...","Yes, This application was approved due to Year..."
4,"Age: 20.17, CreditScore: 0, Income: 0, YearsEm...","Yes, This application was approved due to Inco..."
...,...,...
685,"Age: 21.08, CreditScore: 0, Income: 0, YearsEm...","No, This application was denied due to Employe..."
686,"Age: 22.67, CreditScore: 2, Income: 394, Years...","No, This application was denied due to Income,..."
687,"Age: 25.25, CreditScore: 1, Income: 1, YearsEm...","No, This application was denied due to Income,..."
688,"Age: 17.92, CreditScore: 0, Income: 750, Years...","No, This application was denied due to YearsEm..."


In [3]:
# Convert DataFrame to Huggingface Dataset format
dataset = Dataset.from_pandas(df_final)

In [4]:
# !huggingface-cli login      
# !wandb login --relogin API-KEY

In [5]:
from transformers import LlamaForCausalLM, LlamaTokenizer
from transformers import AutoTokenizer
#8B,70B,405B
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.1-8B")

if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

# Tokenize the dataset
def tokenize_function(examples):
    return tokenizer(examples["text"], padding="max_length", truncation=True)

tokenized_dataset = dataset.map(tokenize_function, batched=True, remove_columns=["label"])

# Split dataset into train and test sets
dataset = tokenized_dataset.train_test_split(test_size=0.3)

Map: 100%|██████████| 690/690 [01:04<00:00, 10.62 examples/s]


In [6]:
# Accessing the 'text' field in the train split
print(dataset['train']['text'][3]) 



Age: 33.67, CreditScore: 0, Income: 44, YearsEmployed: 0.375, Gender: Female, Married: Yes, Industry: InformationTechnology, Ethnicity: White, PriorDefault: No, Employed: No


In [7]:
new_model = "llama-3-8b-CC"
torch_dtype = torch.float16
attn_implementation = "eager"

# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)


model = LlamaForCausalLM.from_pretrained("meta-llama/Llama-3.1-8B", 
                                         quantization_config=bnb_config,
                                         device_map="auto",
                                         attn_implementation=attn_implementation)

model.resize_token_embeddings(len(tokenizer))


Loading checkpoint shards: 100%|██████████| 4/4 [00:26<00:00,  6.61s/it]


Embedding(128256, 4096)

In [8]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=['up_proj', 'down_proj', 'gate_proj', 'k_proj', 'q_proj', 'v_proj', 'o_proj']
)
model = get_peft_model(model, peft_config)


run = wandb.init(
    project='Fine-tune Llama 3 8B on CC Dataset', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Currently logged in as: [33mdnicho26[0m ([33mdnicho26-university-of-north-carolina-at-charlotte[0m). Use [1m`wandb login --relogin`[0m to force relogin


In [9]:
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    evaluation_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

trainer = SFTTrainer(
    model=model,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=training_arguments,
    packing= False,
)



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


In [10]:
trainer.train()

  0%|          | 0/241 [00:00<?, ?it/s]

OutOfMemoryError: CUDA out of memory. Tried to allocate 32.00 GiB. GPU 0 has a total capacity of 8.00 GiB of which 380.00 MiB is free. Of the allocated memory 6.47 GiB is allocated by PyTorch, and 89.13 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
wandb.finish()
model.config.use_cache = True

In [None]:
# Test prompt to evaluate model
test_message = {
    "role": "user",
    "content": "Age: 35, CreditScore: 650, Income: 5000, YearsEmployed: 5, Gender: Male, Married: Yes, " \
               "Industry: Tech, Ethnicity: Asian, PriorDefault: No, Employed: Yes"
}


# Create the prompt from the message
prompt = tokenizer.apply_chat_template([test_message], tokenize=False, add_generation_prompt=True)

# Tokenize and generate response
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")
outputs = model.generate(**inputs, max_length=150, num_return_sequences=1)

# Decode and print the generated text
text = tokenizer.decode(outputs[0], skip_special_tokens=True)
print(text.split("assistant")[1])  # Extract the assistant's response