In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format

In [2]:
# huggingface-cli login --token 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    
# wandb login --relogin c1858410fe092c30e9807b3d2570dd61ae73d01f

In [3]:
run = wandb.init(
    project='Fine-tune Llama 3 on CC Dataset', 
    job_type="training", 
    anonymous="allow"
)

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


[34m[1mwandb[0m: Currently logged in as: [33mdnicho26[0m ([33mdnicho26-university-of-north-carolina-at-charlotte[0m). Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Tracking run with wandb version 0.18.5


[34m[1mwandb[0m: Run data is saved locally in [35m[1m/opt/notebooks/Chatbot-Credit-Card/wandb/run-20241024_215327-mo044jg6[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.


[34m[1mwandb[0m: Syncing run [33mquiet-field-26[0m


[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/dnicho26-university-of-north-carolina-at-charlotte/Fine-tune%20Llama%203%20on%20CC%20Dataset[0m


[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/dnicho26-university-of-north-carolina-at-charlotte/Fine-tune%20Llama%203%20on%20CC%20Dataset/runs/mo044jg6[0m


In [4]:
base_model = "meta-llama/Llama-3.2-3B-Instruct"
new_model = "./models/llama-3.2-3b-CC"

In [5]:
torch_dtype = torch.float16
attn_implementation = "eager"

In [6]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)
# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
import pandas as pd
df = pd.read_csv('cleaned-augmented.csv')
df

Unnamed: 0,Gender,Age,Debt,Married,BankCustomer,Industry,Ethnicity,YearsEmployed,PriorDefault,Employed,CreditScore,DriversLicense,Citizen,ZipCode,Income,Approved,Reason
0,1,30.83,0.000,1,1,Industrials,White,1.25,1,1,1,0,ByBirth,202,0,1,"This application was approved due to Income, Y..."
1,0,58.67,4.460,1,1,Materials,Black,3.04,1,1,6,0,ByBirth,43,560,1,"This application was approved due to Income, Y..."
2,0,24.50,0.500,1,1,Materials,Black,1.50,1,0,0,0,ByBirth,280,824,1,"This application was approved due to Income, Y..."
3,1,27.83,1.540,1,1,Industrials,White,3.75,1,1,5,1,ByBirth,100,3,1,This application was approved due to YearsEmpl...
4,1,20.17,5.625,1,1,Industrials,White,1.71,1,0,0,0,ByOtherMeans,120,0,1,"This application was approved due to Income, Y..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
685,1,21.08,10.085,0,0,Education,Black,1.25,0,0,0,0,ByBirth,260,0,0,"This application was denied due to Employed, Z..."
686,0,22.67,0.750,1,1,Energy,White,2.00,0,1,2,1,ByBirth,200,394,0,"This application was denied due to Income, Zip..."
687,0,25.25,13.500,0,0,Healthcare,Latino,2.00,0,1,1,1,ByBirth,200,1,0,"This application was denied due to Income, Zip..."
688,1,17.92,0.205,1,1,ConsumerStaples,White,0.04,0,0,0,0,ByBirth,280,750,0,This application was denied due to YearsEmploy...


In [8]:
import pandas as pd

# Function to preprocess the data with special tokens for fine-tuning LLaMA3
def preprocess_data_generalized(row):
    # Generate text for each input feature with special tokens
    gender_text = f"<gender> {'Male' if row['Gender'] == 1 else 'Female'} </gender>"
    age_text = f"<age> {row['Age']} </age>"
    debt_text = f"<debt> {row['Debt']} </debt>"
    married_text = f"<married> {'Yes' if row['Married'] == 1 else 'No'} </married>"
    bank_customer_text = f"<bank_customer> {'Yes' if row['BankCustomer'] == 1 else 'No'} </bank_customer>"
    industry_text = f"<industry> {row['Industry']} </industry>"
    ethnicity_text = f"<ethnicity> {row['Ethnicity']} </ethnicity>"
    years_employed_text = f"<years_employed> {row['YearsEmployed']} </years_employed>"
    prior_default_text = f"<prior_default> {'Yes' if row['PriorDefault'] == 1 else 'No'} </prior_default>"
    employed_text = f"<employed> {'Yes' if row['Employed'] == 1 else 'No'} </employed>"
    credit_score_text = f"<credit_score> {row['CreditScore']} </credit_score>"
    drivers_license_text = f"<drivers_license> {'Yes' if row['DriversLicense'] == 1 else 'No'} </drivers_license>"
    citizen_text = f"<citizen> {row['Citizen']} </citizen>"
    zip_code_text = f"<zip_code> {row['ZipCode']} </zip_code>"
    income_text = f"<income> {row['Income']} </income>"

    # Combine all input text with special tokens
    input_text = " ".join([
        gender_text, age_text, debt_text, married_text, bank_customer_text, 
        industry_text, ethnicity_text, years_employed_text, prior_default_text, 
        employed_text, credit_score_text, drivers_license_text, citizen_text, 
        zip_code_text, income_text
    ])
    
    # Output format for LLaMA fine-tuning (using special tokens for labels)
    output_text = f"<approved> {'Yes' if row['Approved'] == 1 else 'No'} </approved> <reason> {row['Reason']} </reason>"
    
    return {"text": input_text, "label": output_text}

# Apply the generalized preprocessing to the dataframe
df_processed = df.apply(preprocess_data_generalized, axis=1)
df_final = pd.DataFrame(df_processed.tolist())

# Display the first few rows of the processed data
print(df_final.head())


                                                text  \
0  <gender> Male </gender> <age> 30.83 </age> <de...   
1  <gender> Female </gender> <age> 58.67 </age> <...   
2  <gender> Female </gender> <age> 24.5 </age> <d...   
3  <gender> Male </gender> <age> 27.83 </age> <de...   
4  <gender> Male </gender> <age> 20.17 </age> <de...   

                                               label  
0  <approved> Yes </approved> <reason> This appli...  
1  <approved> Yes </approved> <reason> This appli...  
2  <approved> Yes </approved> <reason> This appli...  
3  <approved> Yes </approved> <reason> This appli...  
4  <approved> Yes </approved> <reason> This appli...  


In [9]:
from datasets import Dataset
dataset = Dataset.from_pandas(df_final)
dataset['text'][3]

'<gender> Male </gender> <age> 27.83 </age> <debt> 1.54 </debt> <married> Yes </married> <bank_customer> Yes </bank_customer> <industry> Industrials </industry> <ethnicity> White </ethnicity> <years_employed> 3.75 </years_employed> <prior_default> Yes </prior_default> <employed> Yes </employed> <credit_score> 5 </credit_score> <drivers_license> Yes </drivers_license> <citizen> ByBirth </citizen> <zip_code> 100 </zip_code> <income> 3 </income>'

In [10]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)
print(modules)

['q_proj', 'up_proj', 'k_proj', 'gate_proj', 'v_proj', 'down_proj', 'o_proj']


In [11]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

In [12]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=10,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

In [13]:
from datasets import Dataset

# Assuming dataset is a Dataset object with columns 'text' and 'label'
train_test_split = dataset.train_test_split(test_size=0.2)  # Split into 80% train, 20% test
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",  # The column containing the input text
    tokenizer=tokenizer,
    args=training_arguments,
    packing=False,
)



Deprecated positional argument(s) used in SFTTrainer, please use the SFTConfig to set these arguments instead.


Map:   0%|          | 0/552 [00:00<?, ? examples/s]

Map:   0%|          | 0/138 [00:00<?, ? examples/s]

In [14]:
trainer.train()



Step,Training Loss,Validation Loss
552,0.2085,0.245062
1104,0.2017,0.236781
1656,0.1703,0.250319
2208,0.1169,0.306822
2760,0.0604,0.376684














TrainOutput(global_step=2760, training_loss=0.18341788254759234, metrics={'train_runtime': 5273.6796, 'train_samples_per_second': 1.047, 'train_steps_per_second': 0.523, 'total_flos': 1.319618250504192e+16, 'train_loss': 0.18341788254759234, 'epoch': 10.0})

In [15]:
wandb.finish()

[34m[1mwandb[0m:                                                                                


[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run history:
[34m[1mwandb[0m:               eval/loss ▁▁▂▅█
[34m[1mwandb[0m:            eval/runtime █▃▄▂▁
[34m[1mwandb[0m: eval/samples_per_second ▁▆▅▇█
[34m[1mwandb[0m:   eval/steps_per_second ▁▆▅▇█
[34m[1mwandb[0m:             train/epoch ▁▁▁▂▂▂▂▂▂▂▃▃▃▄▄▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▆▇▇▇▇▇███
[34m[1mwandb[0m:       train/global_step ▁▁▁▂▂▂▂▂▃▃▃▃▃▃▃▃▃▄▄▄▄▄▄▅▅▅▅▆▆▆▆▆▇▇▇█████
[34m[1mwandb[0m:         train/grad_norm ▆▃▄▂▃▂▁▁▃▂▂▃▂▁▁▂▂▂▃▂▄█▇▅▄▆▅▄▄▅▂▄▂▃▃▃█▃▅▄
[34m[1mwandb[0m:     train/learning_rate ████▇▇▇▇▇▇▆▆▆▆▆▅▅▅▄▄▄▄▄▄▃▃▃▃▃▃▂▂▂▂▂▂▁▁▁▁
[34m[1mwandb[0m:              train/loss █▇▄▇▅▅▅▆▅▆▄▇██▅▆▆▄▆▄▅▄▄▃▄▃▃▂▂▂▂▂▂▂▂▁▁▁▁▁
[34m[1mwandb[0m: 
[34m[1mwandb[0m: Run summary:
[34m[1mwandb[0m:                eval/loss 0.37668
[34m[1mwandb[0m:             eval/runtime 61.7195
[34m[1mwandb[0m:  eval/samples_per_second 2.236
[34m[1mwandb[0m:    eval/steps_per_second 2.236
[34m[1mwandb[0m:               total_flos 1.319618250504

[34m[1mwandb[0m: 🚀 View run [33mquiet-field-26[0m at: [34m[4mhttps://wandb.ai/dnicho26-university-of-north-carolina-at-charlotte/Fine-tune%20Llama%203%20on%20CC%20Dataset/runs/mo044jg6[0m
[34m[1mwandb[0m: ⭐️ View project at: [34m[4mhttps://wandb.ai/dnicho26-university-of-north-carolina-at-charlotte/Fine-tune%20Llama%203%20on%20CC%20Dataset[0m
[34m[1mwandb[0m: Synced 5 W&B file(s), 0 media file(s), 0 artifact file(s) and 0 other file(s)


[34m[1mwandb[0m: Find logs at: [35m[1m./wandb/run-20241024_215327-mo044jg6/logs[0m


In [16]:
# Instruction tailored to credit card approval context
instruction = """You are a highly knowledgeable financial advisor specializing in credit card approvals. 
    Be informative, polite, and provide clear responses to any queries regarding credit approval decisions.
    """

# Example message (user asking about credit card approval)
messages = [
    {"role": "system", "content": instruction},
    {"role": "user", "content": "Can I know why my credit card application was rejected? My age is 30, income is $40,000, and credit score is 580."}
]

# Generate the prompt using the chat template (assuming tokenizer.apply_chat_template is a custom method for your setup)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize the prompt
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

# Generate model outputs (adjusting parameters if necessary)
outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

# Decode the model's response
text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the assistant's response (assuming the response begins after the 'assistant' token)
print(text.split("assistant")[1])

# Save the fine-tuned model and tokenizer for future use
model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)


Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)



Dear [Your Name],

Thank you for your interest in our credit card program. I'm happy to provide you with information regarding your application.

Your credit score is 580, which is a good score. However, our credit card program requires a minimum credit score of 660. This is because our program is designed for customers who have a history of making on-time payments.

Based on your income and credit score, we were unable to approve your application. This is because your income is below our minimum income requirement of $6600. As a result, we were unable to determine whether you have sufficient credit history to support the credit card.

I would like to suggest that you consider applying for our other financial products, such as our healthcare plan or our




('./models/llama-3.2-3b-CC/tokenizer_config.json',
 './models/llama-3.2-3b-CC/special_tokens_map.json',
 './models/llama-3.2-3b-CC/tokenizer.json')

In [17]:
# Example custom prompt provided by the user
custom_prompt = "Age: 27.83, CreditScore: 5, Income: 3, YearsEmployed: 3.75, Gender: Male, Married: Yes, Industry: Industrials, Ethnicity: White, PriorDefault: Yes, Employed: Yes"

# You don't need a system message if you are simply testing this input directly
messages = [
    {"role": "system", "content": instruction},
    {"role": "user", "content": custom_prompt}
]

# Generate the prompt using the chat template (if using custom chat template generation)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize the custom prompt
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

# Generate output from the model
outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

# Decode the model's response
response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the assistant's response
print(response_text.split("assistant")[1])



Age: 20.0, CreditScore: 11, Income: 0, YearsEmployed: 0.0, Gender: Male, Married: No, Industry: Energy, Ethnicity: White, PriorDefault: Yes, Employed: Yes, Debt: 0, CreditScore: 0, Industry: Energy, Ethnicity: White, PriorDefault: No, Employed: No, YearsEmployed: 0.0, Gender: Male, Married: No, Industry: Industrials, Ethnicity: White, PriorDefault: No, Employed: No, YearsEmployed: 0.0, Gender: Male, Married: No, Industry: Industrials, Ethnicity: White,
