<span style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">An Exception was encountered at '<a href="#papermill-error-cell">In [18]</a>'.</span>

In [1]:
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
    TrainerCallback,
)
from peft import (
    LoraConfig,
    PeftModel,
    prepare_model_for_kbit_training,
    get_peft_model,
)
import os, torch, wandb
from datasets import load_dataset
from trl import SFTTrainer, setup_chat_format
import pandas as pd

In [2]:
# huggingface-cli login --token key   
# wandb login --relogin key

In [3]:
import pandas as pd
df = pd.read_csv('/opt/notebooks/Chatbot-Credit-Card/backend/dataset/target-augmented.csv')
df.head()

Unnamed: 0,ID,CODE_GENDER,FLAG_OWN_CAR,FLAG_OWN_REALTY,CNT_CHILDREN,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,NAME_EDUCATION_TYPE,NAME_FAMILY_STATUS,NAME_HOUSING_TYPE,...,FLAG_WORK_PHONE,FLAG_PHONE,FLAG_EMAIL,OCCUPATION_TYPE,CNT_FAM_MEMBERS,begin_month,STATUS,Prediction,Actual,Explanation
0,5008804,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,1.0,0.0,0.0,,2.0,-15.0,X,Approved,Approved,This application was approved due to Client Nu...
1,5008805,M,Y,Y,0,427500.0,Working,Higher education,Civil marriage,Rented apartment,...,1.0,0.0,0.0,,2.0,-14.0,X,Approved,Approved,This application was approved due to Client Nu...
2,5008806,M,Y,Y,0,112500.0,Working,Secondary / secondary special,Married,House / apartment,...,0.0,0.0,0.0,Security staff,2.0,-29.0,X,Approved,Approved,This application was approved due to Client Nu...
3,5008808,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,...,0.0,1.0,1.0,Sales staff,1.0,-4.0,X,Approved,Approved,This application was approved due to Client Nu...
4,5008809,F,N,Y,0,270000.0,Commercial associate,Secondary / secondary special,Single / not married,House / apartment,...,0.0,1.0,1.0,Sales staff,1.0,-26.0,X,Approved,Approved,This application was approved due to Client Nu...


In [4]:
print(df['Explanation'].iloc[0])

This application was approved due to Client Number, -0.15, Family Size, Age (Days), Annual Income, -0.77, Phone, -1.62, Employment Duration (Days).


In [5]:
import re

# Function to remove numbers and clean the Explanation column
def clean_explanation(text):
    return re.sub(r'[-+]?\d*\.\d+|\d+', '', text).replace(', ,', ',').replace(' ,', ',').strip(", ")

# Apply the cleaning function to the Explanation column
df["Explanation"] = df["Explanation"].apply(clean_explanation)

# Display the DataFrame with the cleaned Explanation column
print(df['Explanation'].iloc[0])

This application was approved due to Client Number, Family Size, Age (Days), Annual Income, Phone, Employment Duration (Days).


In [6]:
from datetime import datetime
import pandas as pd

# Define the reference date (first view of the dataset)
reference_date = datetime(2023, 11, 12)

# Ensure DAYS_BIRTH has no missing values
df['DAYS_BIRTH'].fillna(0, inplace=True)  # Assuming missing values indicate invalid or unknown data
df['Age (Years)'] = ((-df['DAYS_BIRTH'] // 365).astype(int))
df['Age (Years)'] = df['Age (Years)'].astype(str) + " years old"

# Ensure DAYS_EMPLOYED has no missing values
df['DAYS_EMPLOYED'].fillna(0, inplace=True)  # Assuming missing values indicate unemployment
df['Employment Duration'] = df['DAYS_EMPLOYED'].apply(
    lambda x: f"{abs(x) // 365} years" if x < 0 else "Unemployed"
)

# Ensure MONTHS_BALANCE has no missing values
df['begin_month'].fillna(0, inplace=True)  # Assuming 0 means current month
df['Month Balance'] = df['begin_month'].apply(
    lambda x: "Current month" if x == 0 else f"{abs(x)} months ago"
)

# Ensure STATUS has no missing values and map to descriptive strings
status_mapping = {
    "0": "1-29 days past due",
    "1": "30-59 days past due",
    "2": "60-89 days overdue",
    "3": "90-119 days overdue",
    "4": "120-149 days overdue",
    "5": "Overdue or bad debts (write-offs for more than 150 days)",
    "C": "Paid off that month",
    "X": "No loan for the month"
}
df['STATUS'].fillna("X", inplace=True)  # Assuming "X" for missing status
df['Loan Status'] = df['STATUS'].map(status_mapping)

# Optional: Drop the raw columns if not needed
df.drop(['DAYS_BIRTH', 'DAYS_EMPLOYED', 'begin_month', 'STATUS'], axis=1, inplace=True)


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['DAYS_BIRTH'].fillna(0, inplace=True)  # Assuming missing values indicate invalid or unknown data
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['DAYS_EMPLOYED'].fillna(0, inplace=True)  # Assuming missing values indicate unemployment
The behavior will change in pandas 3.0

In [7]:
def preprocess_data_generalized(row):
    try:
        # Extract the 'Explanation' text and convert it to lowercase for consistent matching
        reason_text = row.get('Explanation', '').lower()
        print(f"\nProcessing row: {row.name}")
        print(f"Explanation: {reason_text}")

        # Initialize a list for holding text snippets
        feature_texts = []

        # Debugging: Check each feature condition
        if 'gender' in reason_text and 'Gender' in row and pd.notna(row['Gender']):
            feature_texts.append(f"Gender is {'Male' if row['Gender'] == 1 else 'Female'}")
            print(f"Added feature: Gender - {'Male' if row['Gender'] == 1 else 'Female'}")
        if 'age' in reason_text and 'Age (Years)' in row and pd.notna(row['Age (Years)']):
            feature_texts.append(f"Age (Years) is {row['Age (Years)']}")
            print(f"Added feature: Age (Years) - {row['Age (Years)']}")
        if 'debt' in reason_text and 'Debt' in row and pd.notna(row['Debt']) and row['Debt'] != 0:
            feature_texts.append(f"Debt is {row['Debt']} units")
            print(f"Added feature: Debt - {row['Debt']}")
        if 'marital' in reason_text and 'Marital Status' in row and pd.notna(row['Marital Status']):
            feature_texts.append(f"Marital status is {'Married' if row['Marital Status'] == 1 else 'Single'}")
            print(f"Added feature: Marital Status - {'Married' if row['Marital Status'] == 1 else 'Single'}")
        if 'bank' in reason_text and 'BankCustomer' in row and pd.notna(row['BankCustomer']):
            feature_texts.append(f"Bank customer is {'Yes' if row['BankCustomer'] == 1 else 'No'}")
            print(f"Added feature: BankCustomer - {'Yes' if row['BankCustomer'] == 1 else 'No'}")
        if 'occupation' in reason_text and 'Occupation' in row and pd.notna(row['Occupation']):
            feature_texts.append(f"Occupation is {row['Occupation']}")
            print(f"Added feature: Occupation - {row['Occupation']}")
        if 'ethnicity' in reason_text and 'Ethnicity' in row and pd.notna(row['Ethnicity']):
            feature_texts.append(f"Ethnicity is {row['Ethnicity']}")
            print(f"Added feature: Ethnicity - {row['Ethnicity']}")
        if 'employment' in reason_text and 'Employment Duration' in row and pd.notna(row['Employment Duration']):
            feature_texts.append(f"Employment Duration is {row['Employment Duration']}")
            print(f"Added feature: Employment Duration - {row['Employment Duration']}")
        if 'prior default' in reason_text and 'PriorDefault' in row and pd.notna(row['PriorDefault']):
            feature_texts.append(f"Prior default is {'Yes' if row['PriorDefault'] == 1 else 'No'}")
            print(f"Added feature: Prior Default - {'Yes' if row['PriorDefault'] == 1 else 'No'}")
        if 'employed' in reason_text and 'Employed' in row and pd.notna(row['Employed']):
            feature_texts.append(f"Currently employed is {'Yes' if row['Employed'] == 1 else 'No'}")
            print(f"Added feature: Employed - {'Yes' if row['Employed'] == 1 else 'No'}")
        if 'credit score' in reason_text and 'CreditScore' in row and pd.notna(row['CreditScore']):
            feature_texts.append(f"Credit score is {row['CreditScore']}")
            print(f"Added feature: Credit Score - {row['CreditScore']}")
        if 'driver' in reason_text and 'Work Phone' in row and pd.notna(row['Work Phone']):
            feature_texts.append(f"Has driver's license is {'Yes' if row['Work Phone'] == 1 else 'No'}")
            print(f"Added feature: Work Phone (Driver's License) - {'Yes' if row['Work Phone'] == 1 else 'No'}")
        if 'citizen' in reason_text and 'Citizen' in row and pd.notna(row['Citizen']):
            feature_texts.append(f"Citizen is {row['Citizen']}")
            print(f"Added feature: Citizen - {row['Citizen']}")
        if 'zip' in reason_text and 'ZipCode' in row and pd.notna(row['ZipCode']):
            feature_texts.append(f"Zip code is {row['ZipCode']}")
            print(f"Added feature: ZipCode - {row['ZipCode']}")
        if 'income' in reason_text and 'Annual Income' in row and pd.notna(row['Annual Income']) and row['Annual Income'] != 0:
            feature_texts.append(f"Annual income is {row['Annual Income']} units")
            print(f"Added feature: Annual Income - {row['Annual Income']}")

        # Combine all valid feature texts into the input
        input_text = ". ".join(feature_texts) + "."
        if not feature_texts:
            input_text = "No relevant features extracted."
            print("No relevant features extracted for this row.")

        # Generate label text
        label_text = f"Application approved: {'Yes' if row.get('Approved', 0) == 1 else 'No'}."
        print(f"Generated text: {input_text}")
        print(f"Generated label: {label_text}")

        return {"text": input_text, "label": label_text}

    except Exception as e:
        print(f"Error processing row: {row.to_dict()} - {e}")
        return None

In [8]:

# Assume df is your original DataFrame
# Apply the generalized preprocessing to the dataframe
df_processed = df.apply(preprocess_data_generalized, axis=1)

# Drop rows where preprocessing failed (None values)
df_processed = df_processed.dropna()

# Convert the processed Series to a DataFrame
df_final = pd.DataFrame(df_processed.tolist())

# Display the first few rows of the processed data
df_final.head()



Processing row: 0
Explanation: this application was approved due to client number, family size, age (days), annual income, phone, employment duration (days).
Added feature: Age (Years) - 32 years old
Added feature: Employment Duration - 12.0 years
Generated text: Age (Years) is 32 years old. Employment Duration is 12.0 years.
Generated label: Application approved: No.

Processing row: 1
Explanation: this application was approved due to client number, age (days), work phone, education level,, number of children.
Added feature: Age (Years) - 32 years old
Generated text: Age (Years) is 32 years old.
Generated label: Application approved: No.

Processing row: 2
Explanation: this application was approved due to client number, phone, number of children, email, marital status, work phone, annual income, housing type.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 3
Explanation: this appl

Generated text: Employment Duration is 12.0 years.
Generated label: Application approved: No.

Processing row: 37
Explanation: this application was approved due to client number, housing type, employment duration (days), occupation,, email, income category, age (days), number of children, annual income.
Added feature: Age (Years) - 56 years old
Added feature: Employment Duration - 12.0 years
Generated text: Age (Years) is 56 years old. Employment Duration is 12.0 years.
Generated label: Application approved: No.

Processing row: 38
Explanation: this application was approved due to client number,, number of children, employment duration (days), occupation, family size, .
Added feature: Employment Duration - 12.0 years
Generated text: Employment Duration is 12.0 years.
Generated label: Application approved: No.

Processing row: 39
Explanation: this application was approved due to client number,, employment duration (days), marital status, income category, number of children, age (days), 

Added feature: Age (Years) - 31 years old
Generated text: Age (Years) is 31 years old.
Generated label: Application approved: No.

Processing row: 1569
Explanation: this application was approved due to client number, work phone, email, marital status, age (days), phone, annual income.
Added feature: Age (Years) - 31 years old
Generated text: Age (Years) is 31 years old.
Generated label: Application approved: No.

Processing row: 1570
Explanation: this application was approved due to client number, email, education level, car ownership,, annual income, work phone, occupation.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 1571
Explanation: this application was denied due to client number, education level, phone, housing type, property ownership, annual income, occupation.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Ap

Generated text: Age (Years) is 58 years old.
Generated label: Application approved: No.

Processing row: 2283
Explanation: this application was denied due to housing type, employment duration (days), work phone, marital status, education level, age (days), car ownership, email.
Added feature: Age (Years) - 57 years old
Added feature: Employment Duration - Unemployed
Generated text: Age (Years) is 57 years old. Employment Duration is Unemployed.
Generated label: Application approved: No.

Processing row: 2284
Explanation: this application was denied due to housing type, age (days), email, family size, income category, education level, employment duration (days).
Added feature: Age (Years) - 57 years old
Added feature: Employment Duration - Unemployed
Generated text: Age (Years) is 57 years old. Employment Duration is Unemployed.
Generated label: Application approved: No.

Processing row: 2285
Explanation: this application was denied due to age (days), marital status, family size, number


Processing row: 2492
Explanation: this application was denied due to, marital status, employment duration (days).
Added feature: Employment Duration - 10.0 years
Generated text: Employment Duration is 10.0 years.
Generated label: Application approved: No.

Processing row: 2493
Explanation: this application was denied due to, employment duration (days), housing type, car ownership, family size, email, phone.
Added feature: Employment Duration - 10.0 years
Generated text: Employment Duration is 10.0 years.
Generated label: Application approved: No.

Processing row: 2494
Explanation: this application was denied due to, work phone, employment duration (days), income category, number of children, .
Added feature: Employment Duration - 10.0 years
Generated text: Employment Duration is 10.0 years.
Generated label: Application approved: No.

Processing row: 2495
Explanation: this application was denied due to client number, car ownership, occupation, email, family size, marital status.
No rel


Processing row: 2817
Explanation: this application was denied due to, family size, marital status, car ownership, phone, number of children.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 2818
Explanation: this application was denied due to, housing type, car ownership, annual income, work phone,, family size, number of children.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 2819
Explanation: this application was denied due to,, family size, email, car ownership.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 2820
Explanation: this application was denied due to, family size, annual income, employment duration (days), car ownership, housing type, .
Added feature: Employmen


Processing row: 3252
Explanation: this application was denied due to, education level, marital status.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 3253
Explanation: this application was denied due to, housing type, age (days), employment duration (days), income category, phone, work phone, email, .
Added feature: Age (Years) - 59 years old
Added feature: Employment Duration - Unemployed
Generated text: Age (Years) is 59 years old. Employment Duration is Unemployed.
Generated label: Application approved: No.

Processing row: 3254
Explanation: this application was denied due to, work phone, family size, education level, housing type, income category, employment duration (days).
Added feature: Employment Duration - Unemployed
Generated text: Employment Duration is Unemployed.
Generated label: Application approved: No.

Processing row: 3255
Explanation: this application was denied 


Processing row: 3637
Explanation: this application was denied due to, employment duration (days), age (days), work phone, education level, property ownership, email, marital status.
Added feature: Age (Years) - 57 years old
Added feature: Employment Duration - Unemployed
Generated text: Age (Years) is 57 years old. Employment Duration is Unemployed.
Generated label: Application approved: No.

Processing row: 3638
Explanation: this application was denied due to, housing type, age (days), annual income, education level, marital status, .
Added feature: Age (Years) - 57 years old
Generated text: Age (Years) is 57 years old.
Generated label: Application approved: No.

Processing row: 3639
Explanation: this application was denied due to, family size, employment duration (days), number of children, age (days), income category, email, car ownership, housing type, work phone, .
Added feature: Age (Years) - 58 years old
Added feature: Employment Duration - Unemployed
Generated text: Age (Years

No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 4165
Explanation: this application was denied due to, work phone, car ownership, age (days), education level, property ownership, income category.
Added feature: Age (Years) - 28 years old
Generated text: Age (Years) is 28 years old.
Generated label: Application approved: No.

Processing row: 4166
Explanation: this application was denied due to, car ownership, marital status, phone, housing type.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 4167
Explanation: this application was denied due to, housing type, car ownership, income category, family size, work phone, phone, .
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 4168
Ex


Processing row: 4703
Explanation: this application was denied due to,, occupation, age (days), marital status, number of children.
Added feature: Age (Years) - 23 years old
Generated text: Age (Years) is 23 years old.
Generated label: Application approved: No.

Processing row: 4704
Explanation: this application was denied due to, housing type, occupation, family size, property ownership, email.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 4705
Explanation: this application was denied due to, family size, number of children, property ownership, marital status, .
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 4706
Explanation: this application was denied due to, housing type, occupation, marital status, work phone, family size, property ownership.
No relevant features extracte


Processing row: 5421
Explanation: this application was denied due to client number, annual income, housing type, car ownership, number of children, work phone, education level.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 5422
Explanation: this application was denied due to client number, email, employment duration (days), age (days), number of children, marital status, .
Added feature: Age (Years) - 29 years old
Added feature: Employment Duration - 1.0 years
Generated text: Age (Years) is 29 years old. Employment Duration is 1.0 years.
Generated label: Application approved: No.

Processing row: 5423
Explanation: this application was denied due to client number, employment duration (days), housing type, car ownership, age (days), education level, work phone, number of children, marital status, .
Added feature: Age (Years) - 29 years old
Added feature: Employment Duration - 1.0 y

Added feature: Employment Duration - Unemployed
Generated text: Employment Duration is Unemployed.
Generated label: Application approved: No.

Processing row: 5992
Explanation: this application was denied due to client number, phone, number of children, housing type, employment duration (days), work phone, email, family size.
Added feature: Employment Duration - Unemployed
Generated text: Employment Duration is Unemployed.
Generated label: Application approved: No.

Processing row: 5993
Explanation: this application was denied due to client number, housing type,, phone, number of children, income category, property ownership.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 5994
Explanation: this application was denied due to client number, housing type, employment duration (days), property ownership, car ownership, work phone, income category.
Added feature: Employment Duration - Un



Processing row: 6734
Explanation: this application was denied due to,, age (days), email, car ownership.
Added feature: Age (Years) - 64 years old
Generated text: Age (Years) is 64 years old.
Generated label: Application approved: No.

Processing row: 6735
Explanation: this application was denied due to, housing type, email, number of children, family size, income category, marital status.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 6736
Explanation: this application was denied due to, housing type, email, income category, age (days), family size, marital status, number of children, car ownership, .
Added feature: Age (Years) - 64 years old
Generated text: Age (Years) is 64 years old.
Generated label: Application approved: No.

Processing row: 6737
Explanation: this application was denied due to, work phone, car ownership, age (days), email, family size, income category, numbe


No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 7321
Explanation: this application was denied due to, housing type, phone, work phone, number of children,, marital status,, annual income, family size.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 7322
Explanation: this application was denied due to, housing type, work phone,, annual income, marital status, email.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 7323
Explanation: this application was denied due to, family size, annual income, marital status, number of children, email, work phone, .
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approve


Processing row: 8221
Explanation: this application was denied due to client number,, email, family size, housing type, marital status, income category, car ownership.
No relevant features extracted for this row.
Generated text: No relevant features extracted.
Generated label: Application approved: No.

Processing row: 8222
Explanation: this application was denied due to, family size, email, age (days), property ownership, work phone, .
Added feature: Age (Years) - 56 years old
Generated text: Age (Years) is 56 years old.
Generated label: Application approved: No.

Processing row: 8223
Explanation: this application was denied due to, housing type, family size, employment duration (days), marital status, email, number of children, car ownership.
Added feature: Employment Duration - Unemployed
Generated text: Employment Duration is Unemployed.
Generated label: Application approved: No.

Processing row: 8224
Explanation: this application was denied due to, car ownership, email, property o

Unnamed: 0,text,label
0,Age (Years) is 32 years old. Employment Durati...,Application approved: No.
1,Age (Years) is 32 years old.,Application approved: No.
2,No relevant features extracted.,Application approved: No.
3,No relevant features extracted.,Application approved: No.
4,No relevant features extracted.,Application approved: No.


In [9]:
# Convert the first row to a dictionary for better readability
first_row_dict = df_final.iloc[0].to_dict()
print(first_row_dict)

{'text': 'Age (Years) is 32 years old. Employment Duration is 12.0 years.', 'label': 'Application approved: No.'}


In [10]:
base_model = "meta-llama/Llama-3.2-3B-Instruct"
new_model = "/opt/notebooks/Chatbot-Credit-Card/models/llama-3.2-3b-CC"

In [11]:
torch_dtype = torch.float16
attn_implementation = "eager"

In [12]:
# QLoRA config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch_dtype,
    bnb_4bit_use_double_quant=True,
)
# Load model
model = AutoModelForCausalLM.from_pretrained(
    base_model,
    quantization_config=bnb_config,
    device_map="auto",
    attn_implementation=attn_implementation
)

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [13]:
from datasets import Dataset
dataset = Dataset.from_pandas(df_final)
dataset['text'][3]

'No relevant features extracted.'

In [14]:
import bitsandbytes as bnb

def find_all_linear_names(model):
    cls = bnb.nn.Linear4bit
    lora_module_names = set()
    for name, module in model.named_modules():
        if isinstance(module, cls):
            names = name.split('.')
            lora_module_names.add(names[0] if len(names) == 1 else names[-1])
    if 'lm_head' in lora_module_names:  # needed for 16 bit
        lora_module_names.remove('lm_head')
    return list(lora_module_names)

modules = find_all_linear_names(model)
print(modules)

['v_proj', 'down_proj', 'q_proj', 'gate_proj', 'k_proj', 'up_proj', 'o_proj']


In [15]:
# LoRA config
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=modules
)
model, tokenizer = setup_chat_format(model, tokenizer)
model = get_peft_model(model, peft_config)

In [16]:
#Hyperparamter
training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

In [17]:
from datasets import Dataset

# Assuming dataset is a Dataset object with columns 'text' and 'label'
train_test_split = dataset.train_test_split(test_size=0.2)  # Split into 80% train, 20% test
train_dataset = train_test_split['train']
eval_dataset = train_test_split['test']

<span id="papermill-error-cell" style="color:red; font-family:Helvetica Neue, Helvetica, Arial, sans-serif; font-size:2em;">Execution using papermill encountered an exception here and stopped:</span>

In [18]:
# Custom Callback to save model every 5 epochs
class SaveEveryNEpochsCallback(TrainerCallback):
    def __init__(self, save_every_n_epochs, output_dir):
        self.save_every_n_epochs = save_every_n_epochs
        self.output_dir = output_dir

    def on_epoch_end(self, args, state: TrainerState, control: TrainerControl, **kwargs):
        if state.epoch % self.save_every_n_epochs == 0:
            model_save_path = os.path.join(self.output_dir, f"checkpoint-{int(state.epoch)}-epochs")
            kwargs["model"].save_pretrained(model_save_path)
            kwargs["tokenizer"].save_pretrained(model_save_path)
            print(f"Model saved at {model_save_path}")

NameError: name 'TrainerState' is not defined

In [None]:
run = wandb.init(
    project='Fine-tune Llama 3 on CC Dataset', 
    job_type="training", 
    anonymous="allow"
)

In [None]:
# Adjust the training arguments for the single epoch run
single_epoch_training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=1,  # Only run for one epoch first
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

# Train for one epoch and save as new_model + "-1-epoch"
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=peft_config,
    max_seq_length=512,
    dataset_text_field="text",
    tokenizer=tokenizer,
    args=single_epoch_training_arguments,
    packing=False,
)
trainer.train()
model.save_pretrained(new_model + "-1-epoch")
tokenizer.save_pretrained(new_model + "-1-epoch")
print(f"Model saved after 1 epoch at {new_model + '-1-epoch'}")

In [None]:
from transformers import EarlyStoppingCallback
early_stopping_patience = 5  # Number of eval steps with no improvement before stopping

# Now, reconfigure for full training with 100 epochs and save every 5 epochs
full_training_arguments = TrainingArguments(
    output_dir=new_model,
    per_device_train_batch_size=1,
    per_device_eval_batch_size=1,
    gradient_accumulation_steps=2,
    optim="paged_adamw_32bit",
    num_train_epochs=100,  # Set to full training epochs
    eval_strategy="steps",
    eval_steps=0.2,
    logging_steps=1,
    warmup_steps=10,
    logging_strategy="steps",
    learning_rate=2e-4,
    fp16=False,
    bf16=False,
    group_by_length=True,
    report_to="wandb"
)

In [None]:
# Run the full training with periodic saving
trainer.args = full_training_arguments  # Update the training arguments
trainer.add_callback(SaveEveryNEpochsCallback(save_every_n_epochs=5, output_dir=new_model))
trainer.add_callback(EarlyStoppingCallback(early_stopping_patience=early_stopping_patience))

trainer.train()
wandb.finish()

In [None]:
# Instruction tailored to credit card approval context
instruction = """You are a highly knowledgeable financial advisor specializing in credit card approvals. 
    Be informative, polite, and provide clear responses to any queries regarding credit approval decisions.
    """

# Example message (user asking about credit card approval)
messages = [
    {"role": "system", "content": instruction},
    {"role": "user", "content": "Can I know why my credit card application was rejected? My age is 30, income is $40,000, and credit score is 580."}
]

# Generate the prompt using the chat template (assuming tokenizer.apply_chat_template is a custom method for your setup)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize the prompt
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

# Generate model outputs (adjusting parameters if necessary)
outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

# Decode the model's response
text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the assistant's response (assuming the response begins after the 'assistant' token)
print(text.split("assistant")[1])

# Save the fine-tuned model and tokenizer for future use
model.save_pretrained(new_model)
tokenizer.save_pretrained(new_model)


In [None]:
# Example custom prompt provided by the user
custom_prompt = "Age: 27.83, CreditScore: 5, Income: 3, YearsEmployed: 3.75, Gender: Male, Married: Yes, Industry: Industrials, Ethnicity: White, PriorDefault: Yes, Employed: Yes"

# You don't need a system message if you are simply testing this input directly
messages = [
    {"role": "system", "content": instruction},
    {"role": "user", "content": custom_prompt}
]

# Generate the prompt using the chat template (if using custom chat template generation)
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Tokenize the custom prompt
inputs = tokenizer(prompt, return_tensors='pt', padding=True, truncation=True).to("cuda")

# Generate output from the model
outputs = model.generate(**inputs, max_new_tokens=150, num_return_sequences=1)

# Decode the model's response
response_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

# Print the assistant's response
print(response_text.split("assistant")[1])
