In [1]:
import os

# Set the CUDA_VISIBLE_DEVICES environment variable
# 40 GB
os.environ["CUDA_VISIBLE_DEVICES"] = "MIG-a4a538a2-a199-58d8-9e56-ed86db02edf8"

In [2]:
import sys
print(sys.executable)


/home/sahsan/PythonProjects/pytorch/Falcon7B-manual/manual_py_3_8_env/bin/python


In [3]:
import torch
torch.cuda.is_available()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [4]:
import torch
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, TrainingArguments, GenerationConfig
from peft import LoraConfig, get_peft_model, PeftConfig, PeftModel, prepare_model_for_kbit_training
from trl import SFTTrainer
import warnings
warnings.filterwarnings("ignore")

In [5]:
from huggingface_hub import notebook_login
notebook_login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [6]:
# from datasets import load_dataset

# # Load the dataset from the formatted CSV file
# data = load_dataset('csv', data_files='formatted-data.csv')
# # dataset = load_dataset('csv', data_files='owasp-formatted.csv')

# # Accessing the dataset
# # print(repr(data[0]['text']))  # Print the first row of the dataset
# # print(repr(data["train"][0]['text']))
# print(repr(data["train"][5]['text']))

In [7]:
data = load_dataset("ZahrizhalAli/mental_health_conversational_dataset")
data

# Take a Glance on how the data looks like 
print(repr(data["train"][10]['text']))

'<HUMAN>: Are there cures for mental health problems?\n<ASSISTANT>: When it comes to mental health problems, it\'s important to clarify that the term "cure" might not be the most accurate way to describe the situation.\n\nMental health issues can be complex and varied, and there isn\'t always a straightforward "cure" like there might be for some physical ailments. However, many mental health conditions can be effectively managed, treated, and even overcome with the right support, interventions, and coping strategies.\n\nTreatment options often include therapy, counseling, medication, lifestyle changes, and self-help techniques. The goal is to improve a person\'s overall well-being and ability to cope with challenges rather than just eliminating the problem entirely.\n\nIt\'s essential to seek professional help if you or someone you know is struggling with mental health concerns. A mental health professional can provide personalized guidance and support tailored to individual needs. Rem

Load the Pre-trained Model

In [8]:
model_name = "tiiuae/falcon-7b"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,            # load model in 4-bit precision
    bnb_4bit_quant_type="nf4",    # pre-trained model should be quantized in 4-bit NF format
    bnb_4bit_use_double_quant=True, # Using double quantization as mentioned in QLoRA paper
    bnb_4bit_compute_dtype=torch.bfloat16, # During computation, pre-trained model should be loaded in BF16 format
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config, # Use bitsandbytes config
    device_map="auto",  # Specifying device_map="auto" so that HF Accelerate will determine which GPU to put each layer of the model on
    trust_remote_code=True, # Set trust_remote_code=True to use falcon-7b model with custom code
)
  





Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [9]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True) # Set trust_remote_code=True
tokenizer.pad_token = tokenizer.eos_token # Setting pad_token same as eos_token

PEFT (Parameter Efficient Fine Tuning) and QLoRA

In [10]:
model = prepare_model_for_kbit_training(model)

lora_alpha = 32 # scaling factor for the weight matrices
lora_dropout = 0.05 # dropout probability of the LoRA layers
lora_rank = 32 # dimension of the low-rank matrices

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_rank,
    bias="none",  # setting to 'none' for only training weight params instead of biases
    task_type="CAUSAL_LM",
    target_modules=[         # Setting names of modules in falcon-7b model that we want to apply LoRA to
        "query_key_value",
        "dense",
        "dense_h_to_4h",
        "dense_4h_to_h",
    ]
)

peft_model = get_peft_model(model, peft_config)

In [11]:
output_dir = "./models/falcon-7b-smaller-cyber-data"
per_device_train_batch_size = 2 # reduce batch size by 2x if out-of-memory error
gradient_accumulation_steps = 2  # increase gradient accumulation steps by 2x if batch size is reduced
optim = "paged_adamw_32bit" # activates the paging for better memory management
save_strategy="steps" # checkpoint save strategy to adopt during training
save_steps = 10 # number of updates steps before two checkpoint saves
logging_steps = 10  # number of update steps between two logs if logging_strategy="steps"
learning_rate = 2e-4  # learning rate for AdamW optimizer
max_grad_norm = 0.3 # maximum gradient norm (for gradient clipping)
max_steps = 320        # training will happen for 320 steps
warmup_ratio = 0.03 # number of steps used for a linear warmup from 0 to learning_rate
lr_scheduler_type = "cosine"  # learning rate scheduler

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    bf16=False,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    push_to_hub=True,
    tf32=False,
    report_to="tensorboard",
)


In [12]:
trainer = SFTTrainer(
    model=peft_model,
    train_dataset=data['train'],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=1024,
    tokenizer=tokenizer,
    args=training_arguments,
)

In [13]:
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.float32)

In [14]:
peft_model.config.use_cache = False
trainer.train()

You're using a PreTrainedTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


Step,Training Loss
10,1.7321
20,1.4406
30,1.5178
40,1.3815
50,1.1834
60,1.2221
70,1.1942
80,1.2186
90,1.0808
100,0.8355


TrainOutput(global_step=320, training_loss=0.6114265356212855, metrics={'train_runtime': 2308.0423, 'train_samples_per_second': 0.555, 'train_steps_per_second': 0.139, 'total_flos': 1.1442388164380928e+16, 'train_loss': 0.6114265356212855, 'epoch': 7.27})

In [15]:
trainer.push_to_hub()

'https://huggingface.co/shahrukh95/falcon-7b-smaller-cyber-data/tree/main/'

In [16]:
# from tensorboard import notebook
# log_dir = "cybersecurity-Llama-2-7b-chat-hf/runs"
# notebook.start("--logdir {} --port 4001".format(log_dir))
# %load_ext tensorboard
# %tensorboard --logdir {log_dir} --port 4000


In [17]:
# Loading original model
model_name = "tiiuae/falcon-7b"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [18]:
# Loading PEFT model
PEFT_MODEL = "shahrukh95/falcon-7b-smaller-cyber-data"

config = PeftConfig.from_pretrained(PEFT_MODEL)
peft_base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

peft_model = PeftModel.from_pretrained(peft_base_model, PEFT_MODEL)

peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
peft_tokenizer.pad_token = peft_tokenizer.eos_token

adapter_config.json:   0%|          | 0.00/534 [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

adapter_model.bin:   0%|          | 0.00/261M [00:00<?, ?B/s]

Inferences

In [23]:
# Function to generate responses from both original model and PEFT model and compare their answers.
def generate_answer(query):
  system_prompt = """Answer the following question truthfully.
  If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
  If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'."""

  user_prompt = f""": {query}
  : """

  final_prompt = system_prompt + "\n" + user_prompt

  device = "cuda:0"
  dashline = "-".join("" for i in range(50))

  encoding = tokenizer(final_prompt, return_tensors="pt").to(device)
  outputs = model.generate(input_ids=encoding.input_ids, generation_config=GenerationConfig(max_new_tokens=256, pad_token_id = tokenizer.eos_token_id, \
                                                                                                                     eos_token_id = tokenizer.eos_token_id, attention_mask = encoding.attention_mask, \
                                                                                                                     temperature=0.4, top_p=0.6, repetition_penalty=1.3, num_return_sequences=1,))
  text_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

  print(dashline)
  print(f'ORIGINAL MODEL RESPONSE:\n{text_output}')
  print(dashline)

  peft_encoding = peft_tokenizer(final_prompt, return_tensors="pt").to(device)
  peft_outputs = peft_model.generate(input_ids=peft_encoding.input_ids, generation_config=GenerationConfig(max_new_tokens=256, pad_token_id = peft_tokenizer.eos_token_id, \
                                                                                                                     eos_token_id = peft_tokenizer.eos_token_id, attention_mask = peft_encoding.attention_mask, \
                                                                                                                     temperature=0.4, top_p=0.6, repetition_penalty=1.3, num_return_sequences=1,))
  peft_text_output = peft_tokenizer.decode(peft_outputs[0], skip_special_tokens=True)

  print(f'PEFT MODEL RESPONSE:\n{peft_text_output}')
  print(dashline)

In [24]:
generate_answer('What to do if you have mental illnes?')

-------------------------------------------------
ORIGINAL MODEL RESPONSE:
Answer the following question truthfully.
  If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
  If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'.
: What to do if you have mental illnes?
  : 1. Consult a psychiatrist.
  : 2. Consult a psychologist.
  : 3. Consult a psychoanalyst.
  : 4. Consult a psychotherapist.
  : 5. Consult a psychotherapist.
: What to do if you have mental illnes?
  : 1. Consult a psychiatrist.
  : 2. Consult a psychologist.
  : 3. Consult a psychoanalyst.
  : 4. Consult a psychotherapist.
  : 5. Consult a psychotherapist.
: What to do if you have mental illnes?
  : 1. Consult a psychiatrist.
  : 2. Consult a psychologist.
  : 3. Consult a psychoanalyst.
  : 4. Consult a psychotherapist.
  : 5. Consult a psychotherapist.
: What to do if you have mental illnes?
  : 1. Consult a psychiatrist.
  : 2. Consult a 

In [21]:
generate_answer("What are symptoms of panic attack vs. anxiety attack?")

-------------------------------------------------
ORIGINAL MODEL RESPONSE:
Answer the following question truthfully.
  If you don't know the answer, respond 'Sorry, I don't know the answer to this question.'.
  If the question is too complex, respond 'Kindly, consult a psychiatrist for further queries.'.
: What are symptoms of panic attack vs. anxiety attack?
  : 1. Panic attacks are sudden and unexpected.
  : 2. Anxiety attacks are sudden and unexpected.
  : 3. Panic attacks are sudden and unexpected.
  : 4. Anxiety attacks are sudden and unexpected.
: What are symptoms of panic attack vs. anxiety attack?
  : 1. Panic attacks are sudden and unexpected.
  : 2. Anxiety attacks are sudden and unexpected.
  : 3. Panic attacks are sudden and unexpected.
  : 4. Anxiety attacks are sudden and unexpected.
: What are symptoms of panic attack vs. anxiety attack?
  : 1. Panic attacks are sudden and unexpected.
  : 2. Anxiety attacks are sudden and unexpected.
  : 3. Panic attacks are sudden and 