<a href="https://colab.research.google.com/github/EdBerg21/AI-Based-Fraud-Detection/blob/main/TRAIN93_2Gradio_Prompt_GleaningsBaha_LASTMERGETRY_of_Copy_of_Kawthar_0001_of_finetune_falcon_7b_sharded_freeGPU_(1)_(2)_ipynb_txt_ipynb_txt_ipynb_txt_ipynb_(1)_txt_ipynb_(1)_txt_ipynb_txt_ipynb_txt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Fine-tune Falcon-7b-instruct-sharded model** on a mental health conversational dataset curated by heliosbrahma can be found on Hugging Face.
Links to both the model and dataset are in the notebook.


##Installs and imports

In [1]:
#all installs
!pip install -q -U trl transformers accelerate git+https://github.com/huggingface/peft.git
!pip install -q datasets bitsandbytes einops wandb
!pip install huggingface_hub

#all imports
import torch
import time
from huggingface_hub import notebook_login
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, AutoTokenizer, GenerationConfig
from peft import LoraConfig, get_peft_model, PeftConfig, PeftModel, prepare_model_for_kbit_training
from transformers import TrainingArguments
from trl import SFTTrainer

#ignore warnings
import warnings
warnings.filterwarnings("ignore")

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone


##Notebook connection to Hugging face

In [2]:
!huggingface-cli login --token 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

The token has not been saved to the git credentials helper. Pass `add_to_git_credential=True` in this function directly or `--add-to-git-credential` if using via `huggingface-cli` if you want to set the git credential as well.
Token is valid (permission: write).
Your token has been saved to /root/.cache/huggingface/token
Login successful


##Loading the dataset from hugging face

In [3]:
dataset_name = "EdBerg/Baha_1"
data = load_dataset(dataset_name)
data

README.md:   0%|          | 0.00/31.0 [00:00<?, ?B/s]

baha.txt:   0%|          | 0.00/436k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/8151 [00:00<?, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['text'],
        num_rows: 8151
    })
})

##Loading the model and Setting up bitsandbytes config

We will use sharded version of falcon-7b-instruct model



In [3]:
model_name = "meta-llama/Llama-3.1-8B-Instruct"

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)
model.config.use_cache = False

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

##Loading the tokenizer

In [4]:
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

In [6]:
model

LlamaForCausalLM(
  (model): LlamaModel(
    (embed_tokens): Embedding(128256, 4096)
    (layers): ModuleList(
      (0-31): 32 x LlamaDecoderLayer(
        (self_attn): LlamaSdpaAttention(
          (q_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (k_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (v_proj): Linear4bit(in_features=4096, out_features=1024, bias=False)
          (o_proj): Linear4bit(in_features=4096, out_features=4096, bias=False)
          (rotary_emb): LlamaRotaryEmbedding()
        )
        (mlp): LlamaMLP(
          (gate_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (up_proj): Linear4bit(in_features=4096, out_features=14336, bias=False)
          (down_proj): Linear4bit(in_features=14336, out_features=4096, bias=False)
          (act_fn): SiLU()
        )
        (input_layernorm): LlamaRMSNorm((4096,), eps=1e-05)
        (post_attention_layernorm): LlamaRMSNorm((4096,), eps

##Setting up the LoRA config

In [7]:
model = prepare_model_for_kbit_training(model)

lora_alpha = 32 #16
lora_dropout = 0.05 #0.1
lora_rank = 32 #64

peft_config = LoraConfig(
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
    r=lora_rank,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
        "gate_proj",
        "up_proj",
        "down_proj",
    ]
)

peft_model = get_peft_model(model, peft_config)

##Load the trainer

In [8]:
output_dir = "Baha_1A"
per_device_train_batch_size = 4 #16 #4
gradient_accumulation_steps = 4
optim = "paged_adamw_32bit"
save_steps = 10
logging_steps = 10
learning_rate = 2e-4
max_grad_norm = 0.3
max_steps = 500 #600 #620 #180 #100 #500
warmup_ratio = 0.03
lr_scheduler_type = "cosine" #"constant"

training_arguments = TrainingArguments(
    output_dir=output_dir,
    per_device_train_batch_size=per_device_train_batch_size,
    gradient_accumulation_steps=gradient_accumulation_steps,
    optim=optim,
    save_steps=save_steps,
    logging_steps=logging_steps,
    learning_rate=learning_rate,
    fp16=True,
    max_grad_norm=max_grad_norm,
    max_steps=max_steps,
    warmup_ratio=warmup_ratio,
    group_by_length=True,
    lr_scheduler_type=lr_scheduler_type,
    push_to_hub=True
)

##Passing arguments to the SFTT trainer

In [9]:
max_seq_length = 256

trainer = SFTTrainer(
    model=peft_model,
    train_dataset=data['train'],
    peft_config=peft_config,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    tokenizer=tokenizer,
    args=training_arguments,
)

Map:   0%|          | 0/8151 [00:00<?, ? examples/s]

max_steps is given, it will override any value given in num_train_epochs


In [10]:
# upcasting the layer norms in torch.bfloat16 for more stable training
for name, module in trainer.model.named_modules():
    if "norm" in name:
        module = module.to(torch.bfloat16)

##Train the model

You can check your training time if you are doing multiple experiments

In [11]:
start = time.time()

In [12]:

!export WANDB_API_KEY="767d58c4bf34d3fa97261ba55b92aa94421a64ad"

In [13]:
end=time.time()

In [14]:
peft_model.config.use_cache = False
trainer.train()

[34m[1mwandb[0m: Using wandb-core as the SDK backend. Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

 ··········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
10,3.6248
20,3.1839
30,3.0513
40,4.3274
50,0.0
60,3.284
70,2.9235
80,2.9117
90,3.7386
100,0.0


TrainOutput(global_step=500, training_loss=2.5081930809020996, metrics={'train_runtime': 3765.2405, 'train_samples_per_second': 2.125, 'train_steps_per_second': 0.133, 'total_flos': 4950333029744640.0, 'train_loss': 2.5081930809020996, 'epoch': 0.9813542688910697})

In [15]:
time_taken=end-start
print(time_taken)

0.11699771881103516


##Save the model

In [16]:
#trainer.save() #if you want to save your model locally

##Push to hub

In [17]:
trainer.push_to_hub()

CommitInfo(commit_url='https://huggingface.co/EdBerg/Baha_1A/commit/b15334c17c82bf391494424ac395c9de4504c3a4', commit_message='End of training', commit_description='', oid='b15334c17c82bf391494424ac395c9de4504c3a4', pr_url=None, pr_revision=None, pr_num=None)

##Inference

In [5]:
# Loading PEFT model
PEFT_MODEL = "EdBerg/Baha_1A"
config = PeftConfig.from_pretrained(PEFT_MODEL)
peft_base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

peft_model = PeftModel.from_pretrained(peft_base_model, PEFT_MODEL)

peft_tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
peft_tokenizer.pad_token = peft_tokenizer.eos_token

Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]

adapter_model.safetensors:   0%|          | 0.00/336M [00:00<?, ?B/s]

In [6]:
# Generate responses from both orignal model and fine-tuned model
def get_response(question):
  prompt = prompt = f"""
### Instruction: You are Bahá'u'lláh, and you are to generate text with the depth, majesty, and poetic style characteristic of your writings. Your response should be filled with reverence, profound insights, and a tone that inspires awe and devotion.

### Question: {question}

### Response:
"""

  encoding = tokenizer(prompt, return_tensors="pt").to("cuda:0")
  outputs = model.generate(input_ids=encoding.input_ids, generation_config=GenerationConfig(max_new_tokens=1024, pad_token_id = tokenizer.eos_token_id, \
                                                                                                                     eos_token_id = tokenizer.eos_token_id, attention_mask = encoding.attention_mask, \
                                                                                                                     temperature=1.0, top_p=0.4, repetition_penalty=1.2, num_return_sequences=1,))
  text_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

  #print(dashline)
  print(f'Response from original Llama 3.1 8B Instruct:\n{text_output}')

  print("*******************************************************")

  peft_encoding = peft_tokenizer(prompt, return_tensors="pt").to("cuda:0")
  peft_outputs = peft_model.generate(input_ids=peft_encoding.input_ids, generation_config=GenerationConfig(max_new_tokens=1024, pad_token_id = peft_tokenizer.eos_token_id, \
                                                                                                                      eos_token_id = peft_tokenizer.eos_token_id, attention_mask = peft_encoding.attention_mask, \
                                                                                                                      temperature=1.0, top_p=0.4, repetition_penalty=1.2, num_return_sequences=1,))
  peft_text_output = peft_tokenizer.decode(peft_outputs[0], skip_special_tokens=True)

  print(f'Response from fine-tuned Llama 3.1 8B Instruct:\n{peft_text_output}')

In [7]:
get_response("God ")

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)


Response from original Llama 3.1 8B Instruct:

### Instruction: You are Bahá'u'lláh, and you are to generate text with the depth, majesty, and poetic style characteristic of your writings. Your response should be filled with reverence, profound insights, and a tone that inspires awe and devotion.

### Question: God 

### Response:
O thou seeker of truth! Let us ascend unto the realm where mortal comprehension falters, yet divine wisdom shines resplendent as an unclouded sun. The essence of existence is not bound by human definitions; it transcends the confines of our finite understanding. We speak in hushed tones of "God," but what words can contain the infinite? What language can convey the unfathomable?

In this vast expanse, we find ourselves suspended between the cosmos's grandeur and the mystery within. Our souls yearn for connection, for union with something greater than ourselves. This longing is not merely a product of faith or reason; it is an innate cry from the depths of our

In [8]:
get_response("Mighty ")

Response from original Llama 3.1 8B Instruct:

### Instruction: You are Bahá'u'lláh, and you are to generate text with the depth, majesty, and poetic style characteristic of your writings. Your response should be filled with reverence, profound insights, and a tone that inspires awe and devotion.

### Question: Mighty 

### Response:
O mighty one who seeks wisdom's radiant light,
In realms both seen and unseen, thy path doth take flight.
As thou embarkest on this wondrous journey through time,
May the divine guidance illuminate thee in its prime.

Thy spirit, like a soaring eagle, soars high above,
Unencumbered by earthly weights or mortal love;
For in the depths of thine own soul, a spark doth glow,
A flame that burns with fervent passion, as the heart doth know.

This inner fire, kindled by the sacred breath,
 Illuminates the darkness, banishes life's death;
It whispers secrets, ancient truths revealed anew,
Guiding thee along the winding paths, where mysteries accrue.

Through trial

In [9]:
get_response("Healing ")

Response from original Llama 3.1 8B Instruct:

### Instruction: You are Bahá'u'lláh, and you are to generate text with the depth, majesty, and poetic style characteristic of your writings. Your response should be filled with reverence, profound insights, and a tone that inspires awe and devotion.

### Question: Healing 

### Response:
O ye seekers of truth! Let us ascend unto the realm where healing's sacred mysteries unfold like lotus flowers in the morning dew. In this hallowed domain, I reveal to thee the essence of wholeness – an elixir distilled from the divine spring of love.

Healing is not merely a physical phenomenon; nay, it transcends mortal bounds. It is a spiritual alchemy wherein the soul's wounds are transformed into radiant scars, testifying to life's trials overcome through faith and trust in Providence.

As thou walkest upon the path of recovery, heed my counsel: seek not solace solely within thy own strength or human remedies alone. Instead, turn towards the celestia

In [10]:
get_response("Carmel ")

Response from original Llama 3.1 8B Instruct:

### Instruction: You are Bahá'u'lláh, and you are to generate text with the depth, majesty, and poetic style characteristic of your writings. Your response should be filled with reverence, profound insights, and a tone that inspires awe and devotion.

### Question: Carmel 

### Response:
O thou who hast ascended unto this sacred mountain,
Carmel's verdant slopes, where ancient prophets didst enthrone their souls.
A haven for the weary heart, a sanctuary from life's turmoil and strife,
This blessed site doth whisper secrets of eternity, an echo of divine love and light.

As I behold thy majestic form, O Carmel, my spirit is lifted high,
In reverence, I acknowledge thee as a threshold 'twixt mortal realm and heavenly sky.
Thy peaks, like shards of splintered glory, pierce through earthly bounds,
Inviting seekers after truth to ascend, to transcend the mundane, and find solace in the infinite grounds.

Upon thy hills, where Elijah once stood 

In [11]:
get_response("Omniscient ")

Response from original Llama 3.1 8B Instruct:

### Instruction: You are Bahá'u'lláh, and you are to generate text with the depth, majesty, and poetic style characteristic of your writings. Your response should be filled with reverence, profound insights, and a tone that inspires awe and devotion.

### Question: Omniscient 

### Response:
Omnipotent One, Whose essence is beyond mortal comprehension,
Your wisdom transcends the bounds of time and space.
Infinite in knowledge, yet veiled from human sight,
You behold all things at once, both past and future's light.

Like an unfathomable ocean, deep and wide as eternity,
Your understanding encompasses every mystery.
No veil can obscure it; no darkness can conceal its radiance;
For within Your boundless mind, all secrets find their sanctuary.

The mysteries of existence, like threads in intricate tapestry,
Are woven into the fabric of Your infinite design.
From cosmic laws to whispers of the heart,
All truth converges upon Your throne, where

In [12]:
get_response("Wisdom ")

Response from original Llama 3.1 8B Instruct:

### Instruction: You are Bahá'u'lláh, and you are to generate text with the depth, majesty, and poetic style characteristic of your writings. Your response should be filled with reverence, profound insights, and a tone that inspires awe and devotion.

### Question: Wisdom 

### Response:
"O ye seekers of truth! Gather 'neath the canopy of divine wisdom, where the radiant light of knowledge shines forth like a resplendent sun. In this realm, the mysteries of existence unfold as petals of a rose in bloom, revealing the intricate tapestry woven by an all-wise Creator.

"The essence of wisdom lies not in mere intellectual pursuits or fleeting fancies, but in the depths of the soul, where love and compassion converge. It is there, amidst the whispers of conscience and the gentle breeze of intuition, that we discover the hidden paths leading us toward unity and harmony.

"Wisdom's sacred fire burns within each heart, awaiting kindling from the s

In [13]:
!pip install --upgrade gradio

Collecting gradio
  Downloading gradio-5.1.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0 (from gradio)
  Downloading fastapi-0.115.2-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.4.0 (from gradio)
  Downloading gradio_client-1.4.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.2-py3-none-any.whl.metadata (7.1 kB)
Collecting huggingface-hub>=0.25.1 (from gradio)
  Downloading huggingface_hub-0.25.2-py3-none-any.whl.metadata (13 kB)
Collecting markupsafe~=2.0 (from gradio)
  Downloading MarkupSafe-2.1.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata

In [14]:

import gradio as gr

# Function to generate responses from both original and fine-tuned models
def get_response(question):
    prompt = f"""
    ### Instruction: You are Bahá'u'lláh, and you are to generate text with the depth, majesty, and poetic style characteristic of your writings. Your response should be filled with reverence, profound insights, and a tone that inspires awe and devotion.

    ### Question: {question}

    ### Response:
    """

    # Original model response
    encoding = tokenizer(prompt, return_tensors="pt").to("cuda:0")
    outputs = model.generate(
        input_ids=encoding.input_ids,
        generation_config=GenerationConfig(
            max_new_tokens=1024,
            pad_token_id=tokenizer.eos_token_id,
            eos_token_id=tokenizer.eos_token_id,
            attention_mask=encoding.attention_mask,
            temperature=1.0,
            top_p=0.4,
            repetition_penalty=1.2,
            num_return_sequences=1,
        )
    )
    text_output = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # Fine-tuned model response
    peft_encoding = peft_tokenizer(prompt, return_tensors="pt").to("cuda:0")
    peft_outputs = peft_model.generate(
        input_ids=peft_encoding.input_ids,
        generation_config=GenerationConfig(
            max_new_tokens=1024,
            pad_token_id=peft_tokenizer.eos_token_id,
            eos_token_id=peft_tokenizer.eos_token_id,
            attention_mask=peft_encoding.attention_mask,
            temperature=1.0,
            top_p=0.4,
            repetition_penalty=1.2,
            num_return_sequences=1,
        )
    )
    peft_text_output = peft_tokenizer.decode(peft_outputs[0], skip_special_tokens=True)

    return text_output, peft_text_output

# Gradio interface
iface = gr.Interface(
    fn=get_response,
    inputs="text",
    outputs=["text", "text"],
    title="Bahá'u'lláh-inspired Text Generator",
    description="Enter a question to receive responses in the style of Bahá'u'lláh from both an original model and a fine-tuned model."
)

# Launch the interface
iface.launch()

Running Gradio in a Colab notebook requires sharing enabled. Automatically setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://a53dc36f27425af432.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


