### Install Dependencies

In [1]:
%%capture
# Installs Unsloth, Xformers (Flash Attention) and all other packages!
# The %%capture magic above hides the pip output of this cell.
# PyTorch stack pinned to 2.3.0 / 0.18.0 / 2.3.0 and fetched from the CUDA 11.8 wheel index.
!pip install torch==2.3.0 torchvision==0.18.0 torchaudio==2.3.0 --index-url https://download.pytorch.org/whl/cu118
# Unsloth is installed straight from its GitHub repository (no version pin) with the "colab-new" extra.
!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# --no-deps installs these packages without their dependencies
# (presumably so pip does not replace the pinned torch build -- TODO confirm).
!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes
# Dataset loading, plotting, widgets and evaluation-metric packages.
!pip install datasets scipy ipywidgets matplotlib evaluate rouge_score
# Weights & Biases client: quiet (-q) and upgraded to the latest release (-U).
!pip install wandb -qU
# NOTE(review): this unpinned upgrade runs last, so the transformers / huggingface_hub
# versions can differ between runs -- consider pinning for reproducibility.
!pip install --upgrade transformers huggingface_hub

### Set Up Environment Variables

In [2]:
from google.colab import userdata

### Load Dataset

In [3]:
# Mount Google Drive at /content/drive; the dataset path used later lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
from datasets import load_dataset
# Read the JSON Lines instruction dataset from the mounted Drive as a single "train" split.
dataset_file = "/content/drive/MyDrive/Thesis/Datasets/perspectrum_instruction_dataset_v2.jsonl"
training_data = load_dataset("json", data_files=dataset_file, split="train")

Generating train split: 0 examples [00:00, ? examples/s]

In [5]:
# Display the last record of the loaded dataset as a quick sanity check.
training_data[-1]

{'Claim': 'Net Neutrality – All Internet Traffic Should Be Treated Equally.',
 'Context': 'Supporting Perspectives:\n- Net neutrality is required to preserve the existing structure of the internet.\n- Net neutrality provides for the free circulation of data and services.\n- Net neutrality maintains a free market and even playing field.\n- Free speech is a right that should be allowed online.\n- Net neutrality preserves free speech on the internet by prohibiting internet service providers from blocking content.\n- Blocking content violates everyones right to free speech.\n- Net neutrality protects free speech from internet service provider threats.\n- Net neutrality like in democracy , will develop and preserve democracy and free speech.\n- Net neutrality in a democratic society will provide a free and open internet.\n- Net neutrality helps preserve democracy and free speech.\n- Net neutrality protects consumers by preventing ISPs from speeding, slowing, or charging higher fees for sele

### Load Model and Tokenizer

In [None]:
from unsloth import FastLanguageModel
import torch
# Model-loading configuration (these names are reused by later cells).
model_name = "unsloth/mistral-7b-instruct-v0.2-bnb-4bit"
max_seq_length = 2048  # Any length can be chosen; RoPE scaling is handled internally.
load_in_4bit = True    # 4-bit quantization to reduce memory usage; may be set to False.
dtype = None           # None = auto-detect. Float16 for Tesla T4/V100, Bfloat16 for Ampere+.

# Load the base model together with its tokenizer.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

In [None]:
# Attach LoRA adapters to the base model.
attention_projections = ["q_proj", "k_proj", "v_proj", "o_proj"]
mlp_projections = ["gate_proj", "up_proj", "down_proj"]

model = FastLanguageModel.get_peft_model(
    model,
    r=32,            # LoRA rank
    lora_alpha=64,   # LoRA scaling factor
    target_modules=attention_projections + mlp_projections,
    lora_dropout=0,  # Any value is supported, but 0 is the optimized path
    bias="none",     # Any value is supported, but "none" is the optimized path
    use_gradient_checkpointing="unsloth",  # True or "unsloth" for very long context
    random_state=3407,
    use_rslora=False,   # Rank-stabilized LoRA is available but not used here
    loftq_config=None,  # LoftQ is available but not used here
)

In [None]:
# Show which token the loaded tokenizer uses for padding.
print(tokenizer.pad_token)

### Set Up Accelerator for Multi-GPU Sharding and Distribution (If Available)

In [None]:
from accelerate import FullyShardedDataParallelPlugin, Accelerator
from torch.distributed.fsdp.fully_sharded_data_parallel import FullOptimStateDictConfig, FullStateDictConfig

# FSDP state-dict settings: both the model and the optimizer state dicts are
# offloaded to CPU and are not restricted to rank 0.
fsdp_plugin = FullyShardedDataParallelPlugin(
    state_dict_config=FullStateDictConfig(offload_to_cpu=True, rank0_only=False),
    optim_state_dict_config=FullOptimStateDictConfig(offload_to_cpu=True, rank0_only=False),
)

# Do not hard-code bf16: the training arguments in this notebook choose fp16 vs
# bf16 based on hardware support, and pre-Ampere GPUs (compute capability < 8,
# e.g. Tesla T4 / V100) have no native bf16. Fall back to fp16 only when such a
# GPU is detected; every other environment keeps the original "bf16" setting.
pre_ampere_gpu = torch.cuda.is_available() and torch.cuda.get_device_capability()[0] < 8
accelerator = Accelerator(
    fsdp_plugin=fsdp_plugin,
    mixed_precision="fp16" if pre_ampere_gpu else "bf16",
)

### Formatting Prompts

In [10]:
# Keep the tokenizer's end-of-sequence token in a module-level name;
# formatting_prompts_func appends it to every formatted example.
eos_token = tokenizer.eos_token
print(eos_token)

</s>


In [11]:
# Fixed preamble placed in front of every formatted example.
System_prompt = "Below is an instruction that describes an information requirement, paired with a claim that provides context. Write a response that appropriately addresses the instruction based on the given claim."
# Per-example template; {instruction}, {claim} and {answer} are filled in via str.format.
Input_prompt = "### Instruction:\n{instruction}\n\n### Claim:\n{claim}\n\n### Response:\n{answer}"

In [12]:
def formatting_prompts_func(examples):
    """Build the full training prompt for each row of a batched example dict.

    Args:
        examples: Mapping with parallel sequences under the keys 'Claim',
            'Instruction' and 'Answer' (one entry per row).

    Returns:
        A dict with a single key "text" holding one prompt string per row:
        System_prompt, a blank line, the filled-in Input_prompt template, and
        finally the module-level eos_token.
    """
    rows = zip(examples['Claim'], examples['Instruction'], examples['Answer'])
    prompts = [
        f"{System_prompt}\n\n"
        + Input_prompt.format(instruction=row_instruction,
                              claim=row_claim,
                              answer=row_answer)
        + eos_token
        for row_claim, row_instruction, row_answer in rows
    ]
    return {"text": prompts}

In [None]:
# Apply formatting_prompts_func in batched mode; its return value supplies a "text" entry per record.
final_formatted_dataset = training_data.map(formatting_prompts_func, batched=True,)

In [14]:
# Display the last formatted record to inspect the result of the mapping step.
final_formatted_dataset[-1]

{'Claim': 'Net Neutrality – All Internet Traffic Should Be Treated Equally.',
 'Context': 'Supporting Perspectives:\n- Net neutrality is required to preserve the existing structure of the internet.\n- Net neutrality provides for the free circulation of data and services.\n- Net neutrality maintains a free market and even playing field.\n- Free speech is a right that should be allowed online.\n- Net neutrality preserves free speech on the internet by prohibiting internet service providers from blocking content.\n- Blocking content violates everyones right to free speech.\n- Net neutrality protects free speech from internet service provider threats.\n- Net neutrality like in democracy , will develop and preserve democracy and free speech.\n- Net neutrality in a democratic society will provide a free and open internet.\n- Net neutrality helps preserve democracy and free speech.\n- Net neutrality protects consumers by preventing ISPs from speeding, slowing, or charging higher fees for sele

### Check memory stats and print trainable parameters

In [None]:
# Report the name and total memory of GPU 0 plus the peak memory reserved so far, in GB.
bytes_per_gb = 1024 ** 3
gpu_stats = torch.cuda.get_device_properties(0)
max_memory = round(gpu_stats.total_memory / bytes_per_gb, 3)
start_gpu_memory = round(torch.cuda.max_memory_reserved() / bytes_per_gb, 3)
print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.")
print(f"{start_gpu_memory} GB of memory reserved.")

In [16]:
def print_trainable_parameters(model):
    """Print the trainable and total parameter counts of a model.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, param)`` pairs, where each ``param`` provides
            ``numel()`` and a ``requires_grad`` flag.

    Prints a single line with the number of trainable parameters, the number
    of all parameters, and the trainable share as a percentage. A model with
    no parameters reports 0.0 % instead of raising ZeroDivisionError.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # Guard the percentage against an empty parameter list.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )

In [17]:
# Report how many of the LoRA-wrapped model's parameters are trainable.
print_trainable_parameters(model)

trainable params: 41943040 || all params: 3794014208 || trainable%: 1.1055056122762943


### Training using SFTTrainer

In [None]:
# When more than one GPU is visible, flag the model as parallelizable / model-parallel.
# NOTE(review): how downstream code interprets these flags is not shown here -- confirm.
multi_gpu = torch.cuda.device_count() > 1
if multi_gpu:
    for flag_name in ("is_parallelizable", "model_parallel"):
        setattr(model, flag_name, True)

# Hand the model to the Accelerator configured earlier.
model = accelerator.prepare_model(model)

In [18]:
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported
from datetime import datetime
# Run identifier; reused for the output directory, the W&B run name and the Hub repo id.
model_id = "PerspectrumInstruct-Baseline-R_32-Alpha_64_Batch_4-Step_50-FT-Unsloth_Mistral7B"
training_arguments = TrainingArguments(
    output_dir= "./" + model_id,
    per_device_train_batch_size = 4,
    per_device_eval_batch_size = 4,
    gradient_accumulation_steps = 4,
    gradient_checkpointing=True,
    warmup_steps = 5,
    max_steps = 50,
    learning_rate = 2e-4,
    # Exactly one of fp16 / bf16 is enabled, depending on hardware support.
    fp16 = not is_bfloat16_supported(),
    bf16 = is_bfloat16_supported(),
    logging_steps = 20,
    logging_dir="./logs",
    save_strategy="steps",
    save_steps=20,         # Save a checkpoint every 20 steps
    eval_strategy="steps",
    eval_steps=20,         # Evaluate every 20 steps (same cadence as save_steps)
    do_eval=True,          # Enable evaluation on the eval dataset
    optim = "adamw_8bit",
    weight_decay = 0.01,
    lr_scheduler_type = "linear",
    seed = 3407,
    report_to="wandb",    # Comment this out if you don't want to use Weights & Biases
    run_name=f"{model_id}-{datetime.now().strftime('%Y-%m-%d-%H-%M')}", # Name of the W&B run (optional)
    push_to_hub=True,
    hub_model_id = f"TonyStarkD99/{model_id}",
    hub_strategy = "end",
    hub_token = userdata.get("HF_TOKEN"), #Your Huggingface token here
    # The best checkpoint is chosen by evaluation loss, where LOWER is better.
    # The previous greater_is_better=True (with the default loss metric) made
    # load_best_model_at_end restore the checkpoint with the HIGHEST loss.
    metric_for_best_model="eval_loss",
    greater_is_better=False,
    load_best_model_at_end=True
    )

In [None]:
from trl import SFTTrainer
# Hold out 10% of the formatted data for evaluation. Evaluating on the very
# rows used for training makes the eval loss (and the best-checkpoint
# selection that depends on it) a measure of memorization, not generalization.
# The seed matches the one used elsewhere in this notebook so the split is reproducible.
dataset_splits = final_formatted_dataset.train_test_split(test_size=0.1, seed=3407)

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset_splits["train"],
    eval_dataset = dataset_splits["test"],
    dataset_text_field = "text",   # column produced by formatting_prompts_func
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    # packing = False, # Can make training 5x faster for short sequences.
    args = training_arguments,
)

### Set Up wandb Logging

In [None]:
import wandb
# Authenticate with the W&B key stored in Colab's user secrets.
wandb.login(key=userdata.get("WANDB_TOKEN")) #Your WandB token here
# Start the run. The config is passed as a sanitized dict rather than the raw
# TrainingArguments object: the raw object's attributes include hub_token, so
# handing it to W&B would upload the Hugging Face token with the run config.
# to_sanitized_dict() masks token fields and converts values to loggable types.
wandb.init(
    project="PerspectrumInstruct-Baseline-FT-Unsloth_Mistral7B",
    name='R_32-Alpha_64-Samples_4-Steps_50',
    tags=["training", "adapter"],
    notes="Fine-tuning Mitsral 7B on Perspectrum Instruction dataset with Per device batch size = 4 and steps to train = 50",
    config=training_arguments.to_sanitized_dict(),
    )

### Train Trainer

In [None]:
# Train the model with evaluation
wandb.watch(model)
trainer.train()

### Save Model

In [None]:
# Push the trainer's result to the Hugging Face Hub with a descriptive commit message and tags.
trainer.push_to_hub(
    commit_message="Baseline Training completed BatchSize 4 steps 50",
    tags=["adapter", "instruct-tuning", "Mistral7B", "Batch_Size-4", "Steps-50"]
)

### Evaluate Model after Loading from Hugging Face

In [None]:
# W&B table that collects one row per generation: the instruction, the claim and the model's response.
table = wandb.Table(columns=["instruction", "claim", "response"])

In [None]:
# Switch the model to Unsloth's inference mode before generating.
FastLanguageModel.for_inference(model)

In [None]:
# Example instruction / claim pair used for the generation check below.
inst = 'Compare the opinion distributions for the following claim.' #An example from the perspectrum dataset
clm = 'Vaccination must be made compulsory.'

In [None]:
# System_prompt and Input_prompt defined above
text = Input_prompt.format(instruction=inst,
                           claim=clm,
                           answer='',)
full_prompt = f"{System_prompt}\n\n{text}"
print(full_prompt)

inputs = tokenizer(
[
    full_prompt,
], return_tensors = "pt").to(torch.device("cuda"))

In [None]:
# Display the tokenized prompt batch.
inputs

In [None]:
from transformers import TextStreamer
# Stream generated text to the cell output as it is produced (prompt is skipped by the streamer).
text_streamer = TextStreamer(tokenizer, skip_prompt=True)
generation_kwargs = {
    "input_ids": inputs.input_ids,
    "attention_mask": inputs.attention_mask,
    "streamer": text_streamer,
    "max_new_tokens": 128,
    # The end-of-sequence token id doubles as the padding id during generation.
    "pad_token_id": tokenizer.eos_token_id,
}
output = model.generate(**generation_kwargs)

In [None]:
# generate() returns the prompt tokens followed by the newly generated ones.
# Decode only the generated tail and drop special tokens, so the "response"
# column of the W&B table holds just the model's answer rather than the whole
# prompt plus markers such as <s> / </s>.
prompt_length = inputs.input_ids.shape[1]
gen_text = tokenizer.decode(output[0][prompt_length:], skip_special_tokens=True)

In [None]:
# Append one row in the table's column order: instruction, claim, response.
table.add_data(inst, clm, gen_text)

In [None]:
# Log the table to the active W&B run under the key "Generations".
wandb.log({"Generations":table})

In [None]:
# Close the active W&B run.
wandb.finish()