<a href="https://colab.research.google.com/github/SriVinayA/SJSU-CMPE297-SpecialTopics/blob/main/assignment_3_A.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install Unsloth and dependencies
!pip install -q unsloth

# Upgrade Unsloth to the latest version from GitHub
!pip uninstall -q unsloth -y
!pip install -q --upgrade --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Install Flash Attention 2 for softcapping support (if your GPU supports it)
import torch
if torch.cuda.get_device_capability()[0] >= 8:
    !pip install -q --no-deps packaging ninja einops "flash-attn>=2.6.3"


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
  Building wheel for unsloth (pyproject.toml) ... [?25l[?25hdone


In [None]:
from unsloth import FastLanguageModel
import torch
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset

🦥 Unsloth: Will patch your computer to enable 2x faster free finetuning.


In [None]:
max_seq_length = 512  # Adjusted for specific tasks
dtype = None          # Auto-detection
load_in_4bit = True   # Reduce memory usage


In [None]:
model_name = "unsloth/gemma-2-2b"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)


==((====))==  Unsloth 2024.11.7: Fast Gemma2 patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


model.safetensors:   0%|          | 0.00/2.22G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/46.4k [00:00<?, ?B/s]

tokenizer.model:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/636 [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.5M [00:00<?, ?B/s]

# Use Case 1: Sentiment Analysis

### Prepare the Dataset

In [None]:
# Load the dataset
dataset = load_dataset('imdb')

# Reduce dataset size for demonstration purposes
small_train_dataset = dataset['train'].shuffle(seed=42).select(range(1000))
small_test_dataset = dataset['test'].shuffle(seed=42).select(range(200))


README.md:   0%|          | 0.00/7.81k [00:00<?, ?B/s]

train-00000-of-00001.parquet:   0%|          | 0.00/21.0M [00:00<?, ?B/s]

test-00000-of-00001.parquet:   0%|          | 0.00/20.5M [00:00<?, ?B/s]

unsupervised-00000-of-00001.parquet:   0%|          | 0.00/42.0M [00:00<?, ?B/s]

Generating train split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/25000 [00:00<?, ? examples/s]

Generating unsupervised split:   0%|          | 0/50000 [00:00<?, ? examples/s]

### Data Formatting

In [None]:
def format_sentiment_examples(examples):
    texts = []
    for text, label in zip(examples['text'], examples['label']):
        sentiment = 'positive' if label == 1 else 'negative'
        prompt = f"Classify the sentiment of the following review:\n\n{text}\n\nSentiment:"
        target = f" {sentiment}"
        texts.append(prompt + target)
    return {'text': texts}

train_dataset = small_train_dataset.map(format_sentiment_examples, batched=True)
test_dataset = small_test_dataset.map(format_sentiment_examples, batched=True)


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

Map:   0%|          | 0/200 [00:00<?, ? examples/s]

### Configure LoRA and Training Arguments

In [None]:
model = FastLanguageModel.get_peft_model(
    model,
    r=8,
    target_modules=["q_proj", "v_proj"],
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    use_gradient_checkpointing="unsloth",
)

training_args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_steps=50,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=10,
    output_dir="outputs_sentiment",
    save_total_limit=1,
)


Unsloth: Dropout = 0 is supported for fast patching. You are using dropout = 0.1.
Unsloth will patch all other layers, except LoRA matrices, causing a performance hit.
Unsloth 2024.11.7 patched 26 layers with 0 QKV layers, 0 O layers and 0 MLP layers.


### Fine-Tune the Model

In [None]:
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    args=training_args,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
)

trainer.train()


Map:   0%|          | 0/1000 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 1,000 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 62
 "-____-"     Number of trainable parameters = 1,597,440
[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:wandb: Paste an API key from your profile and hit enter, or press ctrl+c to quit:[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Step,Training Loss
10,3.1367
20,3.0582
30,2.9828
40,2.801
50,2.8255
60,2.752


TrainOutput(global_step=62, training_loss=2.919449537031112, metrics={'train_runtime': 667.997, 'train_samples_per_second': 1.497, 'train_steps_per_second': 0.093, 'total_flos': 5032432833841152.0, 'train_loss': 2.919449537031112, 'epoch': 0.992})

### Inference

In [None]:
FastLanguageModel.for_inference(model)

def classify_sentiment(review):
    prompt = f"Classify the sentiment of the following review:\n\n{review}\n\nSentiment:"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(**inputs, max_new_tokens=5)
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    sentiment = response.split("Sentiment:")[-1].strip()
    return sentiment

# Example
review = "I absolutely loved this movie! The acting was great and the story was touching."
print(f"Sentiment: {classify_sentiment(review)}")


Sentiment: positive

Classify the sentiment


# Use Case 2: Named Entity Recognition

In [None]:
# Disable TorchDynamo and SymPy
import os
os.environ['TORCHDYNAMO_DISABLE'] = '1'
os.environ['TORCH_DISABLE_SYMPY'] = '1'

# Import Libraries
from unsloth import FastLanguageModel
import torch
from transformers import TrainingArguments
from trl import SFTTrainer
from datasets import load_dataset

# Load the Base Model and Tokenizer
max_seq_length = 512
dtype = None
load_in_4bit = True
model_name = "unsloth/gemma-2-2b"

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name=model_name,
    max_seq_length=max_seq_length,
    dtype=dtype,
    load_in_4bit=load_in_4bit,
)

# Prepare the Dataset
dataset = load_dataset('conll2003')

def format_ner_examples(examples):
    texts = []
    for tokens, tags in zip(examples['tokens'], examples['ner_tags']):
        sentence = ' '.join(tokens)
        entities = []
        for token, tag in zip(tokens, tags):
            if tag != 0:  # 0 corresponds to 'O' (Outside any entity)
                entity = token
                entities.append(entity)
        entity_list = ', '.join(entities) if entities else 'None'
        prompt = f"Identify the named entities in the following sentence:\n\n{sentence}\n\nEntities:"
        target = f" {entity_list}"
        texts.append(prompt + target)
    return {'text': texts}

train_dataset = dataset['train'].map(format_ner_examples, batched=True)
test_dataset = dataset['validation'].map(format_ner_examples, batched=True)

# Reconfigure LoRA
model = FastLanguageModel.get_peft_model(
    model,
    r=8,
    target_modules=["q_proj", "v_proj"],
    lora_alpha=16,
    lora_dropout=0.1,
    bias="none",
    use_gradient_checkpointing=False,  # Disabled
)

# Update Training Arguments
training_args = TrainingArguments(
    per_device_train_batch_size=4,
    gradient_accumulation_steps=4,
    warmup_steps=50,
    num_train_epochs=1,
    learning_rate=2e-4,
    fp16=not torch.cuda.is_bf16_supported(),
    bf16=torch.cuda.is_bf16_supported(),
    logging_steps=10,
    output_dir="outputs_ner",
    save_total_limit=1,
    report_to="none",  # Disable wandb logging
)

# Initialize Trainer
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=train_dataset,
    args=training_args,
    dataset_text_field="text",
    max_seq_length=max_seq_length,
)

# Start Training
trainer.train()

==((====))==  Unsloth 2024.11.7: Fast Gemma2 patching. Transformers = 4.46.2.
   \\   /|    GPU: Tesla T4. Max memory: 14.748 GB. Platform = Linux.
O^O/ \_/ \    Pytorch: 2.5.1+cu121. CUDA = 7.5. CUDA Toolkit = 12.1.
\        /    Bfloat16 = FALSE. FA [Xformers = 0.0.28.post3. FA2 = False]
 "-____-"     Free Apache license: http://github.com/unslothai/unsloth
Unsloth: Fast downloading is enabled - ignore downloading bars which are red colored!


Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

Map:   0%|          | 0/3250 [00:00<?, ? examples/s]

Map:   0%|          | 0/14041 [00:00<?, ? examples/s]

==((====))==  Unsloth - 2x faster free finetuning | Num GPUs = 1
   \\   /|    Num examples = 14,041 | Num Epochs = 1
O^O/ \_/ \    Batch size per device = 4 | Gradient Accumulation steps = 4
\        /    Total batch size = 16 | Total steps = 877
 "-____-"     Number of trainable parameters = 1,597,440


Step,Training Loss
10,4.1453
20,3.7926
30,2.969
40,2.2756
50,1.9962
60,1.786
70,1.6385
80,1.594
90,1.6
100,1.634


TrainOutput(global_step=877, training_loss=1.5578589357834993, metrics={'train_runtime': 1410.4902, 'train_samples_per_second': 9.955, 'train_steps_per_second': 0.622, 'total_flos': 9839188837988352.0, 'train_loss': 1.5578589357834993, 'epoch': 0.9991455425804614})

In [None]:
FastLanguageModel.for_inference(model)

def identify_entities(sentence):
    prompt = f"Identify the named entities in the following sentence:\n\n{sentence}\n\nEntities:"
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
    outputs = model.generate(
        **inputs,
        max_new_tokens=50,
        eos_token_id=tokenizer.eos_token_id,  # Ensure the model knows when to stop
        no_repeat_ngram_size=2,               # Prevent repetition
    )
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    entities = response.split("Entities:")[-1].strip()
    # Remove any trailing punctuation or repeated 'None's
    entities = entities.rstrip(', None')
    return entities

# Example
sentence = "Microsoft was founded by Bill Gates and Paul Allen in Albuquerque."
print(f"Entities: {identify_entities(sentence)}")


Entities: Microsoft, Bill, Gates, Paul, Allen, Albuquerque, None, Microsoft.com, Inc.
