<a href="https://colab.research.google.com/github/Shashank123-hub/AI-ML-Projects/blob/main/Meeting_TLDR_Generator.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Libraries for model fine-tuning, quantization, and dataset handling
!pip install -q transformers datasets peft accelerate bitsandbytes trl

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.1/76.1 MB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m348.0/348.0 kB[0m [31m15.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m491.5/491.5 kB[0m [31m24.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m193.6/193.6 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m40.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hub identifier of the base checkpoint.
# This is TinyLlama 1.1B Chat (a small chat-tuned LLM), as the identifier below states.
model_name = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Load the causal language model; device_map="auto" leaves device placement
# to the library (it picks the available GPU).
model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

# Load the matching tokenizer (text -> tokens and tokens -> text).
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
)

OSError: There was a specific connection error when trying to load TinyLlama/TinyLlama-1.1B-Chat-v1.0:
401 Client Error: Unauthorized for url: https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0/resolve/main/config.json (Request ID: Root=1-6829c2f6-4ceab5b95684e9206e8e3c94;f4f4c663-b18a-4f91-9839-ef089186a3e4)

Invalid credentials in Authorization header

In [None]:
from datasets import Dataset

# Two toy meeting records (transcript, summary, action items) used to smoke-test the pipeline
test_data = [
    {
        "transcript": (
            "Alice: Let's plan the Q2 campaign.\n"
            "Bob: We need to finalize the budget.\n"
            "Alice: I'll create the deck."
        ),
        "summary": "Team discussed Q2 campaign planning. Alice will prepare the deck.",
        "actions": (
            "- Finalize Q2 budget\n"
            "- Alice to create campaign deck"
        ),
    },
    {
        "transcript": (
            "Tom: Sales are up 10%.\n"
            "Linda: Let's double down on the current strategy."
        ),
        "summary": "Sales increased 10%. Team plans to continue current strategy.",
        "actions": "- Maintain current sales strategy",
    },
]

# Converting the list of record dicts (`test_data`) to a HuggingFace `Dataset` object
dataset = Dataset.from_list(test_data)

# Formatter that flattens one record into a single tagged string for training
def format(data):
  """Build the training text for one example.

  Reads the 'transcript', 'summary' and 'actions' entries of `data` and
  returns a dict with a single 'text' key. In that text each field sits on
  its own lines inside a tag pair (<transcript>, <summary>, <action_items>),
  and the three tag blocks are separated by newlines.

  Raises KeyError if any of the three entries is missing.
  """
  # NOTE(review): this name shadows the builtin `format`; kept so existing callers still work.
  sections = (
      ("transcript", data['transcript']),
      ("summary", data['summary']),
      ("action_items", data['actions']),
  )
  return {"text": "\n".join(f"<{tag}>\n{body}\n</{tag}>" for tag, body in sections)}

# Applying above function to every example, so each one gets the combined "text" field
dataset = dataset.map(format)

Map:   0%|          | 0/2 [00:00<?, ? examples/s]

In [None]:
from trl import SFTTrainer
from peft import LoraConfig
from transformers import TrainingArguments

# Step 1: LoRA settings — only small adapter matrices are updated, not the whole model
lora_config = LoraConfig(
    # What is being trained, and where the adapters attach
    task_type="CAUSAL_LM",                  # causal language model (predict next word)
    target_modules=["q_proj", "v_proj"],    # parts of the model LoRA should touch (common for transformers)
    # Adapter capacity and scaling
    r=16,                                   # size of the adapter matrices (higher = more capacity)
    lora_alpha=32,                          # scaling factor (how much LoRA influences the output)
    # Regularization
    lora_dropout=0.05,                      # dropout to avoid overfitting
    bias="none",                            # only train LoRA weights, not bias terms
)

# Step 2: Define how training should run
training_args = TrainingArguments(
    per_device_train_batch_size=2,         # Number of samples per device per step
    gradient_accumulation_steps=4,         # Accumulate gradients over 4 batches per optimizer step (simulates larger batches)
    warmup_steps=10,                       # "Ease in" steps for learning rate
    max_steps=50,                          # Small number of total steps (for demo)
    learning_rate=2e-4,                    # How fast the model learns
    fp16=True,                             # Use half-precision to save memory
    logging_steps=10,                      # Print progress every 10 steps
    output_dir="./results",                # Where to save model logs
    save_strategy="no",                    # Skip checkpoint saving (simpler for demos)
    report_to="none"                       # Disable W&B and other reporting integrations so train() does not stop at an API-key prompt
)


In [None]:
# Create the training pipeline using TRL's SFTTrainer (Supervised Fine-Tuning)
from transformers import DataCollatorForLanguageModeling
from trl import SFTTrainer

# Create the data collator for language modeling.
# mlm=False selects the causal (next-token) objective, not masked-language-modeling.
data_collator = DataCollatorForLanguageModeling(
    tokenizer=tokenizer,
    mlm=False
)

# Assemble the supervised fine-tuning trainer (no 'max_seq_length' is passed)
trainer = SFTTrainer(
    # Base causal LM loaded from `model_name`
    model=model,
    # Our small fine-tuning dataset
    train_dataset=dataset,
    # LoRA adapter config
    peft_config=lora_config,
    # Causal-LM collator built from the tokenizer
    data_collator=data_collator,
    # TrainingArguments defined earlier
    args=training_args,
)

# Run fine-tuning
trainer.train()

Converting train dataset to ChatML:   0%|          | 0/2 [00:00<?, ? examples/s]

Adding EOS to train dataset:   0%|          | 0/2 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/2 [00:00<?, ? examples/s]

Truncating train dataset:   0%|          | 0/2 [00:00<?, ? examples/s]

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
wandb: Paste an API key from your profile and hit enter:

 ··········




ValueError: API key must be 40 characters long, yours was 37

In [None]:
# Persist the trained model and the tokenizer to local directories
# (note: they are written to two different folders).
trainer.model.save_pretrained(save_directory="tldr-model")
tokenizer.save_pretrained(save_directory="tldr-token")