In [None]:
# Install required libraries
!pip install transformers datasets trl torch accelerate bitsandbytes wandb

Collecting trl
  Downloading trl-0.23.0-py3-none-any.whl.metadata (11 kB)
Collecting bitsandbytes
  Downloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl.metadata (11 kB)
Downloading trl-0.23.0-py3-none-any.whl (564 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m564.7/564.7 kB[0m [31m14.6 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading bitsandbytes-0.47.0-py3-none-manylinux_2_24_x86_64.whl (61.3 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.3/61.3 MB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bitsandbytes, trl
Successfully installed bitsandbytes-0.47.0 trl-0.23.0


In [None]:
# Import necessary modules
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, pipeline
from datasets import load_dataset
from trl import SFTConfig, SFTTrainer, setup_chat_format
import torch
import os

# Detect device (GPU, MPS, or CPU)
device = (
    "cuda"
    if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available() else "cpu"
)

***Quick Check For Resources - Runtime***

In [None]:
!nvidia-smi

Sat Sep 13 01:39:11 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   49C    P8             10W /   70W |       2MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

model_name = "HuggingFaceTB/SmolLM2-135M-Instruct"
model = AutoModelForCausalLM.from_pretrained(pretrained_model_name_or_path=model_name)
tokenizer = AutoTokenizer.from_pretrained(pretrained_model_name_or_path=model_name)

tokenizer.chat_template = None

from trl.models.utils import setup_chat_format
model, tokenizer = setup_chat_format(model=model, tokenizer=tokenizer)

prompt = "Explain AGI?"
pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0)
print(pipe(prompt, max_new_tokens=200))


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/861 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

Device set to use cuda:0


[{'generated_text': 'Explain AGI?'}]


In [None]:
# Load the dataset
ds = load_dataset("prithivMLmods/Deepthink-Reasoning")

# Tokenization function with batch handling
def tokenize_function(examples):
    # Apply strip to each string in the batch
    prompts = [p.strip() for p in examples["prompt"]]
    responses = [r.strip() for r in examples["response"]]

    # Apply chat template for each prompt-response pair
    texts = [
        tokenizer.apply_chat_template(
            [{"role": "user", "content": p}, {"role": "assistant", "content": r}],
            tokenize=False
        )
        for p, r in zip(prompts, responses)
    ]

    # Tokenize the combined texts
    return tokenizer(texts, truncation=True, padding="max_length", max_length=512)

# Apply the tokenization function to the dataset
ds = ds.map(tokenize_function, batched=True)

README.md: 0.00B [00:00, ?B/s]

dataset/0000.parquet:   0%|          | 0.00/401k [00:00<?, ?B/s]

Generating train split:   0%|          | 0/251 [00:00<?, ? examples/s]

Map:   0%|          | 0/251 [00:00<?, ? examples/s]

In [None]:
use_bf16 = torch.cuda.is_bf16_supported()

# Prepare training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    warmup_steps=5,
    max_steps=100,  # Adjust max_steps as needed
    learning_rate=5e-5,
    fp16=not use_bf16,  # Use fp16 if bf16 is not supported
    bf16=use_bf16,  # Use bf16 if supported
    logging_steps=1,
    optim="adamw_8bit",
    weight_decay=0.01,
    lr_scheduler_type="linear",
    seed=3407,
    output_dir="outputs",
    report_to="none",  # Use 'wandb' or 'tensorboard' if needed or pass 'none'
)

# Initialize the SFTTrainer
trainer = SFTTrainer(
    model=model,
    processing_class=tokenizer,  # Use tokenizer as processing_class
    train_dataset=ds["train"],  # Train dataset
    args=training_args,  # Pass the training arguments
)

In [None]:
# Start training
trainer.train()

Step,Training Loss
1,0.7275
2,0.9754
3,0.7324
4,0.8201
5,0.9517
6,0.7685
7,0.8823
8,0.7407
9,0.9175
10,0.621


TrainOutput(global_step=100, training_loss=0.6791249468922615, metrics={'train_runtime': 203.1761, 'train_samples_per_second': 3.937, 'train_steps_per_second': 0.492, 'total_flos': 256111758213120.0, 'train_loss': 0.6791249468922615, 'epoch': 3.126984126984127})

In [None]:
  # Save model and tokenizer to a local directory
save_directory = "/content/my_model"
model.save_pretrained(save_directory)
tokenizer.save_pretrained(save_directory)

# Zip the directory
import shutil
shutil.make_archive(save_directory, 'zip', save_directory)

# Download the zip file
# from google.colab import files
# files.download(f"{save_directory}.zip")

'/content/my_model.zip'

# Test the model after fine-tunning
NOTE: Does not work well with context outside the trainning dataset

In [None]:
save_directory = "/content/my_model"
loaded_model = AutoModelForCausalLM.from_pretrained(save_directory)
loaded_tokenizer = AutoTokenizer.from_pretrained(save_directory)

# First prompt: Outside the context of the training set
prompt = "Explain AGI?"
pipe = pipeline("text-generation", model=loaded_model, tokenizer=loaded_tokenizer, device=0)
print(pipe(prompt, max_new_tokens=400))

# Second prompt: One of the training set prompts
prompt = "Create a machine learning classifier in Python for categorizing user input."
pipe = pipeline("text-generation", model=loaded_model, tokenizer=loaded_tokenizer, device=0)
print(pipe(prompt, max_new_tokens=400))

# Third prompt: Same context as the training set prompts
prompt = "Machine learning classifier"
pipe = pipeline("text-generation", model=loaded_model, tokenizer=loaded_tokenizer, device=0)
print(pipe(prompt, max_new_tokens=400))

Device set to use cuda:0


[{'generated_text': 'Machine learning classifier\n\n4. **Sentiment analysis**: Sentiment analysis is used to analyze and interpret text data, including sentiment, emotion, and connotations.\n\nI will be using Python for the Sentiment Analysis task. I will use the `nltk` library to perform sentiment analysis, which is widely used for sentiment analysis, and `TextBlob` from the `nltk.sentiment` library for sentiment analysis. I will also use the `textwrap` module from `nltk` to split the text into sentences.\n\n- First, I will define a helper function `is_negative` to check if a word is negative.\n- Then, I will use the `nltk.sentiment` library to perform sentiment analysis on the text, and the `nltk.TextReformer` library to train a Sentiment Prediction model.\n- I will use the `textwrap.indent` function to indent sentences and the `textwrap.dumont` function to format the text.\n- I will use the `nltk.SentimentClassifier` class from `nltk` to train the model.\n- Finally, I will print the

# Simple fine tuning with a different dataset and different parameters.

# Step 1: Load and Prepare dataset

In [6]:
!pip -q install trl transformers datasets torch accelerate --upgrade bitsandbytes

from transformers import AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer
from datasets import load_dataset
import torch

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/564.7 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m564.7/564.7 kB[0m [31m27.7 MB/s[0m eta [36m0:00:00[0m
[?25h

In [3]:
# Load the GSM8K dataset (grade school math problems)
ds = load_dataset("openai/gsm8k", "main")

print("\n=== Dataset Sample ===")
print(ds["train"][0])

# Load the tokenizer and model
model_name = "HuggingFaceTB/SmolLM2-135M-Instruct" # Or any other model you want to use
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)


def preprocess_gsm8k(examples):
    texts = []
    for question, answer in zip(examples["question"], examples["answer"]):
        # Format as instruction-response pair
        full_text = f"Question: {question}\n\nAnswer: {answer}"
        texts.append(full_text)

    # Add padding token if it's missing
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token

    return tokenizer(texts, truncation=True, padding="max_length", max_length=512)

# Apply preprocessing
ds = ds.map(preprocess_gsm8k, batched=True, remove_columns=ds["train"].column_names)

print(f"\n✅ Preprocessed {len(ds['train'])} math problem samples.")


=== Dataset Sample ===
{'question': 'Natalia sold clips to 48 of her friends in April, and then she sold half as many clips in May. How many clips did Natalia sell altogether in April and May?', 'answer': 'Natalia sold 48/2 = <<48/2=24>>24 clips in May.\nNatalia sold 48+24 = <<48+24=72>>72 clips altogether in April and May.\n#### 72'}


tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/655 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/861 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/269M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Map:   0%|          | 0/7473 [00:00<?, ? examples/s]

Map:   0%|          | 0/1319 [00:00<?, ? examples/s]


✅ Preprocessed 7473 math problem samples.


# Step 2: Training arguments and Training the model

In [8]:
from transformers import TrainingArguments
from trl import SFTTrainer, SFTConfig

# Define training arguments
# Using SFTConfig for SFTTrainer as in the previous example
training_args = SFTConfig(
    output_dir="./gsm8k_results",  # Output directory for checkpoints and logs
    num_train_epochs=3,
    per_device_train_batch_size=8,
    gradient_accumulation_steps=2,
    warmup_steps=100,
    learning_rate=2e-5,  # Learning rate
    weight_decay=0.01,  # Strength of weight decay
    logging_dir="./gsm8k_logs",  # Directory for storing logs
    logging_steps=100, # Log every 100 steps
    report_to="none",
    fp16=torch.cuda.is_available(), # Enable mixed precision if CUDA is available
    max_steps=100,
    optim="adamw_8bit",
    lr_scheduler_type="linear",
    seed=3407,
)


# Initialize the SFTTrainer
trainer = SFTTrainer(
    model=model,
    args=training_args,
    train_dataset=ds["train"],
    eval_dataset=ds["test"] if "test" in ds else None,
    processing_class=tokenizer,
)

print("✅ SFTTrainer initialized with GSM8K dataset.")

Truncating train dataset:   0%|          | 0/7473 [00:00<?, ? examples/s]

Truncating eval dataset:   0%|          | 0/1319 [00:00<?, ? examples/s]

✅ SFTTrainer initialized with GSM8K dataset.


In [11]:
# Start training
trainer.train()

Step,Training Loss
100,0.5673


TrainOutput(global_step=100, training_loss=0.5673441314697265, metrics={'train_runtime': 225.6064, 'train_samples_per_second': 7.092, 'train_steps_per_second': 0.443, 'total_flos': 522011226931200.0, 'train_loss': 0.5673441314697265, 'epoch': 0.21390374331550802})

# Step 3: Load the fine-tuned model and tokenizer

In [13]:
# Define the directory path where the fine-tuned model and tokenizer were saved.
save_directory = "./gsm8k_results"

# Load the fine-tuned model and tokenizer
loaded_model_gsm8k = trainer.model
loaded_tokenizer_gsm8k = tokenizer

# Save the model and tokenizer to the directory after training
loaded_model_gsm8k.save_pretrained(save_directory)
loaded_tokenizer_gsm8k.save_pretrained(save_directory)

print("✅ Fine-tuned model and tokenizer loaded and saved correctly.")

✅ Fine-tuned model and tokenizer loaded and saved correctly.


# Step 4: Create pipelines for both models

In [15]:
from transformers import pipeline

# Create a text generation pipeline for the original model
pipe_original = pipeline("text-generation", model=model, tokenizer=tokenizer, device=0 if torch.cuda.is_available() else -1)

# Create a text generation pipeline for the fine-tuned model
pipe_finetuned = pipeline("text-generation", model=loaded_model_gsm8k, tokenizer=loaded_tokenizer_gsm8k, device=0 if torch.cuda.is_available() else -1)

print("✅ Text generation pipelines created for both original and fine-tuned models.")

Device set to use cuda:0
Device set to use cuda:0


✅ Text generation pipelines created for both original and fine-tuned models.


# Step 5: Generate responses for sample questions


In [17]:
# Load the GSM8K dataset again to access the original columns
ds_original = load_dataset("openai/gsm8k", "main")

# Select a few sample questions from the original test dataset
sample_questions = ds_original["test"]["question"][:5]

original_responses = []
finetuned_responses = []

# Generate responses using the original model pipeline
print("Generating responses from the original model...")
for question in sample_questions:
    prompt_original = f"Question: {question}\n\nAnswer:"
    response = pipe_original(prompt_original, max_new_tokens=200, do_sample=False)[0]['generated_text']
    original_responses.append(response)
    print(f"Original model response for: {question}\n{response}\n---\n")

# Generate responses using the fine-tuned model pipeline
print("Generating responses from the fine-tuned model...")
for question in sample_questions:
    prompt_finetuned = f"Question: {question}\n\nAnswer:"
    response = pipe_finetuned(prompt_finetuned, max_new_tokens=200, do_sample=False)[0]['generated_text']
    finetuned_responses.append(response)
    print(f"Fine-tuned model response for: {question}\n{response}\n---\n")

print("✅ Responses generated from both models.")

The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


Generating responses from the original model...


`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`.
Caching is incompatible with gradient checkpointing in LlamaDecoderLayer. Setting `past_key_values=None`.


Original model response for: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?
Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?

Answer: Janet:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
---

Original model response for: A robe takes 2 bolts of blue fiber and half that much white fiber.  How many bolts in total does it take?
Question: A robe takes 2 bolts of blue fiber 

# Step 6: Compare the responses

In [18]:
print("Comparing Original vs. Fine-tuned Model Responses:")
print("-" * 50)

for i, question in enumerate(sample_questions):
    print(f"Question {i+1}: {question}")
    print(f"Original Model Response:\n{original_responses[i]}")
    print(f"Fine-tuned Model Response:\n{finetuned_responses[i]}")

    # Simple qualitative comparison - you can add more sophisticated checks here
    if original_responses[i] != finetuned_responses[i]:
        print("Observation: Responses differ.")
    else:
        print("Observation: Responses are the same.")

    if ":::" in original_responses[i] or len(original_responses[i]) < 50:
         print("Observation: Original model response seems incomplete or repetitive.")

    if ":::" not in finetuned_responses[i] and len(finetuned_responses[i]) > 50:
         print("Observation: Fine-tuned model response seems more complete and less repetitive.")
    else:
        print("Observation: Fine-tuned model response also seems incomplete or repetitive.")


    print("-" * 50)

print("✅ Analysis of generated responses completed.")

Comparing Original vs. Fine-tuned Model Responses:
--------------------------------------------------
Question 1: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?
Original Model Response:
Question: Janet’s ducks lay 16 eggs per day. She eats three for breakfast every morning and bakes muffins for her friends every day with four. She sells the remainder at the farmers' market daily for $2 per fresh duck egg. How much in dollars does she make every day at the farmers' market?

Answer: Janet:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
Fine-tuned Model Response:
Question: Janet’s ducks lay 16 eggs per day. She

## Summary:

### Data Analysis Key Findings

*   Both the original and fine-tuned models produced identical responses for the sample GSM8K questions.
*   The responses from both models were incomplete and repetitive, often ending with sequences of characters like ":::". This suggests that the fine-tuning did not improve the model's ability to generate complete or different answers for these specific samples.

### Insights or Next Steps

*   Investigate the fine-tuning process, hyperparameters, and training data to understand why the fine-tuned model did not show improved performance or different behavior compared to the raw model on these samples.
*   Consider evaluating on a larger, diverse set of questions and using quantitative metrics (e.g., accuracy on the final numerical answer) to get a more comprehensive view of the fine-tuned model's performance.
