In [1]:
# -------------------------------------------------------------------------------------------------------------
# Project: Open Source Institute-Cognitive System of Machine Intelligent Computing (OpenSI-CoSMIC)
# Contributors:
#     Muntasir Adnan <adnan.adnan@canberra.edu.au>
# 
# Copyright (c) 2025 Open Source Institute
# 
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without
# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
# conditions:
# 
# The above copyright notice and this permission notice shall be included in all copies or substantial
# portions of the Software.
# 
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# -------------------------------------------------------------------------------------------------------------

In [2]:
def print_title(section_num: int, description: str) -> None:
    """Print a section banner to stdout.

    The banner is three lines: an 80-character rule of dashes, the heading
    ``Section <section_num>: <description>``, and a second identical rule.

    Args:
        section_num: Number shown after the word "Section".
        description: Text shown after the colon.
    """
    rule = "-" * 80
    print(rule, f"Section {section_num}: {description}", rule, sep="\n")

# SECTION 1: VERIFY ENVIRONMENT

In [3]:
print_title(1, "Environment")

# Interpreter details: version string and the path of the running executable.
import sys
print(
    f"Python version: {sys.version}",
    f"Python executable: {sys.executable}",
    sep="\n",
)

# PyTorch build and whether it can see a CUDA device.
import torch
print(
    f"\nPyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

if not torch.cuda.is_available():
    print("\nWARNING: CUDA not available! This tutorial requires a GPU.")
else:
    # Report on device index 0; byte counts are shown in units of 1e9 bytes.
    print(f"CUDA version: {torch.version.cuda}")
    print(f"GPU device: {torch.cuda.get_device_name(0)}")
    print(f"GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")
    print(f"Current GPU memory allocated: {torch.cuda.memory_allocated(0) / 1e9:.2f} GB")

--------------------------------------------------------------------------------
Section 1: Environment
--------------------------------------------------------------------------------
Python version: 3.10.19 (main, Oct 21 2025, 16:43:05) [GCC 11.2.0]
Python executable: /home/adnana/miniconda3/envs/llm/bin/python3.10

PyTorch version: 2.1.0+cu121
CUDA available: True
CUDA version: 12.1
GPU device: NVIDIA A100 80GB PCIe
GPU memory: 85.09 GB
Current GPU memory allocated: 0.00 GB


# SECTION 2: IMPORTS

In [4]:
print_title(2, "Importing Libraries")

# Tabular / numeric helpers and the Hugging Face `datasets` container.
import pandas as pd
import numpy as np
from datasets import Dataset
# Hugging Face `transformers`: model and tokenizer loaders, the bitsandbytes
# quantization config, and the training-arguments class.
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
# PEFT (LoRA) and TRL (SFT trainer) entry points.
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
# Standard-library / plotting utilities.
import gc
import matplotlib.pyplot as plt
from datetime import datetime

# Reached only if every import above succeeded.
print("All imports successful!\n")

--------------------------------------------------------------------------------
Section 2: Importing Libraries
--------------------------------------------------------------------------------
All imports successful!



# SECTION 3: LOAD DATASET

In [5]:
print_title(3, "Dataset")

# Ten (question, answer) pairs: each prompt is kept next to its reference solution
# so the two columns cannot drift out of alignment.
_qa_pairs = [
    (
        'Write a function to add two numbers',
        'def add_numbers(a, b):\n    return a + b',
    ),
    (
        'Create a function to check if a number is even',
        'def is_even(n):\n    return n % 2 == 0',
    ),
    (
        'Write a function to reverse a string',
        'def reverse_string(s):\n    return s[::-1]',
    ),
    (
        'Create a function to find the maximum in a list',
        'def find_max(lst):\n    return max(lst)',
    ),
    (
        'Write a function to calculate factorial',
        'def factorial(n):\n    if n <= 1:\n        return 1\n    return n * factorial(n-1)',
    ),
    (
        'Create a function to check if a number is prime',
        'def is_prime(n):\n    if n < 2:\n        return False\n    for i in range(2, int(n**0.5) + 1):\n        if n % i == 0:\n            return False\n    return True',
    ),
    (
        'Write a function to find the sum of a list',
        'def sum_list(lst):\n    return sum(lst)',
    ),
    (
        'Create a function to sort a dictionary by values',
        'def sort_dict_by_value(d):\n    return dict(sorted(d.items(), key=lambda x: x[1]))',
    ),
    (
        'Write a function to remove duplicates from a list',
        'def remove_duplicates(lst):\n    return list(set(lst))',
    ),
    (
        'Create a function to check if a string is a palindrome',
        'def is_palindrome(s):\n    return s == s[::-1]',
    ),
]

# Repeat the ten pairs ten times, giving 100 rows in columns 'q' and 'a'.
synthetic_data = {
    'q': [question for question, _ in _qa_pairs] * 10,
    'a': [answer for _, answer in _qa_pairs] * 10,
}

df = pd.DataFrame(synthetic_data)

print(f"Dataset loaded: {len(df)} examples")
print(f"Columns: {df.columns.tolist()}")
print("\nFirst 3 examples:")
# The frame has a default 0..n-1 index, so label lookup by idx matches row position.
for idx in range(3):
    print(f"\n--- Example {idx+1} ---")
    print(f"Question: {df.at[idx, 'q']}")
    print(f"Answer: {df.at[idx, 'a']}")

print("\nDataset loaded successfully!\n")

--------------------------------------------------------------------------------
Section 3: Dataset
--------------------------------------------------------------------------------
Dataset loaded: 100 examples
Columns: ['q', 'a']

First 3 examples:

--- Example 1 ---
Question: Write a function to add two numbers
Answer: def add_numbers(a, b):
    return a + b

--- Example 2 ---
Question: Create a function to check if a number is even
Answer: def is_even(n):
    return n % 2 == 0

--- Example 3 ---
Question: Write a function to reverse a string
Answer: def reverse_string(s):
    return s[::-1]

Dataset loaded successfully!



# SECTION 4: TOKENIZER LOADING

In [6]:
print_title(4, "Tokenizer Loading")

model_name = "Qwen/Qwen2.5-0.5B"
# Load the tokenizer for the checkpoint named above; downloads are cached in ./model_cache.
# NOTE(review): trust_remote_code=True allows code shipped in the model repository to run
# at load time. Confirm this checkpoint actually needs it before keeping the flag.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
    cache_dir="./model_cache"
)

# If the tokenizer has no padding token, reuse the end-of-sequence token for padding.
if tokenizer.pad_token is None:
    print(f"PAD token is not set by default for model: {model_name}")
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.pad_token_id = tokenizer.eos_token_id

print("Tokenizer loaded!")
print(f"Vocabulary size: {len(tokenizer):,}")
# model_max_length: see the `model_max_length` parameter in the Hugging Face tokenizer docs
# (https://huggingface.co/docs/transformers/main_classes/tokenizer).
print(f"Model max length: {tokenizer.model_max_length}")
print(f"BOS token: '{tokenizer.bos_token}' (ID: {tokenizer.bos_token_id})")
print(f"EOS token: '{tokenizer.eos_token}' (ID: {tokenizer.eos_token_id})")
print(f"PAD token: '{tokenizer.pad_token}' (ID: {tokenizer.pad_token_id})")

--------------------------------------------------------------------------------
Section 4: Tokenizer Loading
--------------------------------------------------------------------------------
PAD token is not dest by default for model: microsoft/phi-2
Tokenizer loaded!
Vocabulary size: 50,295
Model max length: 2048
BOS token: '<|endoftext|>' (ID: 50256)
EOS token: '<|endoftext|>' (ID: 50256)
PAD token: '<|endoftext|>' (ID: 50256)


In [7]:
# tokenizer

# SECTION 5: QUANTIZATION CONFIGURATION

In [8]:
print_title(5, "Quantization Configuration")

print("Configuring 4-bit quantization...")
# 4-bit loading settings handed to the model loader through `quantization_config`:
#   - load_in_4bit:              turn on 4-bit weight loading
#   - bnb_4bit_quant_type:       "nf4" selects NormalFloat4 quantization
#   - bnb_4bit_use_double_quant: double quantization for extra memory savings
#   - bnb_4bit_compute_dtype:    compute in bfloat16 for stability
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    load_in_4bit=True,
)

--------------------------------------------------------------------------------
Section 5: Quantization Configuration
--------------------------------------------------------------------------------
Configuring 4-bit quantization...


# SECTION 6: MODEL

In [None]:
# Load the causal-LM checkpoint named by `model_name` using the 4-bit settings in
# `bnb_config`. Downloads are cached in ./model_cache, and device_map="auto" is used
# so the weights are placed on the GPU automatically.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    cache_dir="./model_cache",
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,
    device_map="auto",
    quantization_config=bnb_config,
)

print("\nModel loaded successfully!")

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

Cancellation requested; stopping current tasks.


In [None]:
# Banner (blank line, rule, title, rule) followed by the model's printed representation.
print("\n" + "-" * 80, "MODEL ARCHITECTURE OVERVIEW", "-" * 80, sep="\n")
print(model)

In [None]:
# Total parameter count: sum of element counts over every parameter tensor.
total_params = sum(p.numel() for p in model.parameters())
print(f"\nTotal parameters: {total_params:,} ({total_params/1e9:.2f}B)")

# Memory footprint of the loaded model, in units of 1e9 bytes.
memory_footprint = model.get_memory_footprint() / 1e9
print(f"Model memory footprint: {memory_footprint:.2f} GB")

# Keep this value numeric. A format spec such as ":.2f" is only valid inside an
# f-string replacement field (writing it on the assignment is a SyntaxError), and the
# percentage below needs a float to divide by. Rounding is applied only when printing.
# Note: `total_memory` is the total memory of device 0, not the currently free amount.
available_gpu = torch.cuda.get_device_properties(0).total_memory / 1e9
print(f"Available GPU memory: {available_gpu:.2f} GB")
print(f"Memory usage: {memory_footprint / available_gpu * 100:.1f}% of {available_gpu:.2f}GB")