<a href="https://colab.research.google.com/github/Adnan525/LLM_FineTuning/blob/main/Finetuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# -------------------------------------------------------------------------------------------------------------
# Project: Open Source Institute-Cognitive System of Machine Intelligent Computing (OpenSI-CoSMIC)
# Contributors:
#     Adnan, Muntasir <adnan.adnan@canberra.edu.au>
#     Kuhn, Carlos C. N. <Carlos.NoschangKuhn@canberra.edu.au>
#
# Copyright (c) 2025 Open Source Institute
#
# Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated
# documentation files (the "Software"), to deal in the Software without restriction, including without
# limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
# the Software, and to permit persons to whom the Software is furnished to do so, subject to the following
# conditions:
#
# The above copyright notice and this permission notice shall be included in all copies or substantial
# portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
# LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
# -------------------------------------------------------------------------------------------------------------

In [None]:
# =================================================================================
# COMPREHENSIVE LLM FINE-TUNING TUTORIAL
# =================================================================================
# A complete guide to fine-tuning language models using modern techniques:
# - LoRA (Low-Rank Adaptation)
# - 4-bit Quantization
# - Supervised Fine-Tuning (SFT)
# - Full tokenization and masking deep-dive

In [None]:
print("=" * 80)
print("SECTION 1: Installing Required Libraries")
print("=" * 80)

!pip install -q -U transformers==4.45.0
!pip install -q -U peft==0.13.0
!pip install -q -U accelerate==0.34.0
!pip install -q -U bitsandbytes==0.44.0
!pip install -q -U datasets==3.0.0
!pip install -q -U trl==0.11.0

print("\nAll libraries installed successfully!\n")

SECTION 1: Installing Required Libraries
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m122.4/122.4 MB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.4/316.4 kB[0m [31m9.4 MB/s[0m eta [36m0:00:00[0m
[?25h
All libraries installed successfully!



In [None]:
# Section banner: an 80-character rule above and below the section title.
print("=" * 80, "SECTION 2: Importing Libraries", "=" * 80, sep="\n")

import torch
import pandas as pd
import numpy as np
from datasets import Dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments
)
from peft import LoraConfig, get_peft_model, prepare_model_for_kbit_training
from trl import SFTTrainer
import gc
import matplotlib.pyplot as plt

# Report the runtime environment so the reader can see which PyTorch build
# and (if any) which GPU this run is using.
print(f"PyTorch version: {torch.__version__}")

cuda_available = torch.cuda.is_available()
print(f"CUDA available: {cuda_available}")

if cuda_available:
    # Device index 0 is queried for both the name and the memory size.
    print(f"CUDA device: {torch.cuda.get_device_name(0)}")
    # total_memory is divided by 1e9, so the figure is in decimal gigabytes.
    gpu_total_gb = torch.cuda.get_device_properties(0).total_memory / 1e9
    print(f"GPU memory: {gpu_total_gb:.2f} GB")

print("\nImports completed!\n")

SECTION 2: Importing Libraries
PyTorch version: 2.8.0+cu126
CUDA available: True
CUDA device: Tesla T4
GPU memory: 15.83 GB

Imports completed!

