In [None]:
import gc

import torch

# Collect unreachable Python objects first, then ask PyTorch to release its
# cached (currently unused) GPU memory blocks.
gc.collect()
torch.cuda.empty_cache()

# Report GPU availability. The device index is only queried when CUDA is
# present, because torch.cuda.current_device() raises on a CPU-only runtime
# and would otherwise abort this purely diagnostic cell.
cuda_available = torch.cuda.is_available()
print(cuda_available)
if cuda_available:
    print(torch.cuda.current_device())
else:
    print("No CUDA device visible - switch the runtime type to GPU before training.")

True
0


In [None]:
# Install the fine-tuning stack quietly (-q): PEFT, the Hugging Face libraries, accelerate and gdown.
!pip install -q peft
!pip install -q transformers datasets evaluate huggingface_hub accelerate gdown
# Remove any bitsandbytes that is already installed, then reinstall it bypassing pip's cache.
!pip uninstall -y bitsandbytes
!pip install --no-cache-dir bitsandbytes
import os
import bitsandbytes as bnb
import torch
import gc
#import torchgradient_accumulation_steps
import pandas as pd
import gdown
import evaluate
from datasets import Dataset
from google.colab import auth, drive
from google.auth import default
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, TrainingArguments, Trainer,
    DataCollatorForSeq2Seq
)
from transformers import BitsAndBytesConfig
from peft import LoraConfig, TaskType
from peft import get_peft_model
from accelerate import infer_auto_device_map


# CUDA Memory Expansion
# Passes the allocator option "expandable_segments:True" to PyTorch through the environment.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Mount Google Drive
# force_remount=True asks Colab to remount even if /content/drive is already mounted.
drive.mount('/content/drive', force_remount=True)
# Authenticate the Colab user, then fetch the default Google credentials.
# NOTE(review): `creds` is not used anywhere in the visible notebook - confirm it is still needed.
auth.authenticate_user()
creds, _ = default()

# Download Dataset
# `file_id` is the Google Drive ID of the spreadsheet; gdown stores it locally as .xlsx.
file_id = '18cM4Z_GlgHdDruuTXPekmgKk6UU8IBxMjGnuUKZELh0'
output_file = "chatbot_training_data.xlsx"
gdown.download(id=file_id, output=output_file, quiet=False)

# Load Dataset
# Keep only the two conversation columns, drop incomplete rows and rename the
# columns to the prompt/response names used by the preprocessing step.
df = pd.read_excel(output_file)
df = df[['user_input', 'chatbot_response']].dropna().rename(
    columns={'user_input': 'prompt', 'chatbot_response': 'response'}
)
if df.empty:
    # Fail here with a clear message instead of an obscure error during the split.
    raise ValueError(f"No complete prompt/response rows found in {output_file}")
print("Data Loaded:")
print(df.head())

# Convert to Hugging Face Dataset
# Upper bound on the number of examples used; smaller sheets are used in full
# (a fixed `range(20000)` raised whenever fewer rows were available).
MAX_SAMPLES = 20000
dataset = Dataset.from_pandas(df).shuffle(seed=42)
dataset = dataset.select(range(min(MAX_SAMPLES, len(dataset))))
del df
gc.collect()

# Split Dataset
# 80/20 train/eval split with a fixed seed so the split is reproducible.
split_dataset = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset, eval_dataset = split_dataset["train"], split_dataset["test"]

# Load Model and Tokenizer
model_name = "microsoft/DialoGPT-large"

tokenizer = AutoTokenizer.from_pretrained(model_name)
# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# 4-bit NF4 quantization with double quantization; matmuls are computed in fp16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",  # More efficient quantization
)

# Load the checkpoint exactly once, directly in 4-bit. The earlier version
# loaded it three times (full precision, then the deprecated `load_in_8bit`
# path, then 4-bit) and discarded the first two copies, which only cost
# download/CPU/GPU memory and time: this final 4-bit model is the one that
# was actually trained.
model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=bnb_config)

# "Size Mismatch" Errors
# Keep the embedding matrix in sync with the tokenizer's vocabulary size.
model.resize_token_embeddings(len(tokenizer))
# Turn off the generation cache for training, enable gradient checkpointing,
# and make the embedding outputs require grad.
model.config.use_cache = False
model.gradient_checkpointing_enable()
model.enable_input_require_grads()


# Load Evaluation Metric
# Fetches the "perplexity" metric through the Hugging Face `evaluate` library.
metric = evaluate.load("perplexity")

def compute_metrics(eval_pred):
    """Compute token-level perplexity from the logits and labels of an evaluation run.

    The `evaluate` "perplexity" metric scores raw text with its own model and
    does not accept arg-maxed token ids plus `references`, so perplexity is
    derived here directly from the model outputs instead.

    Args:
        eval_pred: A pair ``(logits, labels)``. ``logits`` has shape
            ``(batch, seq_len, vocab)`` (a tuple of outputs is also accepted,
            in which case its first element is used) and ``labels`` has shape
            ``(batch, seq_len)`` with ``-100`` marking positions to ignore.
            Arrays, tensors and nested lists are all accepted.

    Returns:
        dict: ``{"perplexity": float}``, i.e. ``exp`` of the mean next-token
        cross-entropy over non-ignored positions. ``nan`` when every label is
        ignored.
    """
    logits, labels = eval_pred
    if isinstance(logits, tuple):
        # Some models return extra outputs alongside the logits.
        logits = logits[0]
    logits = torch.as_tensor(logits, dtype=torch.float32)
    labels = torch.as_tensor(labels, dtype=torch.long)

    # Causal LM convention: the logits at position t score the token at t + 1.
    shift_logits = logits[..., :-1, :].reshape(-1, logits.shape[-1])
    shift_labels = labels[..., 1:].reshape(-1)

    n_tokens = int((shift_labels != -100).sum())
    if n_tokens == 0:
        # Nothing to score; avoid a division by zero.
        return {"perplexity": float("nan")}

    total_nll = torch.nn.functional.cross_entropy(
        shift_logits, shift_labels, ignore_index=-100, reduction="sum"
    )
    return {"perplexity": float(torch.exp(total_nll / n_tokens))}

# Preprocessing Function
MAX_LENGTH = 64  # token budget per field (prompt and response each)
def preprocess_function(examples):
    """Turn a batch of prompt/response pairs into causal-LM training features.

    A causal LM scores ``labels[t + 1]`` against the logits produced at input
    position ``t``, so ``input_ids`` and ``labels`` must describe the same
    token sequence. Each example is therefore encoded as one sequence

        prompt <eos> response <eos> <pad> ...

    Labels are ``-100`` on the prompt part and on padding, so the loss is
    computed only on the response tokens and the EOS that ends them.
    (Tokenizing the prompt as inputs and the response as labels separately
    leaves the two unaligned.)

    Args:
        examples: Batched mapping with ``"prompt"`` and ``"response"`` lists
            of strings.

    Returns:
        dict: ``input_ids``, ``attention_mask`` and ``labels``; every row has
        length ``2 * MAX_LENGTH``.
    """
    eos_id = tokenizer.eos_token_id
    pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else eos_id
    total_length = 2 * MAX_LENGTH

    # One slot of each field's budget is reserved for the EOS turn separator.
    prompt_batch = tokenizer(
        examples["prompt"],
        max_length=MAX_LENGTH - 1,
        truncation=True,
        add_special_tokens=False,
    )["input_ids"]
    response_batch = tokenizer(
        examples["response"],
        max_length=MAX_LENGTH - 1,
        truncation=True,
        add_special_tokens=False,
    )["input_ids"]

    model_inputs = {"input_ids": [], "attention_mask": [], "labels": []}
    for prompt_ids, response_ids in zip(prompt_batch, response_batch):
        context = list(prompt_ids) + [eos_id]
        target = list(response_ids) + [eos_id]
        n_pad = total_length - len(context) - len(target)

        model_inputs["input_ids"].append(context + target + [pad_id] * n_pad)
        model_inputs["attention_mask"].append([1] * (len(context) + len(target)) + [0] * n_pad)
        # Mask by position rather than by token id: pad and EOS share an id here,
        # and the response's closing EOS must stay a training target.
        model_inputs["labels"].append([-100] * len(context) + target + [-100] * n_pad)

    return model_inputs

# Tokenize Dataset
def _tokenize_split(split):
    """Run `preprocess_function` over one split in batches, dropping its original columns."""
    return split.map(
        preprocess_function,
        batched=True,
        remove_columns=split.column_names,
    )

tokenized_train_dataset = _tokenize_split(train_dataset)
tokenized_eval_dataset = _tokenize_split(eval_dataset)
print("Tokenization completed!")



# Configure LoRA
# Adapter settings for causal-LM fine-tuning: rank-4 update matrices, alpha 8
# and 10% dropout on the LoRA layers. `inference_mode=False` keeps the adapter trainable.
# NOTE(review): no `target_modules` is given, so PEFT presumably falls back to its
# built-in default for this architecture - confirm which layers receive adapters.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    inference_mode=False,
    r=4,
    lora_alpha=8,
    lora_dropout=0.1,
)
# Apply LoRA to the model
# Wraps the base model with the adapter and prints the trainable/total parameter counts.
model = get_peft_model(model, peft_config)
model.print_trainable_parameters()




# Training configuration: 3 epochs, learning rate 2e-5, weight decay 0.01,
# fp16 mixed precision and gradient checkpointing. Evaluation and checkpoint
# saving are both switched off ("no"); logs are reported to Weights & Biases.
# Batch size, gradient accumulation, optimizer and logging interval are left
# at the library defaults (their overrides below are commented out).
# NOTE(review): `run_name` says "fully_finetuned", but the model is wrapped
# with a LoRA adapter before training - confirm the intended run name.
training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="no",
    save_strategy="no",
    #save_total_limit=1,
    num_train_epochs=3,
    #per_device_train_batch_size=1,
    #per_device_eval_batch_size=1,
    #gradient_accumulation_steps=64,
    gradient_checkpointing=True,
    #optim="adamw_torch",
    learning_rate=2e-5,
    weight_decay=0.01,
    #logging_steps=600,
    fp16=True,
    bf16=False,
    report_to="wandb",
    run_name="DialoGPT_fully_finetuned_chatbot_v2"

)


[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m2.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m84.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m76.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m42.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m56.3/56.3 MB[0m [31m35.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.9/127.9 MB[0m [31m16.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Downloading...
From (original): https://drive.google.com/uc?id=18cM4Z_GlgHdDruuTXPekmgKk6UU8IBxMjGnuUKZELh0
From (redirected): https://docs.google.com/spreadsheets/d/18cM4Z_GlgHdDruuTXPekmgKk6UU8IBxMjGnuUKZELh0/export?format=xlsx
To: /content/chatbot_training_data.xlsx
1.55MB [00:01, 1.42MB/s]


✅ Data Loaded:
                                              prompt  \
0  User: Could you reserve the Silver Sightseeing...   
1  User: Can I reschedule my E-Rickshaw Tour to a...   
2  User: I need to adjust the number of people fo...   
3  User: Can you recommend a budget-friendly tour...   
4  User: Can I downgrade my Gold Sightseeing Tour...   

                                            response  
0  Chatbot: Of course! The Silver Sightseeing Tou...  
1  Chatbot: No problem! What new date works for y...  
2  Chatbot: Got it! How many people now for the P...  
3  Chatbot: The Silver Sightseeing Tour is perfec...  
4  Chatbot: Yes, we can switch you to a more affo...  


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.75G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.75G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The `load_in_4bit` and `load_in_8bit` arguments are deprecated and will be removed in the future versions. Please, pass a `BitsAndBytesConfig` object in `quantization_config` argument instead.


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

`low_cpu_mem_usage` was None, now default to True since model is quantized.


Downloading builder script:   0%|          | 0.00/8.46k [00:00<?, ?B/s]

Map:   0%|          | 0/16000 [00:00<?, ? examples/s]

Map:   0%|          | 0/4000 [00:00<?, ? examples/s]

✅ Tokenization completed!
trainable params: 737,280 || all params: 774,767,360 || trainable%: 0.0952




In [None]:
# Show the GPU model, driver version and current memory usage.
!nvidia-smi

import torch
print(torch.cuda.is_available())
from peft import get_peft_model, LoraConfig, TaskType
import time
import IPython
import threading

import os
# Same allocator option as in the setup cell, assigned again here.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# Release cached GPU memory, allow TF32 matmuls and enable cuDNN benchmark mode.
torch.cuda.empty_cache()
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.benchmark = True


def keep_colab_alive():
    """Periodically click Colab's connect button from injected JavaScript.

    Sleeps 600 seconds, displays a script that clicks the ``#connect`` element
    when it exists and is not hidden, prints a status line, and repeats
    forever. Never returns, so it is meant to run on a background thread.
    """
    poll_seconds = 600
    while True:
        time.sleep(poll_seconds)
        reconnect_script = IPython.display.Javascript('''
            const connectButton = document.querySelector("#connect")
            if (connectButton && connectButton.style.display !== "none") {
                console.log("Reconnecting to runtime...");
                connectButton.click();
            }
        ''')
        display(reconnect_script)
        print("Runtime check complete.")

# Run the keep-alive loop on a daemon thread so it never blocks interpreter shutdown.
thread = threading.Thread(target=keep_colab_alive, daemon=True)
thread.start()


# Collator that assembles the tokenized examples into batches for the Trainer.
data_collator = DataCollatorForSeq2Seq(tokenizer, model=model)

# Free cached GPU memory and unreachable Python objects before training starts.
torch.cuda.empty_cache()
gc.collect()

# Training
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_dataset,
    eval_dataset=tokenized_eval_dataset,
    compute_metrics=compute_metrics,
    data_collator=data_collator,
)
# The former `model = torch.compile(model)` call that followed the Trainer
# construction was removed: the Trainer already holds its own reference to the
# model, so the compiled wrapper was never the object being trained and the
# call only rebound the global `model` name to a wrapper.

print("Starting Training...")
trainer.train()
print("Training Completed!")

# Persist the trained model and the tokenizer side by side so the folder can
# be uploaded or reloaded as one unit.
print("Saving Model Locally...")
trainer.save_model("./fine_tuned_dialoGPT_v2")
tokenizer.save_pretrained("./fine_tuned_dialoGPT_v2")

print(" Model and tokenizer Saved!")

# Upload Model to Hugging Face Hub
from huggingface_hub import HfApi, create_repo, get_token, login

# Creating a repo and uploading both require a write token. Without one the
# Hub answers "401 Unauthorized", so ask for a login when no token is
# available from the environment or the local token cache.
if get_token() is None:
    login()

api = HfApi()

# NOTE(review): "..." is a placeholder for the Hub user/organization name and
# must be replaced with a real namespace before this cell can succeed.
repo_name = ".../DialoGPT_finetuned_chatbot"
create_repo(repo_name, exist_ok=True)

print("Pushing Model to Hugging Face Hub...")
api.upload_folder(
    folder_path="./fine_tuned_dialoGPT_v2",
    repo_id=repo_name,
    repo_type="model",
    token=True

)
print(f"Model Uploaded: https://huggingface.co/{repo_name}")

Sun Mar 30 17:23:24 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA L4                      Off |   00000000:00:03.0 Off |                    0 |
| N/A   51C    P0             28W /   72W |    1825MiB /  23034MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

No label_names provided for model class `PeftModelForCausalLM`. Since `PeftModel` hides base models input arguments, if label_names is not given, label_names can't be set automatically within `Trainer`. Note that empty label_names list will be used instead.


🚀 Starting Training...


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.


<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
wandb: Paste an API key from your profile and hit enter:

 ··········


[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33marsenke[0m ([33marsenke-fh-tech-wien[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


`loss_type=None` was set in the config but it is unrecognised.Using the default loss: `ForCausalLMLoss`.


Step,Training Loss
500,10.746
1000,7.3055
1500,6.3144
2000,5.977
2500,5.7943
3000,5.6498
3500,5.5587
4000,5.4242
4500,5.3784
5000,5.3124


<IPython.core.display.Javascript object>

Runtime check complete.


<IPython.core.display.Javascript object>

Runtime check complete.
✅ Training Completed!
Saving Model Locally...
✅ Model and tokenizer Saved!


HfHubHTTPError: 401 Client Error: Unauthorized for url: https://huggingface.co/api/repos/create (Request ID: Root=1-67e984b5-67e72dcc3b9472de181196aa;75dfda72-ffe3-426b-bbf7-4ec6813de443)

Invalid username or password.

In [None]:
from huggingface_hub import HfApi, create_repo

# Target model repository on the Hub; `exist_ok=True` makes re-runs harmless.
repo_name = ".../DialoGPT_finetuned_chatbot"
create_repo(repo_name, exist_ok=True)
api = HfApi()

print("🚀 Pushing Model to Hugging Face Hub...")
# Upload the locally saved folder using the stored login token (token=True).
upload_kwargs = dict(
    folder_path="./fine_tuned_dialoGPT_v2",
    repo_id=repo_name,
    repo_type="model",
    token=True,
)
api.upload_folder(**upload_kwargs)
print(f"✅ Model Uploaded: https://huggingface.co/{repo_name}")

🚀 Pushing Model to Hugging Face Hub...


adapter_model.safetensors:   0%|          | 0.00/2.96M [00:00<?, ?B/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

✅ Model Uploaded: https://huggingface.co/ArsenKe/DialoGPT_finetuned_chatbot


In [None]:
# Push to Hugging Face
!pip install --upgrade huggingface_hub -q
!huggingface-cli login

from huggingface_hub import HfApi, create_repo

gc.collect()

print("Verifying saved files:")
!ls -lh ./fine_tuned_dialoGPT

api = HfApi()

print("Creating repository '.../DialoGPT_RLHF_project_v2'...")
try:
    create_repo(repo_id=".../DialoGPT_RLHF_project_v2", repo_type="model", exist_ok=True)
    print("Repository created or already exists!")
except Exception as e:
    print(f"Failed to create repo: {e}")
    raise

print("Pushing to Hugging Face...")
try:
    api.upload_folder(
        folder_path="./fine_tuned_dialoGPT_v2",
        repo_id="ArsenKe/DialoGPT_RLHF_project_v2",
        repo_type="model",
        token=True
    )
    print("✅ Model pushed to Hugging Face!")
except Exception as e:
    print(f"Push failed: {e}")

print("Final memory check:")
!free -h


    _|    _|  _|    _|    _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|_|_|_|    _|_|      _|_|_|  _|_|_|_|
    _|    _|  _|    _|  _|        _|          _|    _|_|    _|  _|            _|        _|    _|  _|        _|
    _|_|_|_|  _|    _|  _|  _|_|  _|  _|_|    _|    _|  _|  _|  _|  _|_|      _|_|_|    _|_|_|_|  _|        _|_|_|
    _|    _|  _|    _|  _|    _|  _|    _|    _|    _|    _|_|  _|    _|      _|        _|    _|  _|        _|
    _|    _|    _|_|      _|_|_|    _|_|_|  _|_|_|  _|      _|    _|_|_|      _|        _|    _|    _|_|_|  _|_|_|_|

    To log in, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
Enter your token (input will not be visible): 
Add token as git credential? (Y/n) n
Token is valid (permission: fineGrained).
The token `finetuned-LLM` has been saved to /root/.cache/huggingface/stored_tokens
Your token has been saved to /root/.cache/huggingface/token
Login successful.
The current active token is: `fine

Upload 2 LFS files:   0%|          | 0/2 [00:00<?, ?it/s]

training_args.bin:   0%|          | 0.00/5.37k [00:00<?, ?B/s]

adapter_model.safetensors:   0%|          | 0.00/2.96M [00:00<?, ?B/s]

✅ Model pushed to Hugging Face!
Final memory check:
               total        used        free      shared  buff/cache   available
Mem:            52Gi       7.0Gi        16Gi        16Mi        29Gi        45Gi
Swap:             0B          0B          0B


# Load the PEFT Model from Hugging Face

In [None]:

!pip install -U peft transformers accelerate
!huggingface-cli login

!pip install peft transformers accelerate huggingface_hub
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_name = "microsoft/DialoGPT-large"
peft_model_name = "ArsenKe/DialoGPT_finetuned_chatbot"

tokenizer = AutoTokenizer.from_pretrained(base_model_name)

model = AutoModelForCausalLM.from_pretrained(
    base_model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

tokenizer_peft = AutoTokenizer.from_pretrained(peft_model_name)
model.resize_token_embeddings(len(tokenizer_peft))

# Load PEFT adapter
model = PeftModel.from_pretrained(model, peft_model_name, is_trainable=False)

# Merge LoRA weights
model = model.merge_and_unload()

print("✅ PEFT Model Loaded Successfully!")

Collecting peft
  Downloading peft-0.15.0-py3-none-any.whl.metadata (13 kB)
Collecting transformers
  Downloading transformers-4.50.0-py3-none-any.whl.metadata (39 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.13.0->peft)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.13.0->peft)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.13.0->peft)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
C

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/614 [00:00<?, ?B/s]

vocab.json: 0.00B [00:00, ?B/s]

merges.txt: 0.00B [00:00, ?B/s]

config.json:   0%|          | 0.00/642 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/1.75G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.75G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/100 [00:00<?, ?B/s]

OSError: Can't load tokenizer for 'ArsenKe/DialoGPT_finetuned_chatbot'. If you were trying to load it from 'https://huggingface.co/models', make sure you don't have a local directory with the same name. Otherwise, make sure 'ArsenKe/DialoGPT_finetuned_chatbot' is the correct path to a directory containing all relevant files for a GPT2TokenizerFast tokenizer.

In [None]:
def generate_response(input_text, max_length=100):
    """Generate the chatbot's reply to a single user message.

    Args:
        input_text: The user's message.
        max_length: Upper bound on the total sequence length (prompt tokens
            plus generated tokens) passed to ``model.generate``.

    Returns:
        str: Only the newly generated reply, with special tokens removed.
        The prompt is not echoed back.
    """
    # DialoGPT separates dialogue turns with EOS, so end the user turn with it.
    prompt = input_text + tokenizer.eos_token
    # Follow the model's own device instead of assuming "cuda".
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,  # pass the attention mask along with the input ids
        max_length=max_length,
        pad_token_id=tokenizer.eos_token_id,  # the tokenizer has no dedicated pad token
    )
    # generate() returns prompt + continuation; keep only the continuation.
    prompt_length = inputs["input_ids"].shape[-1]
    reply_ids = outputs[0][prompt_length:]
    return tokenizer.decode(reply_ids, skip_special_tokens=True)

input_text = "Hello, how can I help you today?"
response = generate_response(input_text)
print(f" Response: {response}")
