In [None]:
# Core stack
!pip install --upgrade --no-cache-dir \
    pandas==2.2.2 \
    numpy==1.25.3

# PyTorch + CUDA 12.4 ecosystem
!pip install --upgrade --no-cache-dir \
    --index-url https://download.pytorch.org/whl/cu124 \
    torch==2.6.0+cu124 \
    torchvision==0.21.0+cu124 \
    torchaudio==2.6.0+cu124

# fastai
!pip install fastai==2.7.19

# Hugging Face & RLHF tooling
!pip install --upgrade --no-cache-dir \
    transformers==4.46.0 \
    trl==0.16.1 \
    accelerate==1.6.0 \
    datasets==3.5.0 \
    tokenizers==0.20.3 \
    huggingface-hub==0.30.2 \
    bitsandbytes==0.39.0
# Peft
!pip install peft==0.4.0
# Filesystems
!pip install --upgrade --no-cache-dir \
    fsspec==2024.12.0 \
    gcsfs==2024.12.0 \
    packaging==24.2.0 \
    rich==13.7.1 \
    jedi>=0.16

# Firebase Admin SDK
!pip install firebase-admin==6.7.0

In [None]:
!pip show bitsandbytes
!pip install bitsandbytes --prefer-binary --no-cache-dir
!pip install bitsandbytes --upgrade
!python -m bitsandbytes

In [None]:
# Extra packages for this cell. wandb is imported below and is the `report_to`
# target of the DPO run.
# NOTE(review): both installs are unpinned, and `evaluate` is not imported
# anywhere in this cell — confirm it is still needed.
!pip install wandb
!pip install evaluate

import gc
import json
import os
from datetime import datetime, timedelta
from typing import Dict, List, Any, Optional

import firebase_admin
import pandas as pd
import torch
import wandb
from datasets import Dataset
from firebase_admin import credentials, db
from huggingface_hub import login, HfApi, create_repo
from peft import LoraConfig, get_peft_model, PeftModel
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from trl import DPOTrainer, DPOConfig

# Efficient memory allocation
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"


# Configure secrets and login
# Secrets are read from the environment so they are never stored in (or
# committed with) the notebook.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("Set the HF_TOKEN environment variable before running this cell.")

# FIREBASE_CREDENTIALS holds either the service-account JSON itself or a path to
# the JSON file; credentials.Certificate accepts both a dict and a path.
_firebase_credentials_raw = os.environ.get("FIREBASE_CREDENTIALS", "").strip()
if not _firebase_credentials_raw:
    raise RuntimeError(
        "Set the FIREBASE_CREDENTIALS environment variable (service-account JSON "
        "or a path to it) before running this cell."
    )
FIREBASE_CREDENTIALS = (
    json.loads(_firebase_credentials_raw)
    if _firebase_credentials_raw.startswith("{")
    else _firebase_credentials_raw
)

login(HF_TOKEN)

# Cloud DB credentials
# Initialise the Firebase app only when none is registered yet, so re-running
# this cell does not try to initialise it a second time.
if not firebase_admin._apps:
    cred = credentials.Certificate(FIREBASE_CREDENTIALS)
    firebase_admin.initialize_app(cred, {
        # Optional override; falls back to the URL configured in the notebook.
        'databaseURL': os.environ.get('FIREBASE_DATABASE_URL', 'https://...')
    })


# Prepare RLHF dataset
def fetch_feedback():
    """Build a preference table from the ``/feedback`` node of the Firebase DB.

    An entry contributes one row when its ``responses`` mapping has a
    ``selected`` value of ``'Response 1'`` or ``'Response 2'`` and carries both
    ``response1`` and ``response2``. The selected response becomes ``chosen``
    and the other one ``rejected``. Entries that are malformed (not a dict,
    no ``responses`` mapping, a missing response) are skipped instead of
    aborting the whole fetch.

    Returns:
        pandas.DataFrame: columns ``prompt``, ``chosen``, ``rejected``; empty
        (but with those columns) when no usable feedback exists.
    """
    payload = db.reference('/feedback').get() or {}
    # Accept both a mapping of entries and a plain list of entries.
    entries = payload.values() if isinstance(payload, dict) else payload

    recs = []
    for entry in entries:
        if not isinstance(entry, dict):
            continue
        responses = entry.get('responses')
        if not isinstance(responses, dict):
            continue
        sel = responses.get('selected', '')
        if sel not in ('Response 1', 'Response 2'):
            continue
        r1, r2 = responses.get('response1'), responses.get('response2')
        if r1 is None or r2 is None:
            # Without both candidates there is no preference pair to learn from.
            continue
        chosen, rejected = (r1, r2) if sel == 'Response 1' else (r2, r1)
        recs.append({'prompt': entry.get('prompt', ''), 'chosen': chosen, 'rejected': rejected})
    return pd.DataFrame(recs, columns=['prompt', 'chosen', 'rejected'])

# Pull the preference pairs and stop early when there is nothing to train on.
rlhf_df = fetch_feedback()
if rlhf_df.empty:
    raise ValueError('No feedback.')

# Wrap the frame as a Hugging Face dataset for the trainer.
dpo_dataset = Dataset.from_pandas(rlhf_df)

# Checkpoint that supplies both the tokenizer and the models below.
base_model_name = 'ArsenKe/MT5_large_finetuned_chatbot'
tokenizer = AutoTokenizer.from_pretrained(base_model_name)

# Reuse the EOS token for padding when the tokenizer defines no pad token.
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token

def preprocess(ex):
    """Tokenize the prompt, chosen and rejected texts of one example.

    Each text is passed to the module-level ``tokenizer`` with max-length
    padding and truncation at 128, and only the resulting ``input_ids`` are
    kept.

    Args:
        ex: mapping with ``prompt``, ``chosen`` and ``rejected`` entries.

    Returns:
        dict with ``prompt_input_ids``, ``chosen_input_ids`` and
        ``rejected_input_ids``.
    """
    tokenize_opts = dict(padding='max_length', truncation=True, max_length=128)
    # (output key, source field) pairs, in the order they are tokenized.
    columns = (
        ('prompt_input_ids', 'prompt'),
        ('chosen_input_ids', 'chosen'),
        ('rejected_input_ids', 'rejected'),
    )
    encoded = {}
    for out_key, field in columns:
        encoded[out_key] = tokenizer(ex[field], **tokenize_opts).input_ids
    return encoded

# Add the token-id columns produced by `preprocess` to every example.
tok_ds = dpo_dataset.map(preprocess)

# Model + Adapter setup
# Two separate copies of the same checkpoint: `base` receives the LoRA adapter,
# `ref` is handed to the trainer as the reference model.
base, ref = (
    AutoModelForSeq2SeqLM.from_pretrained(base_model_name)
    for _ in range(2)
)

# LoRA on the modules named 'q' and 'v'.
peft_cfg = LoraConfig(
    task_type='SEQ_2_SEQ_LM',
    r=16,
    lora_alpha=32,
    target_modules=['q', 'v'],
)
model = get_peft_model(base, peft_cfg)

# Input grads are enabled before gradient checkpointing is switched on.
model.enable_input_require_grads()
model.gradient_checkpointing_enable()
ref.gradient_checkpointing_enable()


# DPO training
# Run settings: logged to wandb as 'dpo_mt5'; save_strategy='no' asks the
# trainer not to write checkpoints during training.
dpo_args = DPOConfig(
    run_name='dpo_mt5',
    report_to='wandb',
    output_dir='./dpo_out',
    save_strategy='no',
    num_train_epochs=4,
    learning_rate=1e-6,
    beta=1e-3,
)

# Policy (LoRA-wrapped) and reference model train on the prepared dataset.
trainer = DPOTrainer(
    args=dpo_args,
    model=model,
    ref_model=ref,
    processing_class=tokenizer,
    train_dataset=tok_ds,
)

# Release unreferenced objects and cached CUDA blocks before training starts.
gc.collect()
torch.cuda.empty_cache()

trainer.train()

# Save adapter and tokenizer
# This has to happen before the merge: the merge step reloads the adapter from
# this directory, and nothing else writes it (the original tried to load
# './my_dpo_adapter', which was never created).
adapter_dir = './mt5_dpo_adapter'
tokenizer.save_pretrained(adapter_dir)
trainer.save_model(adapter_dir)

# Merge and push merged model
# `base` has already been passed through get_peft_model, so the saved adapter is
# attached to a freshly loaded copy of the checkpoint instead of being stacked
# on top of the trained model.
clean_base = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)
merged = PeftModel.from_pretrained(clean_base, adapter_dir)
full = merged.merge_and_unload()
# `token` replaces the deprecated `use_auth_token` argument.
full.push_to_hub('.../MT5_large_dpo_merged', token=HF_TOKEN)







tokenizer_config.json:   0%|          | 0.00/19.3k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/4.31M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/16.4M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/416 [00:00<?, ?B/s]

Map:   0%|          | 0/116 [00:00<?, ? examples/s]

config.json:   0%|          | 0.00/830 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

Extracting prompt in train dataset:   0%|          | 0/116 [00:00<?, ? examples/s]

Applying chat template to train dataset:   0%|          | 0/116 [00:00<?, ? examples/s]

Tokenizing train dataset:   0%|          | 0/116 [00:00<?, ? examples/s]

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize?ref=models
[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: Currently logged in as: [33marsenke[0m ([33marsenke-fh-tech-wien[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Step,Training Loss
10,5.6204
20,5.6325


Step,Training Loss
10,5.6204
20,5.6325
30,5.5085
40,5.5753


ValueError: Can't find 'adapter_config.json' at './dpo_out'

In [None]:
# Save adapter and tokenizer
# Written first: the merge below reloads the adapter from this directory, so it
# must exist before PeftModel.from_pretrained is called.
adapter_dir = './dpo_adapter'
tokenizer.save_pretrained(adapter_dir)
trainer.save_model(adapter_dir)

# Merge and push merged model
# `base` was already wrapped by get_peft_model in the training cell, so the
# saved adapter is attached to a freshly loaded copy of the checkpoint.
clean_base = AutoModelForSeq2SeqLM.from_pretrained(base_model_name)
merged = PeftModel.from_pretrained(clean_base, adapter_dir, is_trainable=False)
full = merged.merge_and_unload()
# `token` replaces the deprecated `use_auth_token` argument.
full.push_to_hub('ArsenKe/MT5_large_dpo_merged', token=HF_TOKEN)





model.safetensors:   0%|          | 0.00/4.92G [00:00<?, ?B/s]