In [2]:
import os
import subprocess
import time
from pathlib import Path
from huggingface_hub import hf_hub_download
import shutil

# Base directory and configuration.
# LOG_DIR and SAFETY_MODEL_NAME live under BASE_DIR, so they are derived from
# it instead of repeating the absolute prefix.
BASE_DIR = "/lustre/fsw/portfolios/llmservice/users/ahazare"
LOG_DIR = f"{BASE_DIR}/gtc_paris/logs"
SAFETY_DATASET_NAME = "nemo_safety_blend_v0.2.2.jsonl"
POST_TRAINING_DATASET_NAME = "nvidia/Llama-Nemotron-Post-Training-Dataset"
MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Llama-8B"
SAFETY_MODEL_NAME = f"{BASE_DIR}/cache/huggingface/llama-3.1-nemoguard-8b-content-safety"

# Credentials
# SECURITY: secrets must never be written into the notebook. Export HF_TOKEN
# and WANDB_API_KEY in the shell that launches Jupyter (or load them from a
# secrets manager) so they are already present in os.environ here.
# The token values that were previously hard-coded in this cell have been
# exposed and should be revoked and re-issued.
REQUIRED_CREDENTIALS = ("HF_TOKEN", "WANDB_API_KEY")


def missing_credentials(env=None):
    """Return the names of required credentials that are unset or empty.

    Args:
        env: Mapping to inspect; defaults to ``os.environ``.

    Returns:
        A list of names from ``REQUIRED_CREDENTIALS``, in declaration order,
        for which ``env`` has no non-empty value.
    """
    env = os.environ if env is None else env
    return [name for name in REQUIRED_CREDENTIALS if not env.get(name)]


_absent_credentials = missing_credentials()
if _absent_credentials:
    # Warn instead of raising so the data-preparation cells stay usable; the
    # on-policy generation cell reads os.environ['HF_TOKEN'] directly and will
    # fail with KeyError if the token is still missing at that point.
    print(
        "WARNING: missing credential environment variable(s): "
        + ", ".join(_absent_credentials)
        + ". Export them before starting the notebook kernel."
    )

In [3]:
# Export the temp/cache/log locations used by the rest of the notebook and make
# sure each of them exists on disk. All but RAY_TMPDIR live under BASE_DIR.
cache_root = f"{BASE_DIR}/cache"
managed_dirs = {
    'TMPDIR': f"{BASE_DIR}/tmp",
    'XDG_CACHE_HOME': cache_root,
    'HF_HOME': f"{cache_root}/huggingface",
    'UV_CACHE_DIR': f"{cache_root}/uv",
    'TRITON_CACHE_DIR': f"{cache_root}/triton",
    'DATASET_CACHE_DIR': f"{BASE_DIR}/dataset_cache",
    'RAY_TMPDIR': "/tmp/ray_ahazare",
    'LOG_DIR': LOG_DIR,
}
os.environ.update(managed_dirs)

# Create every exported directory (parents included); existing ones are left alone.
for env_name in managed_dirs:
    Path(os.environ[env_name]).mkdir(parents=True, exist_ok=True)

In [22]:
# 1. Download NV Safety Dataset
# The generation script is invoked by relative name, so the working directory
# is switched to the notebooks folder first. Later cells that call sibling
# scripts by relative name rely on this chdir as well.
os.chdir(f"{BASE_DIR}/NeMo-Safety/notebooks")

dataset_cache = os.environ['DATASET_CACHE_DIR']
blend_cmd = ['python3', 'safety_dataset_blend_generation.py']
blend_cmd += ['--filename', os.path.join(dataset_cache, SAFETY_DATASET_NAME)]
blend_cmd += ['--total_samples', '2000']
blend_cmd += ['--sampling_method', 'uniform']
blend_cmd += ['--cache_dir', dataset_cache]

# check=True turns a non-zero exit status into CalledProcessError.
result = subprocess.run(blend_cmd, check=True)

fatal: destination path 'eval-safety' already exists and is not an empty directory.



Starting dataset collection...
Output file: /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/nemo_safety_blend_v0.2.2.jsonl
Target sample size: 2,000
Sampling method: uniform
Using cache directory: /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache

Downloading Aegis v2 dataset...
Source: nvidia/Aegis-AI-Content-Safety-Dataset-2.0

Dataset: Aegis v2
Number of samples: 23,077


Downloading Gretel Safety Alignment v1 dataset...
Source: gretelai/gretel-safety-alignment-en-v1

Dataset: Gretel Safety Alignment v1
Number of samples: 5,997


Downloading Harmful Tasks dataset...
Source: CrystalEye42/eval-safety
Processing Harmful Tasks data...


Processing categories: 100%|██████████| 5/5 [00:00<00:00, 66156.21it/s]
Processing tasks: 100%|██████████| 11/11 [00:00<00:00, 31709.51it/s]



Dataset: Harmful Tasks
Number of samples: 1,650


Downloading RedTeam 2k dataset...
Source: JailbreakV-28K/JailBreakV-28k

Dataset: RedTeam 2k
Number of samples: 582


Combining datasets...

Original Dataset Distribution
Dataset                             Count   Percentage
--------------------------------------------------------------------------------
Aegis v2                           23,077       73.74%
Gretel Safety Alignment v1          5,994       19.15%
HarmfulTasks                        1,650        5.27%
RedTeam 2k                            573        1.83%
--------------------------------------------------------------------------------
Total                              31,294      100.00%

Saving full dataset to /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/nemo_safety_blend_v0.2.2.jsonl...
Full dataset saved with 31,294 samples

Performing uniform sampling across categories...
RedTeam 2k: 500 samples selected
HarmfulTasks: 500 samples selected
Gretel Sa

Sampling from categories: 100%|██████████| 4/4 [00:00<00:00, 546.67it/s]


In [9]:
# 2. Download Llama Nemotron Post-training Dataset
#
# Each listed SFT file is fetched from the Hugging Face Hub into the dataset
# cache, then reduced to at most SAMPLES_PER_FILE randomly chosen lines that
# are written to LLAMA_NEMO_DIR under the file's base name.

files = [
    "SFT/math/math_v1.1.jsonl",
    "SFT/code/code_v1.1.jsonl",
    "SFT/chat/chat.jsonl",
    "SFT/science/science.jsonl"
]

# Maximum number of lines kept from each downloaded file.
SAMPLES_PER_FILE = 1000

LLAMA_NEMO_DIR = f"{os.environ['DATASET_CACHE_DIR']}/Llama-Nemotron-Post-Training-Dataset"
Path(LLAMA_NEMO_DIR).mkdir(parents=True, exist_ok=True)


def _count_lines(path):
    """Return the number of lines in the text file at ``path``."""
    with open(path, 'r') as f:
        return sum(1 for _ in f)


def _write_random_sample(source_path, target_path, sample_size):
    """Write ``sample_size`` random lines of ``source_path`` to ``target_path``.

    Sampling is delegated to the ``shuf`` command-line tool. The output file
    is opened in a ``with`` block so its handle is closed as soon as ``shuf``
    finishes, instead of being left open for the life of the kernel.

    Raises:
        subprocess.CalledProcessError: if ``shuf`` exits with a non-zero status.
    """
    with open(target_path, 'w') as sample_file:
        subprocess.run(['shuf', '-n', str(sample_size), source_path],
                       stdout=sample_file, check=True)


for file in files:
    print(f"Downloading {file}...")
    downloaded_path = hf_hub_download(
        repo_id=POST_TRAINING_DATASET_NAME,
        filename=file,
        repo_type='dataset',
        cache_dir=os.environ['DATASET_CACHE_DIR']
    )

    filename = Path(file).name
    target_path = f"{LLAMA_NEMO_DIR}/{filename}"

    # Count lines to decide between sampling and copying the file whole.
    total_lines = _count_lines(downloaded_path)
    print(f"Total lines in file: {total_lines}")

    if total_lines > SAMPLES_PER_FILE:
        # NOTE: shuf is not seeded here, so the selected lines differ between runs.
        _write_random_sample(downloaded_path, target_path, SAMPLES_PER_FILE)
        print(f"Extracted {SAMPLES_PER_FILE} random samples to {target_path}")
    else:
        shutil.copy2(downloaded_path, target_path)
        print(f"File has fewer than {SAMPLES_PER_FILE} lines, copied all {total_lines} lines")

Downloading SFT/math/math_v1.1.jsonl...
Total lines in file: 10000
Extracted 1000 random samples to /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/math_v1.1.jsonl
Downloading SFT/code/code_v1.1.jsonl...
Total lines in file: 10000
Extracted 1000 random samples to /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/code_v1.1.jsonl
Downloading SFT/chat/chat.jsonl...
Total lines in file: 10000
Extracted 1000 random samples to /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/chat.jsonl
Downloading SFT/science/science.jsonl...
Total lines in file: 10000
Extracted 1000 random samples to /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/science.jsonl


In [23]:
# 3. Combine datasets
# Merge the sampled safety blend with the Llama-Nemotron samples into
# OUTPUT_DIR. combine_datasets.py is addressed by relative name, so the
# working directory must still be the notebooks folder set in step 1.
dataset_cache_dir = os.environ['DATASET_CACHE_DIR']
OUTPUT_DIR = f"{dataset_cache_dir}/sft_data"

combine_cmd = ['python3', 'combine_datasets.py']
combine_cmd += ['--safety_file', f"{dataset_cache_dir}/nemo_safety_blend_v0.2.2_sampled_2000.jsonl"]
combine_cmd += ['--llama_nemo_dir', LLAMA_NEMO_DIR]
combine_cmd += ['--output_dir', OUTPUT_DIR]
combine_cmd += ['--val_split', '0.03']
combine_cmd += ['--max_tokens', '16384']
combine_cmd += ['--max_samples', '5000']

# Left as the cell's final expression so the CompletedProcess is displayed.
subprocess.run(combine_cmd, check=True)


Loading safety dataset from /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/nemo_safety_blend_v0.2.2_sampled_2000.jsonl...
Loaded 2000 samples from safety dataset

Found 4 Llama-Nemotron files: ['/lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/math_v1.1.jsonl', '/lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/code_v1.1.jsonl', '/lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/chat.jsonl', '/lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/science.jsonl']

Loading /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/math_v1.1.jsonl as 'math_v1'...
Added 999/1000 items from math_v1

Loading /lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset/code_v1.1.jsonl as 'code_v1'...
Added 897/1

CompletedProcess(args=['python3', 'combine_datasets.py', '--safety_file', '/lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/nemo_safety_blend_v0.2.2_sampled_2000.jsonl', '--llama_nemo_dir', '/lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/Llama-Nemotron-Post-Training-Dataset', '--output_dir', '/lustre/fsw/portfolios/llmservice/users/ahazare/dataset_cache/sft_data', '--val_split', '0.03', '--max_tokens', '16384', '--max_samples', '5000'], returncode=0)

In [24]:
# 4. Start vLLM servers
# Two OpenAI-compatible vLLM API servers are launched in the background:
# the policy model on port 5000 and the safety model on port 6000, each
# pinned to its own GPU set through CUDA_VISIBLE_DEVICES.
os.environ.update({
    'VLLM_ENGINE_ITERATION_TIMEOUT_S': '36000',
    'VLLM_ALLOW_LONG_MAX_MODEL_LEN': '1',
    'VLLM_HOST': '0.0.0.0',
    'VLLM_TENSOR_PARALLEL_SIZE': '1',
    'POLICY_MODEL_GPUS': '0,1,2,3',
    'SAFETY_MODEL_GPUS': '4,5'
})


def start_vllm_server(model, port, served_model_name, gpus, log_path, extra_args=()):
    """Launch a vLLM OpenAI API server in the background and return its Popen.

    Args:
        model: Model name or local path passed to ``--model``.
        port: Port for the server to listen on.
        served_model_name: Name passed to ``--served-model-name``.
        gpus: Value for ``CUDA_VISIBLE_DEVICES`` in the child's environment.
        log_path: File that receives the server's stdout and stderr
            (truncated on open).
        extra_args: Additional command-line arguments, inserted right after
            the served-model name.

    Returns:
        The ``subprocess.Popen`` handle of the started server.
    """
    command = [
        'python3', '-m', 'vllm.entrypoints.openai.api_server',
        '--model', model,
        '--trust-remote-code',
        '--seed', '1',
        '--host', os.environ['VLLM_HOST'],
        '--port', str(port),
        '--served-model-name', served_model_name,
        *extra_args,
        '--tensor-parallel-size', os.environ['VLLM_TENSOR_PARALLEL_SIZE'],
        '--download-dir', os.environ['HF_HOME'],
    ]
    # The child process gets its own copy of the log descriptor when it is
    # spawned, so the kernel's handle can be closed right after Popen returns
    # instead of leaking for the lifetime of the notebook.
    with open(log_path, 'w') as log_file:
        return subprocess.Popen(
            command,
            env={**os.environ, 'CUDA_VISIBLE_DEVICES': gpus},
            stdout=log_file,
            stderr=subprocess.STDOUT,
        )


print("Starting policy model server...")
policy_server = start_vllm_server(
    model=MODEL_NAME,
    port=5000,
    served_model_name='test-model',
    gpus=os.environ['POLICY_MODEL_GPUS'],
    log_path=f"{LOG_DIR}/vllm-server-model.log",
    extra_args=('--enable-reasoning', '--reasoning-parser', 'deepseek_r1'),
)

print("Starting safety model server...")
safety_server = start_vllm_server(
    model=SAFETY_MODEL_NAME,
    port=6000,
    served_model_name='safety-model',
    gpus=os.environ['SAFETY_MODEL_GPUS'],
    log_path=f"{LOG_DIR}/vllm-server-safety.log",
)

# NOTE: Popen returns as soon as the processes are spawned; this cell does not
# wait for the servers to finish loading their models.

# Cleanup vLLM servers
# subprocess.run(['pkill', '-f', 'vllm.entrypoints.openai.api_server'])

Starting policy model server...
Starting safety model server...


In [16]:
# Cleanup vLLM servers
# subprocess.run(['pkill', '-f', 'vllm.entrypoints.openai.api_server'])

CompletedProcess(args=['pkill', '-f', 'vllm.entrypoints.openai.api_server'], returncode=1)

In [39]:
# Kill on-policy
# Signals every process whose full command line matches the generation script.
# The exit status is deliberately not checked (no check=True).
on_policy_pattern = 'generate_on_policy_data.py'
subprocess.run(['pkill', '-f', on_policy_pattern])

CompletedProcess(args=['pkill', '-f', 'generate_on_policy_data.py'], returncode=1)

In [25]:
# 5. Generate on-policy data
# Runs generate_on_policy_data.py once for the train split and once for the
# val split, against the two vLLM servers started in step 4. The script is
# addressed by relative name, so the working directory must be the notebooks
# folder set in step 1.

# On-Policy Data Generation parameters
CONCURRENCY = 16
MAX_ATTEMPTS = 3  # NOTE(review): defined but not passed to the script below
BATCH_SIZE = 96

MAX_TOKENS = 512
TEMPERATURE = 0.7
TOP_P = 0.9

print("Generating on-policy data...")
for dataset_type in ['train', 'val']:
    input_dataset = f"{OUTPUT_DIR}/{dataset_type}.jsonl"
    output_file = f"{OUTPUT_DIR}/{dataset_type}_on_policy_data.jsonl"
    log_path = f"{LOG_DIR}/{dataset_type}_on-policy.log"

    # The log is opened in a with-block so the handle is closed after every
    # run rather than leaking one open file per split.
    with open(log_path, 'w') as log_file:
        subprocess.run([
            'python3', 'generate_on_policy_data.py',
            '--model_name', MODEL_NAME,
            '--safety_model', SAFETY_MODEL_NAME,
            # NOTE(review): a token passed as an argument is visible in the
            # process list; prefer letting the script read HF_TOKEN from the
            # environment if it supports that.
            '--huggingface_token', os.environ['HF_TOKEN'],
            '--vllm_host', os.environ['VLLM_HOST'],
            '--vllm_model_port', '5000',
            '--vllm_safety_port', '6000',
            '--concurrency', str(CONCURRENCY),
            '--input_dataset', input_dataset,
            '--output', output_file,
            '--batch_size', str(BATCH_SIZE),
            '--max_tokens', str(MAX_TOKENS),
            '--temperature', str(TEMPERATURE),
            '--top_p', str(TOP_P)
        ],
            stdout=log_file,
            stderr=subprocess.STDOUT,
            # Fail loudly: without check=True a crashed generation run was
            # ignored and "Data is Ready" was printed regardless.
            check=True)

print("Data is Ready")

# Cleanup vLLM servers
# subprocess.run(['pkill', '-f', 'vllm.entrypoints.openai.api_server'])

Generating on-policy data...


In [27]:
# Cleanup vLLM servers
# Signals every process whose full command line matches the vLLM API server
# module. The exit status is not checked (no check=True).
vllm_server_pattern = 'vllm.entrypoints.openai.api_server'
subprocess.run(['pkill', '-f', vllm_server_pattern])

CompletedProcess(args=['pkill', '-f', 'vllm.entrypoints.openai.api_server'], returncode=0)

In [30]:
# !ps -aux | grep python

In [4]:
# 6. Run SFT
# Launches NeMo-RL's SFT example through `uv run` from the NeMo-RL checkout,
# using the config file under gtc_paris.
print("Running SFT...")
os.chdir(f"{BASE_DIR}/NeMo-RL")

# Directory that receives the SFT stdout log (a Python variable, not an
# environment variable).
# NOTE(review): open() below fails with FileNotFoundError if this directory
# does not exist yet; confirm it is created before this cell runs.
MODEL_DIR = f"{BASE_DIR}/NeMo-RL/results/sft_deepseek_8b_trial_step_300/step_50"

# The log is opened in a with-block so the handle is closed when training
# returns or raises, instead of leaking in the kernel.
with open(f"{MODEL_DIR}/sft.log", 'w') as sft_log:
    subprocess.run(['uv', 'run', 'python', 'examples/run_sft.py',
                    '--config', f"{BASE_DIR}/gtc_paris/deepseek_sft.yaml"
                   ],
                   # The child's TMPDIR is pointed at RAY_TMPDIR for this run only.
                   env={**os.environ, 'TMPDIR': os.environ['RAY_TMPDIR']},
                   # Only stdout is captured; stderr still reaches the notebook.
                   stdout=sft_log,
                   check=True)

Running SFT...




KeyboardInterrupt: 

Traceback (most recent call last):
  File "/lustre/fsw/portfolios/llmservice/users/ahazare/NeMo-RL/examples/run_sft.py", line 22, in <module>
    from transformers import AutoTokenizer
  File "/lustre/fsw/portfolios/llmservice/users/ahazare/NeMo-RL/.venv/lib/python3.12/site-packages/transformers/__init__.py", line 26, in <module>
    from . import dependency_versions_check
  File "/lustre/fsw/portfolios/llmservice/users/ahazare/NeMo-RL/.venv/lib/python3.12/site-packages/transformers/dependency_versions_check.py", line 16, in <module>
    from .utils.versions import require_version, require_version_core
  File "/lustre/fsw/portfolios/llmservice/users/ahazare/NeMo-RL/.venv/lib/python3.12/site-packages/transformers/utils/__init__.py", line 25, in <module>
    from .chat_template_utils import DocstringParsingException, TypeHintParsingException, get_json_schema
  File "/lustre/fsw/portfolios/llmservice/users/ahazare/NeMo-RL/.venv/lib/python3.12/site-packages/transformers/utils/chat_template

In [6]:
# 7. Convert checkpoint
# Converts the DCP weights stored under MODEL_DIR into a Hugging Face
# checkpoint directory, then checks that the expected files were written.
MODEL_DIR = f"{BASE_DIR}/NeMo-RL/results/sft_deepseek_8b_trial_step_300/step_50"
DCP_CKPT_PATH = f"{MODEL_DIR}/policy/weights/"
CONFIG_PATH = f"{MODEL_DIR}/config.yaml"
HF_CKPT_PATH = f"{MODEL_DIR}/hf_ckpt"

print("Converting checkpoint...")
os.chdir(f"{BASE_DIR}/NeMo-RL")

convert_cmd = ['uv', 'run', 'examples/convert_dcp_to_hf.py']
convert_cmd += ['--config', CONFIG_PATH]
convert_cmd += ['--dcp-ckpt-path', DCP_CKPT_PATH]
convert_cmd += ['--hf-ckpt-path', HF_CKPT_PATH]
subprocess.run(convert_cmd, check=True)

# Verify conversion: both files must be present in the output directory.
expected_files = ("pytorch_model.bin", "config.json")
missing_files = [name for name in expected_files
                 if not Path(f"{HF_CKPT_PATH}/{name}").exists()]
if missing_files:
    print("Conversion may have failed. Please check the output.")
else:
    print("Conversion successful!")
    print(f"The HuggingFace model is now available at: {HF_CKPT_PATH}")

Converting checkpoint...




tokenizer_name_or_path: deepseek-ai/deepseek-r1-distill-llama-8b
Saved HF checkpoint to: /lustre/fsw/portfolios/llmservice/users/ahazare/NeMo-RL/results/sft_deepseek_8b_trial_step_300/step_50/hf_ckpt
Conversion successful!
The HuggingFace model is now available at: /lustre/fsw/portfolios/llmservice/users/ahazare/NeMo-RL/results/sft_deepseek_8b_trial_step_300/step_50/hf_ckpt
