In [5]:
import os
import json

# Path and model configuration for the per-machine script generator.
# JSON dataset; its entry count decides how the index range is split.
DATA_PATH = "/mnt/lingjiejiang/multimodal_code/data/dpo/merged_html_chart_150k.json"
# Directory (relative to the working directory) that receives the run_<k>.sh files.
OUTPUT_DIR = "qwen_bash"
# Python script that every generated shell script invokes.
PYTHON_SCRIPT = "data_process/dpo_openmodel/html_generate.py"
# Passed as --model_name and also used to build the log directory name.
MODEL_NAME = "Qwen2-VL-72B-Instruct"
# Passed as --model_path.
MODEL_PATH = "/mnt/lingjiejiang/multimodal_code/checkpoints/llms/Qwen2-VL-72B-Instruct"
# Exported as CUDA_VISIBLE_DEVICES in every generated script.
CUDA_DEVICES = "0,1,2,3,4,5,6,7"

def get_total_lines(file_path):
    """Return the number of top-level entries in a JSON file.

    Despite the name, text lines are not counted: the whole file is parsed as
    one JSON document and ``len()`` of the decoded value is returned (element
    count for an array, key count for an object).

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        int: ``len()`` of the decoded top-level JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
        TypeError: If the top-level value has no length (e.g. a bare number).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform/locale default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        return len(json.load(f))

def generate_bash_scripts(machine_num):
    """Generate one ``run_<k>.sh`` launcher script per machine.

    The dataset at ``DATA_PATH`` is split into ``machine_num`` contiguous index
    ranges of ``total // machine_num`` entries each; the last range also
    absorbs the remainder. Script ``k`` (1-based) is written to
    ``OUTPUT_DIR/run_<k>.sh``. Each script exports ``CUDA_VISIBLE_DEVICES``,
    runs ``PYTHON_SCRIPT`` on its ``[start_index, end_index)`` range with its
    stdout appended to ``<log_dir>/dpo_<start>_<end>.log`` via ``tee -a``, and
    finally runs ``python run_gpu.py``.

    Args:
        machine_num: Number of machines, i.e. number of scripts to write.
            Must be at least 1.

    Raises:
        ValueError: If ``machine_num`` is smaller than 1.
    """
    import shlex  # function-scope import: only needed to quote shell words

    # Fail early with a clear message: 0 would otherwise surface as a
    # ZeroDivisionError and a negative value would silently write nothing.
    if machine_num < 1:
        raise ValueError(f"machine_num must be >= 1, got {machine_num!r}")

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    log_dir = f"/mnt/lingjiejiang/multimodal_code/data/dpo/{MODEL_NAME}_log"
    os.makedirs(log_dir, exist_ok=True)  # create the log directory up front

    total_lines = get_total_lines(DATA_PATH)
    # Entries per machine. When machine_num exceeds the entry count this is 0,
    # so every range except the last one is empty.
    batch_size = total_lines // machine_num

    # Accept either a "0,1,2" string or an iterable of ids for CUDA_DEVICES.
    if isinstance(CUDA_DEVICES, str):
        visible_devices = CUDA_DEVICES
    else:
        visible_devices = ",".join(str(device) for device in CUDA_DEVICES)

    for i in range(machine_num):
        start_index = i * batch_size
        # The last machine takes whatever is left after the even split.
        is_last_machine = i == machine_num - 1
        end_index = total_lines if is_last_machine else (i + 1) * batch_size

        log_file = f"{log_dir}/dpo_{start_index}_{end_index}.log"
        script_name = os.path.join(OUTPUT_DIR, f"run_{i+1}.sh")

        # shlex.quote leaves plain words unchanged and protects values that
        # contain spaces or shell metacharacters.
        script_lines = [
            "#!/bin/bash",
            "",
            f"export CUDA_VISIBLE_DEVICES={shlex.quote(visible_devices)}",
            "",
            f"python {shlex.quote(PYTHON_SCRIPT)} \\",
            f"    --model_name {shlex.quote(MODEL_NAME)} \\",
            f"    --model_path {shlex.quote(MODEL_PATH)} \\",
            f"    --start_index {start_index} \\",
            f"    --end_index {end_index} | tee -a {shlex.quote(log_file)}",
            "",
            "python run_gpu.py",
            "",
        ]

        # Explicit encoding and LF newlines keep the script valid for bash
        # regardless of the platform that generates it.
        with open(script_name, "w", encoding="utf-8", newline="\n") as f:
            f.write("\n".join(script_lines))

        # The script is not marked executable here; run it with `bash <script>`.
        print(f"Generated {script_name} with index range [{start_index}, {end_index}) and logging to {log_file}")

if __name__ == "__main__":
    # Ask interactively how many machines to target; int() raises ValueError
    # when the answer is not an integer literal.
    machine_num = int(
        input("Enter the number of machines: ")
    )
    generate_bash_scripts(machine_num=machine_num)


Generated qwen_bash/run_1.sh with index range [0, 12539) and logging to /mnt/lingjiejiang/multimodal_code/data/dpo/Qwen2-VL-72B-Instruct_log/dpo_0_12539.log
Generated qwen_bash/run_2.sh with index range [12539, 25078) and logging to /mnt/lingjiejiang/multimodal_code/data/dpo/Qwen2-VL-72B-Instruct_log/dpo_12539_25078.log
Generated qwen_bash/run_3.sh with index range [25078, 37617) and logging to /mnt/lingjiejiang/multimodal_code/data/dpo/Qwen2-VL-72B-Instruct_log/dpo_25078_37617.log
Generated qwen_bash/run_4.sh with index range [37617, 50156) and logging to /mnt/lingjiejiang/multimodal_code/data/dpo/Qwen2-VL-72B-Instruct_log/dpo_37617_50156.log
Generated qwen_bash/run_5.sh with index range [50156, 62695) and logging to /mnt/lingjiejiang/multimodal_code/data/dpo/Qwen2-VL-72B-Instruct_log/dpo_50156_62695.log
Generated qwen_bash/run_6.sh with index range [62695, 75234) and logging to /mnt/lingjiejiang/multimodal_code/data/dpo/Qwen2-VL-72B-Instruct_log/dpo_62695_75234.log
Generated qwen_bas

In [7]:
import os
import json

# Path and model configuration for the single-machine multi-GPU script generator.
# JSON dataset; its entry count decides how the index range is split.
DATA_PATH = "/mnt/lingjiejiang/multimodal_code/data/dpo/merged_html_chart_150k.json"
# Directory (relative to the working directory) that receives run_multi_gpu.sh.
OUTPUT_DIR = "qwen_bash_7b"
# Python script launched once per GPU by the generated shell script.
PYTHON_SCRIPT = "data_process/dpo_openmodel/html_generate_7b.py"
# Passed as --model_name and also used to build the log directory name.
MODEL_NAME = "Qwen2-VL-7B-Instruct"
# Passed as --model_path.
MODEL_PATH = "/mnt/lingjiejiang/textual_aesthetics/model_checkpoint/vlm_checkpoints/Qwen2-VL-7B-Instruct"
# GPU ids to use; one background command is generated per entry, each with
# CUDA_VISIBLE_DEVICES set to that single id.
CUDA_DEVICES = ["0", "1", "2", "3"]  # set the available GPU ids

def get_total_lines(file_path):
    """Return the number of top-level entries in a JSON file.

    Despite the name, text lines are not counted: the whole file is parsed as
    one JSON document and ``len()`` of the decoded value is returned (element
    count for an array, key count for an object).

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        int: ``len()`` of the decoded top-level JSON value.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
        TypeError: If the top-level value has no length (e.g. a bare number).
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform/locale default encoding.
    with open(file_path, "r", encoding="utf-8") as f:
        return len(json.load(f))

def generate_multi_gpu_script(inference_batch_size=64):
    """Generate ``run_multi_gpu.sh`` to run one worker per GPU on one machine.

    The dataset at ``DATA_PATH`` is split into one contiguous index range per
    entry of ``CUDA_DEVICES`` (``total // num_gpus`` entries each; the last
    range also absorbs the remainder). For every GPU the script starts
    ``PYTHON_SCRIPT`` in the background with ``CUDA_VISIBLE_DEVICES`` set to
    that GPU id, appending its stdout to ``<log_dir>/dpo_<start>_<end>.log``
    via ``tee -a``. A final ``wait`` blocks until all background jobs finish.

    Args:
        inference_batch_size: Value written after ``--batch_size`` in every
            command. Defaults to 64, the value that was previously hard-coded.

    Raises:
        ValueError: If ``inference_batch_size`` is smaller than 1 or
            ``CUDA_DEVICES`` contains no GPU id.
    """
    import shlex  # function-scope import: only needed to quote shell words

    if inference_batch_size < 1:
        raise ValueError(
            f"inference_batch_size must be >= 1, got {inference_batch_size!r}"
        )

    # Accept either an iterable of ids or a "0,1,2" string; iterating a string
    # directly would treat every character (commas included) as a GPU id.
    if isinstance(CUDA_DEVICES, str):
        gpu_ids = [token.strip() for token in CUDA_DEVICES.split(",") if token.strip()]
    else:
        gpu_ids = [str(device) for device in CUDA_DEVICES]

    # Without this check an empty list would surface as a ZeroDivisionError.
    if not gpu_ids:
        raise ValueError("CUDA_DEVICES must contain at least one GPU id")

    os.makedirs(OUTPUT_DIR, exist_ok=True)
    log_dir = f"/mnt/lingjiejiang/multimodal_code/data/dpo/{MODEL_NAME}_log"
    os.makedirs(log_dir, exist_ok=True)  # create the log directory up front

    total_lines = get_total_lines(DATA_PATH)
    num_gpus = len(gpu_ids)
    # Number of dataset entries handled by each GPU (not the model batch size).
    shard_size = total_lines // num_gpus

    script_name = os.path.join(OUTPUT_DIR, "run_multi_gpu.sh")

    # Explicit encoding and LF newlines keep the script valid for bash
    # regardless of the platform that generates it.
    with open(script_name, "w", encoding="utf-8", newline="\n") as f:
        f.write("#!/bin/bash\n\n")

        for i, gpu_id in enumerate(gpu_ids):
            start_index = i * shard_size
            # The last GPU takes whatever is left after the even split.
            end_index = total_lines if i == num_gpus - 1 else (i + 1) * shard_size

            log_file = f"{log_dir}/dpo_{start_index}_{end_index}.log"

            # shlex.quote leaves plain words unchanged and protects values
            # that contain spaces or shell metacharacters.
            command_lines = [
                f"CUDA_VISIBLE_DEVICES={shlex.quote(gpu_id)} python {shlex.quote(PYTHON_SCRIPT)} \\",
                f"    --model_name {shlex.quote(MODEL_NAME)} \\",
                f"    --model_path {shlex.quote(MODEL_PATH)} \\",
                f"    --batch_size {inference_batch_size} \\",
                f"    --start_index {start_index} \\",
                # Trailing "&" puts the whole pipeline in the background.
                f"    --end_index {end_index} | tee -a {shlex.quote(log_file)} &",
                "",
            ]
            f.write("\n".join(command_lines))

        f.write("\nwait\n")  # block until every background job has finished

    # The script is not marked executable here; run it with `bash <script>`.
    print(f"Generated {script_name} for multi-GPU execution.")

# Entry point: write the multi-GPU launcher script when this code runs as the
# main module.
if __name__ == "__main__":
    generate_multi_gpu_script()


Generated qwen_bash_7b/run_multi_gpu.sh for multi-GPU execution.
