In [1]:
# Install the preprocessing libraries.
# `%pip` (rather than `!pip`) installs into the environment of the running kernel.
# `datasets` is pinned to the release recorded in this cell's captured output.
%pip install datasets==3.1.0
%pip install --upgrade --force-reinstall Pillow

Collecting datasets
  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)
Collecting pyarrow>=15.0.0 (from datasets)
  Downloading pyarrow-18.1.0-cp310-cp310-manylinux_2_28_x86_64.whl.metadata (3.3 kB)
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)
Collecting pandas (from datasets)
  Downloading pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (89 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m89.9/89.9 kB[0m [31m9.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting requests>=2.32.2 (from datasets)
  Downloading requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting tqdm>=4.66.3 (from datasets)
  Downloading tqdm-4.67.1-py3-none-any.whl.metadata (57 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.7/57.7 kB[0m [31m23.2 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting xxhash (from datasets)
  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_

In [2]:
import os
import json
import uuid
from PIL import Image
from io import BytesIO
from datasets import Dataset
import requests

def process_few_shot_to_json(data, output_folder, subset_name, request_timeout=30):
    """Convert few-shot chat records into an image folder plus a conversation JSON file.

    For every record the referenced image is downloaded and stored as
    ``<output_folder>/images/<uuid>.jpg``. All conversations are then written
    to ``<output_folder>/<subset_name>/dataset.json``.

    Args:
        data: Iterable of records. Each record is read as
            ``item["messages"][0]["content"]`` (system text),
            ``item["messages"][1]["content"][0]["text"]`` (user text),
            ``item["messages"][1]["content"][1]["image_url"]["url"]`` (image URL),
            ``item["messages"][2]["content"]`` (assistant text).
        output_folder: Root directory that receives ``images/`` and the subset folder.
        subset_name: Name of the sub-directory that receives ``dataset.json``.
        request_timeout: Seconds to wait for each image download before
            ``requests`` gives up (new, optional; defaults to 30).

    Returns:
        The list of dicts that was written to ``dataset.json``; each has the keys
        ``id``, ``image`` and ``conversations``.

    Raises:
        KeyError / IndexError: If a record does not have the layout described above.
        requests.RequestException: If an image cannot be downloaded (including
            non-2xx HTTP responses).
    """
    image_token = "<image>"

    # Define folders for saving data
    subset_folder = os.path.join(output_folder, subset_name)
    image_subfolder = os.path.join(output_folder, 'images')

    # Ensure folders exist (exist_ok avoids the check-then-create race)
    os.makedirs(image_subfolder, exist_ok=True)
    os.makedirs(subset_folder, exist_ok=True)

    json_data_list = []

    for item in data:
        messages = item["messages"]
        system_message = messages[0]["content"]
        user_text = messages[1]["content"][0]["text"]
        image_url = messages[1]["content"][1]["image_url"]["url"]
        assistant_response = messages[2]["content"]

        # The system message is deliberately folded into the assistant turn.
        combined_response = f"{system_message}\n\n{assistant_response}"

        # LLaVA's training data format marks where the image goes with an
        # "<image>" placeholder in the human turn; add it unless the text
        # already carries one.
        if image_token in user_text:
            human_value = user_text
        else:
            human_value = f"{image_token}\n{user_text}"

        # Download the image; fail loudly on timeouts and HTTP errors instead of
        # handing an error page to PIL.
        response = requests.get(image_url, timeout=request_timeout)
        response.raise_for_status()
        image = Image.open(BytesIO(response.content))

        # JPEG cannot store alpha/palette modes (RGBA, LA, P, ...); convert those
        # to RGB so saving as .jpg does not raise.
        if image.mode not in ("RGB", "L", "CMYK"):
            image = image.convert("RGB")

        unique_id = str(uuid.uuid4())
        image_filename = f"{unique_id}.jpg"
        image.save(os.path.join(image_subfolder, image_filename))

        json_data_list.append({
            "id": unique_id,
            "image": image_filename,  # File name relative to the images folder
            "conversations": [
                {"from": "human", "value": human_value},
                {"from": "gpt", "value": combined_response},
            ],
        })

    # Save JSON file
    json_output_path = os.path.join(subset_folder, 'dataset.json')
    with open(json_output_path, 'w', encoding='utf-8') as json_file:
        json.dump(json_data_list, json_file, indent=4)

    print(f"Data saved to {json_output_path}")
    return json_data_list

# Path to the JSONL file containing the labelled few-shot records
jsonl_file = '/workspace/output_data.jsonl'

# Load the JSONL data: one JSON document per line.
# Blank lines (e.g. a trailing newline at end of file) are skipped so they do
# not crash json.loads; the encoding is explicit so non-ASCII text decodes the
# same way regardless of the platform default.
with open(jsonl_file, "r", encoding="utf-8") as f:
    labeled_data = [json.loads(line) for line in f if line.strip()]

# Output folder
output_folder = 'dataset'

# Process the data and save it as JSON
processed_data = process_few_shot_to_json(labeled_data, output_folder, 'train')


Data saved to dataset/train/dataset.json


In [3]:
# Fetch the LLaVA sources. The clone is skipped when the checkout already
# exists, so re-running the notebook does not fail on an existing directory.
!test -d LLaVA || git clone https://github.com/haotian-liu/LLaVA.git

Cloning into 'LLaVA'...
remote: Enumerating objects: 2297, done.[K
remote: Total 2297 (delta 0), reused 0 (delta 0), pack-reused 2297 (from 1)[K
Receiving objects: 100% (2297/2297), 13.71 MiB | 25.76 MiB/s, done.
Resolving deltas: 100% (1404/1404), done.


In [4]:
# Install LLaVA in editable mode (base package, then the "train" extras) and flash-attn.
# `%pip` is used so every install targets the interpreter of the running kernel.
%pip install --upgrade pip
%pip install -e ./LLaVA
%pip install -e "./LLaVA[train]"
%pip install flash-attn --no-build-isolation

Collecting pip
  Downloading pip-24.3.1-py3-none-any.whl.metadata (3.7 kB)
Downloading pip-24.3.1-py3-none-any.whl (1.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m10.7 MB/s[0m eta [36m0:00:00[0m [36m0:00:01[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.3.1
    Uninstalling pip-23.3.1:
      Successfully uninstalled pip-23.3.1
Successfully installed pip-24.3.1
[0mObtaining file:///workspace/LLaVA
  Installing build dependencies ... [?25ldone
[?25h  Checking if build backend supports build_editable ... [?25ldone
[?25h  Getting requirements to build editable ... [?25ldone
[?25h  Preparing editable metadata (pyproject.toml) ... [?25ldone
[?25hCollecting torch==2.1.2 (from llava==1.2.2.post1)
  Downloading torch-2.1.2-cp310-cp310-manylinux1_x86_64.whl.metadata (25 kB)
Collecting torchvision==0.16.2 (from llava==1.2.2.post1)
  Downloading torchvision-0.16.2-cp310-cp310-m

In [5]:
# Extra training-time packages: DeepSpeed (its `deepspeed` command launches the
# training run below) and wandb (used for login and `--report_to wandb`).
# `%pip` installs into the environment of the running kernel.
%pip install deepspeed
%pip install wandb

[0m

In [6]:
# Authenticate with Weights & Biases before training; the training command
# in this notebook is launched with `--report_to wandb`.
import wandb

# In the recorded run this call prompted for an API key and appended it to
# /root/.netrc, then returned True.
wandb.login()


[34m[1mwandb[0m: Using wandb-core as the SDK backend.  Please refer to https://wandb.me/wandb-core for more information.
[34m[1mwandb[0m: Logging into wandb.ai. (Learn how to deploy a W&B server locally: https://wandb.me/wandb-server)
[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit:

  ········


[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

In [7]:
# LoRA fine-tuning run, launched through the `deepspeed` command on LLaVA's
# train_mem.py with the repository's scripts/zero3.json DeepSpeed config.
#   - Base model:   liuhaotian/llava-v1.5-13b, vision tower openai/clip-vit-large-patch14-336
#   - Inputs:       ./dataset/train/dataset.json and ./dataset/images
#                   (the paths written by the preprocessing cell above)
#   - Output:       ./checkpoints/llava-v1.5-13b-task-lora
#   - Schedule:     3 epochs, per-device batch size 16, lr 2e-4 (projector lr 2e-5),
#                   cosine scheduler with 3% warmup
#   - Logging:      every step, reported to wandb
# NOTE(review): --save_steps 50000 presumably exceeds the total step count for a
# small dataset, so no intermediate checkpoints would be written — confirm this is intended.
!deepspeed LLaVA/llava/train/train_mem.py \
    --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 \
    --deepspeed LLaVA/scripts/zero3.json \
    --model_name_or_path liuhaotian/llava-v1.5-13b \
    --version v1 \
    --data_path ./dataset/train/dataset.json \
    --image_folder ./dataset/images \
    --vision_tower openai/clip-vit-large-patch14-336 \
    --mm_projector_type mlp2x_gelu \
    --mm_vision_select_layer -2 \
    --mm_use_im_start_end False \
    --mm_use_im_patch_token False \
    --image_aspect_ratio pad \
    --group_by_modality_length True \
    --bf16 True \
    --output_dir ./checkpoints/llava-v1.5-13b-task-lora \
    --num_train_epochs 3 \
    --per_device_train_batch_size 16 \
    --per_device_eval_batch_size 4 \
    --gradient_accumulation_steps 1 \
    --evaluation_strategy "no" \
    --save_strategy "steps" \
    --save_steps 50000 \
    --save_total_limit 1 \
    --learning_rate 2e-4 \
    --weight_decay 0. \
    --warmup_ratio 0.03 \
    --lr_scheduler_type "cosine" \
    --logging_steps 1 \
    --tf32 True \
    --model_max_length 2048 \
    --gradient_checkpointing True \
    --dataloader_num_workers 4 \
    --lazy_preprocess True \
    --report_to wandb

[2024-11-28 05:15:27,908] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
[2024-11-28 05:15:29,876] [INFO] [runner.py:571:main] cmd = /usr/bin/python -u -m deepspeed.launcher.launch --world_info=eyJsb2NhbGhvc3QiOiBbMF19 --master_addr=127.0.0.1 --master_port=29500 --enable_each_rank_log=None LLaVA/llava/train/train_mem.py --lora_enable True --lora_r 128 --lora_alpha 256 --mm_projector_lr 2e-5 --deepspeed LLaVA/scripts/zero3.json --model_name_or_path liuhaotian/llava-v1.5-13b --version v1 --data_path ./dataset/train/dataset.json --image_folder ./dataset/images --vision_tower openai/clip-vit-large-patch14-336 --mm_projector_type mlp2x_gelu --mm_vision_select_layer -2 --mm_use_im_start_end False --mm_use_im_patch_token False --image_aspect_ratio pad --group_by_modality_length True --bf16 True --output_dir ./checkpoints/llava-v1.5-13b-task-lora --num_train_epochs 3 --per_device_train_batch_size 16 --per_device_eval_batch_size 4 --gradient_accumu

In [8]:
# Run LLaVA's merge_lora_weights.py on the LoRA checkpoint written by the
# training run (checkpoints/llava-v1.5-13b-task-lora), using
# liuhaotian/llava-v1.5-13b as the base model; the result is saved to
# ./deepfake-llava-model.
!python LLaVA/scripts/merge_lora_weights.py \
    --model-path checkpoints/llava-v1.5-13b-task-lora \
    --model-base liuhaotian/llava-v1.5-13b \
    --save-model-path deepfake-llava-model

[2024-11-28 05:57:00,639] [INFO] [real_accelerator.py:161:get_accelerator] Setting ds_accelerator to cuda (auto detect)
Loading LLaVA from base model...
  return self.fget.__get__(instance, owner)()
Loading checkpoint shards: 100%|██████████████████| 3/3 [00:00<00:00,  8.27it/s]
Loading additional LLaVA weights...
Loading LoRA weights...
Merging LoRA weights...
Model is loaded...
Non-default generation parameters: {'max_length': 4096}
