update lora fine-tuning res
teowu committed Jan 7, 2024
1 parent 9c9f6e4 commit 85de1aa
Showing 7 changed files with 67 additions and 9 deletions.
22 changes: 20 additions & 2 deletions lora_finetune/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,16 +14,34 @@ We propose a more efficient LoRA (tunes fewer parameters than the LLaVA-style default
sh scripts/${YOUR_DATASET}_lora.sh
```

The available dataset options are `agi` (for AGIQA-3K), `cgi` (for CGIQA-6K), `live` (for LIVE) and `csiq` (for CSIQ), though we discourage fine-tuning on datasets that are very similar to the original training corpus of **OneAlign** (doing so can make your adapted model less robust in deployment).
The available template dataset options are `agi` (for AGIQA-3K), `cgi` (for CGIQA-6K), `livec` (for LIVE-Challenge), `csiq` (for CSIQ) and `maxwell` (for MaxWell, *videos*).

Please bring your own datasets! (See [here](../playground/data/ft/) for examples of dataset preparation.)
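The linked examples are the authoritative reference for the file format. As a hedged sketch, a LLaVA-style training record pairing an image with a quality-rating conversation might look like the following; the field names (`id`, `image`, `conversations`) and the sample values are assumptions drawn from common LLaVA-format datasets, not confirmed from this repo:

```python
import json

# Hypothetical sketch of one fine-tuning record in LLaVA-style format.
# Check ../playground/data/ft/ for the actual schema used by this repo.
entry = {
    "id": "agiqa3k_0001",                # assumed sample identifier
    "image": "AGIQA-3K/example.jpg",     # path relative to --image_folder
    "conversations": [
        {"from": "human",
         "value": "<image>\nHow would you rate the quality of this image?"},
        {"from": "gpt",
         "value": "The quality of the image is good."},
    ],
}

# Training files hold a list of such records.
with open("train_split_1.json", "w") as f:
    json.dump([entry], f, indent=2)
```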

Note: we do not encourage fine-tuning on datasets that are very similar to the original training corpus of **OneAlign** (it might make your adapted model less robust). In that case, just use **Q-Align** directly.

To evaluate, please refer to the code below:

- IQA (all excluding MaxWell)

```shell
python q_align/evaluate/iqa_eval_lora_split.py --model-path ${YOUR_MODEL_PATH} --model-base q-future/one-align
```

By default (if `YOUR_MODEL_PATH` is not specified), it will automatically evaluate on the test set of AGIQA-3K (*split 1*).
- VQA (MaxWell)

Please modify `q_align/evaluate/vqa_eval.py` to evaluate only on MaxWell.

```shell
python q_align/evaluate/iqa_eval_lora_split.py --model-path q-future/q-align-maxwell-lora --model-base q-future/one-align
```


We will update more datasets and a complete performance report on this feature soon.
### Performance Report

| SRCC/PLCC | AGIQA-3K | CGIQA-6K | LIVE-C | CSIQ | MaxWell |
| --- | --- | --- | --- | --- | --- |
| Before LoRA Fine-tuning | 0.802/0.838 | 0.448/0.470 | 0.881/0.894 | 0.881/0.906 | 0.780/0.787 |
| After LoRA Fine-tuning | **0.880/0.920** | **0.847/0.849** | **0.920/0.934** | **0.929/0.949** | **0.803/0.816** |
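The paired numbers in each table cell follow the usual IQA reporting convention of SRCC/PLCC (Spearman and Pearson correlation between predicted scores and human mean opinion scores). A dependency-free sketch of both metrics, on toy data:

```python
from statistics import mean

def pearson(x, y):
    # Pearson linear correlation coefficient (PLCC).
    mx, my = mean(x), mean(y)
    cov = sum((a - mx) * (b - my) for a, b in zip(x, y))
    var_x = sum((a - mx) ** 2 for a in x)
    var_y = sum((b - my) ** 2 for b in y)
    return cov / (var_x * var_y) ** 0.5

def spearman(x, y):
    # Rank-transform, then Pearson on the ranks (SRCC). No tie handling
    # in this sketch; real evaluations should use scipy.stats.spearmanr.
    rank = lambda v: [sorted(v).index(e) + 1 for e in v]
    return pearson(rank(x), rank(y))

predicted = [0.72, 0.55, 0.91, 0.33, 0.80]  # toy model quality scores
mos       = [0.70, 0.60, 0.88, 0.30, 0.85]  # toy human opinion scores
print(f"{spearman(predicted, mos):.3f}/{pearson(predicted, mos):.3f}")
```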
2 changes: 1 addition & 1 deletion scripts/agi-5split-lora.sh
Expand Up @@ -11,7 +11,7 @@ do
--model_name_or_path $LOAD \
--version v1 \
--data_path $DATA_FILE \
--image_folder ../datasets/AGIQA-3K/ \
--image_folder playground/data/ \
--image_aspect_ratio pad \
--group_by_modality_length True \
--bf16 True \
Expand Down
4 changes: 2 additions & 2 deletions scripts/cgi-5split-lora.sh
Expand Up @@ -11,11 +11,11 @@ do
--model_name_or_path $LOAD \
--version v1 \
--data_path $DATA_FILE \
--image_folder ../datasets/CGIQA-6K/ \
--image_folder playground/data/ \
--image_aspect_ratio pad \
--group_by_modality_length True \
--bf16 True \
--output_dir ./q-align-agi-lora-$i \
--output_dir ./q-align-cgi-lora-$i \
--num_train_epochs 3 \
--per_device_train_batch_size 32 \
--per_device_eval_batch_size 4 \
Expand Down
2 changes: 1 addition & 1 deletion scripts/csiq-5split-lora.sh
Expand Up @@ -11,7 +11,7 @@ do
--model_name_or_path $LOAD \
--version v1 \
--data_path $DATA_FILE \
--image_folder ../datasets/ \
--image_folder playground/data/ \
--image_aspect_ratio pad \
--group_by_modality_length True \
--bf16 True \
Expand Down
40 changes: 40 additions & 0 deletions scripts/live-10split-lora.sh
@@ -0,0 +1,40 @@
#!/bin/bash
LOAD='q-future/one-align'

for i in $(seq 1 5)
do
echo "Split $i"
DATA_FILE=playground/data/ft/live/train_split_$i.json
deepspeed --master_port 25801 q_align/train/train_mem.py \
--deepspeed ./scripts/zero3.json \
--lora_enable True --visual_abstractor_lr 2e-5 \
--model_name_or_path $LOAD \
--version v1 \
--data_path $DATA_FILE \
--image_folder playground/data/ \
--image_aspect_ratio pad \
--group_by_modality_length True \
--bf16 True \
--output_dir ./q-align-live-lora-$i \
--num_train_epochs 3 \
--per_device_train_batch_size 32 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 1 \
--evaluation_strategy "no" \
--save_strategy "steps" \
--save_steps 800 \
--save_total_limit 3 \
--learning_rate 2e-4 \
--weight_decay 0. \
--warmup_ratio 0.03 \
--lr_scheduler_type "cosine" \
--logging_steps 1 \
--tf32 True \
--model_max_length 2048 \
--gradient_checkpointing True \
--tune_visual_abstractor True \
--freeze_vision_model False \
--dataloader_num_workers 4 \
--lazy_preprocess True \
--report_to wandb
done
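The scripts above loop over pre-generated cross-validation splits (e.g. `playground/data/ft/live/train_split_$i.json`). A hedged sketch of how such random train/test splits could be produced; the 80/20 ratio and the file layout here are assumptions for illustration, not taken from the repo:

```python
import json
import random

# Toy dataset records standing in for real annotation entries.
samples = [{"id": f"img_{k:04d}"} for k in range(100)]

random.seed(0)
for i in range(1, 6):                   # five splits, matching `seq 1 5`
    shuffled = samples[:]
    random.shuffle(shuffled)
    cut = int(0.8 * len(shuffled))      # assumed 80/20 train/test ratio
    with open(f"train_split_{i}.json", "w") as f:
        json.dump(shuffled[:cut], f)
    with open(f"test_split_{i}.json", "w") as f:
        json.dump(shuffled[cut:], f)
```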
2 changes: 1 addition & 1 deletion scripts/livec-5split-lora.sh
Expand Up @@ -11,7 +11,7 @@ do
--model_name_or_path $LOAD \
--version v1 \
--data_path $DATA_FILE \
--image_folder ../datasets/ \
--image_folder playground/data/ \
--image_aspect_ratio pad \
--group_by_modality_length True \
--bf16 True \
Expand Down
4 changes: 2 additions & 2 deletions scripts/maxwell-officialsplit-lora.sh
Expand Up @@ -11,12 +11,12 @@ do
--model_name_or_path $LOAD \
--version v1 \
--data_path $DATA_FILE \
--image_folder ../datasets/MaxWell \
--image_folder playground/data/ \
--image_aspect_ratio pad \
--group_by_modality_length True \
--bf16 True \
--output_dir ./q-align-maxwell-lora-$i \
--num_train_epochs 5 \
--num_train_epochs 1 \
--per_device_train_batch_size 4 \
--per_device_eval_batch_size 4 \
--gradient_accumulation_steps 8 \
Expand Down
