Merge pull request #56 from nbgao/master

CLEVR-mcan_small
MILVLG · Dec 30, 2019 · 8957180 · 8957180
2 parents 517bfcf + 043c97a
commit 8957180
Show file tree

Hide file tree

Showing 4 changed files with 55 additions and 10 deletions.
diff --git a/README.md b/README.md
@@ -29,7 +29,7 @@ Results and models are available in [MODEL ZOO](https://openvqa.readthedocs.io/e
 | [MFB](https://arxiv.org/abs/1708.01471v1) | ✓                               |                                                              |                                                        |
 | [MFH](https://arxiv.org/abs/1708.03619)   | ✓                               |                                                              |                                                        |
 | [BAN](https://arxiv.org/abs/1805.07932)   | ✓                               |       ✓                                                       |                                                        |
-| [MCAN](https://arxiv.org/abs/1906.10770)  | ✓                               | ✓                                                            |                                                        |
+| [MCAN](https://arxiv.org/abs/1906.10770)  | ✓                               | ✓                                                            | ✓                                                      |
 
 ## News & Updates
 

diff --git a/configs/clevr/mcan_small.yml b/configs/clevr/mcan_small.yml
@@ -11,13 +11,13 @@ FLAT_OUT_SIZE: 1024
 
 # Execution
 BATCH_SIZE: 64
-LR_BASE: 0.00003
+LR_BASE: 0.00004
 LR_DECAY_R: 0.2
 LR_DECAY_LIST: [13, 15]
 WARMUP_EPOCH: 3
 MAX_EPOCH: 16
 GRAD_NORM_CLIP: -1
-GRAD_ACCU_STEPS: 1
+GRAD_ACCU_STEPS: 2
 LOSS_FUNC: ce
 LOSS_REDUCTION: sum
 OPT: Adam

diff --git a/docs/_source/basic/model_zoo.md b/docs/_source/basic/model_zoo.md
@@ -25,7 +25,7 @@ We provide three groups of results (including the accuracies of *Overall*, *Yes/
 #### Train -> Val
 
 | Model                                                                                  | Base lr | Overall (%) | Yes/No (%) | Number (%) | Other (%) |
-| -------------------------------------------------------------------------------------- | ------- | ----------- | ---------- | ---------- | --------- |
+|:--------------------------------------------------------------------------------------:|:-------:|:-----------:|:----------:|:----------:|:---------:|
 | [BUTD](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/butd.yml)             | 2e-3    | 63.84       | 81.40      | 43.81      | 55.78     |
 | [MFB](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfb.yml)               | 7e-4    | 65.35       | 83.23      | 45.31      | 57.05     |
 | [MFH](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfh.yml)               | 7e-4    | 66.18       | 84.07      | 46.55      | 57.78     |
@@ -37,7 +37,7 @@ We provide three groups of results (including the accuracies of *Overall*, *Yes/
 #### Train+val -> Test-dev
 
 | Model                                                                                  | Base lr | Overall (%) | Yes/No (%) | Number (%) | Other (%) | Download                                                                                                                  |
-| -------------------------------------------------------------------------------------- | ------- | ----------- | ---------- | ---------- | --------- | ------------------------------------------------------------------------------------------------------------------------- |
+|:--------------------------------------------------------------------------------------:|:-------:|:-----------:|:----------:|:----------:|:---------:|:-------------------------------------------------------------------------------------------------------------------------:|
 | [BUTD](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/butd.yml)             | 2e-3    | 66.98       | 83.28      | 46.19      | 57.85     | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EWSOkcCVGMpAot9ol0IJP3ABv3cWFRvGFB67980PHiCk3Q?e=OkjDhj) |
 | [MFB](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfb.yml)               | 7e-4    | 68.29       | 84.64      | 48.29      | 58.89     | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ET-B23hG7UNPrQ0hha77V5kBMxAokIr486lB3YwMt-zhow?e=XBk7co) |
 | [MFH](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfh.yml)               | 7e-4    | 69.11       | 85.56      | 48.81      | 59.69     | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EUpvJD3c7NZJvBAbFOXTS0IBk1jCSz46bi7Pfq1kzJ35PA?e=be97so) |
@@ -49,7 +49,7 @@ We provide three groups of results (including the accuracies of *Overall*, *Yes/
 #### Train+val+vg -> Test-dev
 
 | Model                                                                                  | Base lr | Overall (%) | Yes/No (%) | Number (%) | Other (%) | Download                                                                                                                  |
-| -------------------------------------------------------------------------------------- | ------- | ----------- | ---------- | ---------- | --------- | ------------------------------------------------------------------------------------------------------------------------- |
+|:--------------------------------------------------------------------------------------:|:-------:|:-----------:|:----------:|:----------:|:---------:|:-------------------------------------------------------------------------------------------------------------------------:|
 | [BUTD](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/butd.yml)             | 2e-3    | 67.54       | 83.48      | 46.97      | 58.62     | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EbLMhJsx9AVJi-ipqtkzHckBS5TWo_au3T8wHPEdDKMgPQ?e=kozuxV) |
 | [MFB](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfb.yml)               | 7e-4    | 68.25       | 84.79      | 48.24      | 58.68     | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EfLYkv1XBgNJgOMU5PAo04YBHxAVmpeJtnZecqJztJdNig?e=OVPJSk) |
 | [MFH](https://github.com/MILVLG/openvqa/tree/master/configs/vqa/mfh.yml)               | 7e-4    | 68.86       | 85.38      | 49.27      | 59.21     | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EXGNuWmba8JOnQkkpfqokqcBzJ6Yw1ID6hl7hj2nyJaNJA?e=3TL5HC) |
@@ -63,12 +63,12 @@ We provide a group of results (including *Accuracy*, *Binary*, *Open*, *Validity
 
 - **Train+val -> Test-dev**: trained on the `train(balance) + val(balance)` splits and evaluated on the `test-dev(balance)` split. 
 
-*The results shown in the following are obtained from the [online server](https://evalai.cloudcv.org/web/challenges/challenge-page/225/overview). Note that the offline Test-dev result is evaluated by the provided offical script, which results in slight difference with the online result due to some unknow reasons.
+**The results shown in the following are obtained from the [online server](https://evalai.cloudcv.org/web/challenges/challenge-page/225/overview). Note that the offline Test-dev result is evaluated by the provided official script, which results in a slight difference compared to the online result for unknown reasons.**
 
 #### Train+val -> Test-dev
 
 | Model | Base lr | Accuracy (%) | Binary (%) | Open (%) | Validity (%) | Plausibility (%) | Consistency (%) | Distribution | Download |
-| ------| ------- | ------------ | ---------- | -------- | ------------ | ---------------- |---------------- | ------------ | -------- |
+|:------:|:-------:|:------------:|:----------:|:--------:|:------------:|:----------------:|:----------------:|:------------:|:--------:|
 | [BUTD (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/butd.yml)        | 2e-4    | 53.38       | 67.78      | 40.72      | 96.62     | 84.81     | 77.62     | 1.26     | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EaalaQ6VmBJCgeoZiPp45_gBn20g7tpkp-Uq8IVFcun64w?e=WgRMEj) |
 | [BAN-4 (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/ban_4.yml)        | 2e-4    | 55.01       | 72.02      | 40.06      | 96.94     | 85.67     | 81.85     | 1.04     | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/EdRIuVXaJqBJoXg3T7N0xfYBsPl-GlgW2hq2toqm2gOxXg?e=hPng3c) |
 | [BAN-8 (frcn+bbox)](https://github.com/MILVLG/openvqa/tree/master/configs/gqa/ban_8.yml)        | 1e-4    | 56.19       | 73.31      | 41.13      | 96.77     | 85.58     | 84.64     | 1.09     | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ES8FCQxFsqJBnvdoOcF_724BJgJml6iStYYK9UeUbI8Uyw?e=Pcff9r) |
@@ -81,3 +81,12 @@ We provide a group of results (including *Accuracy*, *Binary*, *Open*, *Validity
 
 ## CLEVR
 
+We provide a group of results (including *Overall*, *Count*, *Exist*, *Compare Numbers*, *Query Attribute*, *Compare Attribute*) for each model on CLEVR as follows.  
+
+- **Train -> Val**: trained on the `train` split and evaluated on the `val` split. 
+
+#### Train -> Val
+
+| Model | Base lr | Overall (%) | Count (%) | Exist (%) | Compare Numbers (%) | Query Attribute (%) | Compare Attribute (%) | Download |
+|:-----:|:-------:|:-------------:|:--------:|:--------:|:--------:|:--------:|:--------:|:--------:|
+| [MCAN-small](https://github.com/MILVLG/openvqa/tree/master/configs/clevr/mcan_small.yml) | 4e-5 | 98.74 | 96.81 | 99.27 | 98.89 | 99.53 | 99.19 | [model](https://awma1-my.sharepoint.com/:u:/g/personal/yuz_l0_tn/ERtwnuAoeHNKjs0qTkWC3cYBWVuUk7BLk88cnCKNFxYYlQ?e=lTRULt) |
diff --git a/openvqa/datasets/clevr/eval/result_eval.py b/openvqa/datasets/clevr/eval/result_eval.py
@@ -60,10 +60,46 @@ def eval(__C, dataset, ans_ix_list, pred_list, result_eval_file, ensemble_file,
 
         print('Write to log file: {}'.format(log_file))
         logfile = open(log_file, 'a+')
+        q_dict = {}
         for q_type, vals in sorted(correct_by_q_type.items()):
             vals = np.asarray(vals)
-            print(q_type, '%d / %d = %.2f' % (vals.sum(), vals.shape[0], 100.0 * vals.mean()))
-            logfile.write(q_type + ' : ' + '%d / %d = %.2f\n' % (vals.sum(), vals.shape[0], 100.0 * vals.mean()))
+            q_dict[q_type] = [vals.sum(), vals.shape[0]]
+            # print(q_type, '%d / %d = %.2f' % (vals.sum(), vals.shape[0], 100.0 * vals.mean()))
+            # logfile.write(q_type + ' : ' + '%d / %d = %.2f\n' % (vals.sum(), vals.shape[0], 100.0 * vals.mean()))
+
+        # Score Summary
+        score_type = ['Overall', 'Count', 'Exist', 'Compare_Numbers', 'Query_Attribute', 'Compare_Attribute']
+        compare_numbers_type = ['greater_than', 'less_than']
+        query_attribute_type = ['query_color', 'query_material', 'query_shape', 'query_size']
+        compare_attribute_type =  ['equal_color', 'equal_integer', 'equal_material', 'equal_shape', 'equal_size']
+        score_dict = {}
+        score_dict['Overall'] = q_dict['Overall']
+        score_dict['Count'] = q_dict['count']
+        score_dict['Exist'] = q_dict['exist']
+
+        correct_num, total_num = 0, 0
+        for q_type in compare_numbers_type:
+            correct_num += q_dict[q_type][0]
+            total_num += q_dict[q_type][1]
+        score_dict['Compare_Numbers'] = [correct_num, total_num]
+
+        correct_num, total_num = 0, 0
+        for q_type in query_attribute_type:
+            correct_num += q_dict[q_type][0]
+            total_num += q_dict[q_type][1]
+        score_dict['Query_Attribute'] = [correct_num, total_num]
+
+        correct_num, total_num = 0, 0
+        for q_type in compare_attribute_type:
+            correct_num += q_dict[q_type][0]
+            total_num += q_dict[q_type][1]
+        score_dict['Compare_Attribute'] = [correct_num, total_num]
+
+        for q_type in score_type:
+            val, tol = score_dict[q_type]
+            print(q_type, '%d / %d = %.2f' % (val, tol, 100.0 * val / tol))
+            logfile.write(q_type + ' : ' + '%d / %d = %.2f\n' % (val, tol, 100.0 * val / tol))
+
         logfile.write("\n")
         logfile.close()