Skip to content

Commit

Permalink
FEA/update readme and docstring
Browse files Browse the repository at this point in the history
  • Loading branch information
mengyuxian committed Oct 3, 2020
1 parent 564e90e commit bbf4a29
Show file tree
Hide file tree
Showing 19 changed files with 21 additions and 165 deletions.
20 changes: 20 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,23 @@ dataset path, bert model path and log path, respectively.
and save the top-k checkpoints to `default_root_dir`.

To evaluate them, use `evaluate.py`.


## Results
Using the training scripts in `scripts/reproduce`, one should be able to reproduce the results for the four datasets below:

### Flat NER Datasets

| Dataset | Zh-MSRA | Zh-OntoNotes4.0 |
|---|---|---|
| Previous SOTA | 95.54 | 81.63 |
| Our method | **95.75** | **82.11** |
| | **(+0.21)** | **(+0.48)** |

### Nested NER Datasets

| Dataset | ACE 2004 | ACE 2005 |
|---|---|---|
| Previous SOTA | 84.7 | 84.33 |
| Our method | **85.98** | **86.88** |
| | **(+1.28)** | **(+2.55)** |
10 changes: 0 additions & 10 deletions datasets/collate_functions.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: collate_functions
@time: 2020/9/12 14:21
@desc:
"""

import torch
from typing import List
Expand Down
10 changes: 0 additions & 10 deletions datasets/mrc_ner_dataset.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: mrc_ner_dataset
@time: 2020/9/6 14:27
@desc:
"""


import json
Expand Down
10 changes: 0 additions & 10 deletions datasets/truncate_dataset.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: TruncateDatset
@time: 2020/7/3 17:48
Truncate Dataset, mostly used for debugging
"""

from torch.utils.data import Dataset

Expand Down
9 changes: 0 additions & 9 deletions evaluate.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com

@version: 1.0
@file: evaluate
@time: 2020/8/28 21:36
@desc:
"""

import os
from pytorch_lightning import Trainer
Expand Down
10 changes: 0 additions & 10 deletions loss/adaptive_dice_loss.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: adaptive_dice_loss
@time: 2020/9/14 17:24
@desc: todo
"""


import torch
Expand Down
10 changes: 0 additions & 10 deletions loss/dice_loss.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: dice_loss
@time: 2020/9/9 10:55
@desc: todo
"""


import torch
Expand Down
10 changes: 0 additions & 10 deletions metrics/functional/query_span_f1.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: query_span_f1
@time: 2020/9/6 20:05
@desc:
"""


import torch
Expand Down
10 changes: 0 additions & 10 deletions metrics/query_span_f1.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: query_span_f1
@time: 2020/9/7 20:40
@desc:
"""


from pytorch_lightning.metrics.metric import TensorMetric
Expand Down
9 changes: 0 additions & 9 deletions models/bert_query_ner.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com

@version: 1.0
@file: bert_query_ner
@time: 2020/9/6 17:01
@desc:
"""

import torch
import torch.nn as nn
Expand Down
10 changes: 0 additions & 10 deletions models/classifier.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: classifier
@time: 2020/9/6 17:02
@desc:
"""


import torch.nn as nn
Expand Down
10 changes: 0 additions & 10 deletions models/query_ner_config.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: query_ner_config
@time: 2020/9/6 22:18
@desc:
"""


from transformers import BertConfig
Expand Down
10 changes: 0 additions & 10 deletions ner2mrc/genia2mrc.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: genia2mrc.py
@time: 2020/9/12 20:19
@desc:
"""


import os
Expand Down
9 changes: 0 additions & 9 deletions ner2mrc/msra2mrc.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com

@version: 1.0
@file: msra2mrc
@time: 2020/9/9 20:56
@desc: Convert MSRA raw data to MRC format
"""

import os
from utils.bmes_decode import bmes_decode
Expand Down
2 changes: 1 addition & 1 deletion scripts/reproduce/ace05.sh
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ python trainer.py \
--warmup_steps $WARMUP \
--max_length $MAXLEN \
--gradient_clip_val $MAXNORM \
--optimizer "sgd"
--optimizer "adamw"
10 changes: 0 additions & 10 deletions trainer.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: trainer
@time: 2020/9/6 14:26
@desc: pytorch-lightning trainer
"""


import argparse
Expand Down
10 changes: 0 additions & 10 deletions utils/bmes_decode.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,4 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com
@version: 1.0
@file: bmes_decode
@time: 2020/9/8 10:55
@desc:
"""


from typing import Tuple, List
Expand Down
8 changes: 0 additions & 8 deletions utils/get_parser.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com

@version: 1.0
@file: get_parser
@time: 2020/7/9 15:48
"""

import argparse

Expand Down
9 changes: 0 additions & 9 deletions utils/radom_seed.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,5 @@
# encoding: utf-8
"""
@author: Yuxian Meng
@contact: yuxian_meng@shannonai.com

@version: 1.0
@file: radom_seed
@time: 2020/7/9 15:53
这一行开始写关于本文件的说明与解释
"""

import numpy as np
import torch
Expand Down

0 comments on commit bbf4a29

Please sign in to comment.