Skip to content

Commit

Permalink
Add pythainlp.coref
Browse files Browse the repository at this point in the history
  • Loading branch information
wannaphong committed Jun 4, 2023
1 parent c7c0367 commit 4d5d836
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/notes/installation.rst
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ where ``extras`` can be
- ``esupar`` (to support esupar engine)
- ``transformers_ud`` (to support transformers_ud engine)
- ``dependency_parsing`` (to support dependency parsing with all engine)
- ``coreference_resolution`` (to support coreference resolution with all engine)
- ``full`` (install everything)

For dependency details, look at `extras` variable in `setup.py <https://github.com/PyThaiNLP/pythainlp/blob/dev/setup.py>`_.
Expand Down
19 changes: 19 additions & 0 deletions pythainlp/coref/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2016-2023 PyThaiNLP Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
PyThaiNLP Coreference Resolution
"""
# Public API of the package. The name listed here must match what is imported
# below, otherwise ``from pythainlp.coref import *`` fails with AttributeError.
__all__ = ["coreference_resolution"]
from pythainlp.coref.core import coreference_resolution
29 changes: 29 additions & 0 deletions pythainlp/coref/_fastcoref.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2016-2023 PyThaiNLP Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import spacy


class FastCoref:
    """Wrapper that builds a ``fastcoref`` model and forwards predictions to it.

    :param model_name: model identifier handed to the ``fastcoref`` model class.
    :param nlp: spaCy pipeline passed to the model. When ``None`` (default),
        ``spacy.blank("th")`` is created for this instance.
    :param str device: device string passed to the model (default ``"cpu"``).
    :param str type: ``"FCoref"`` selects ``fastcoref.FCoref``; any other value
        selects ``fastcoref.LingMessCoref``.
    """

    def __init__(self, model_name, nlp=None, device="cpu", type="FCoref") -> None:
        # NOTE: ``type`` shadows the builtin, but it is part of the public
        # keyword interface, so the name is kept for backward compatibility.
        if type == "FCoref":
            from fastcoref import FCoref as _model
        else:
            from fastcoref import LingMessCoref as _model
        # Build the default pipeline here rather than in the signature: a
        # default argument is evaluated once at definition time, which would
        # construct the pipeline on import and share it across all instances.
        if nlp is None:
            nlp = spacy.blank("th")
        self.model_name = model_name
        self.nlp = nlp
        self.model = _model(self.model_name, device=device, nlp=self.nlp)

    def predict(self, texts: list):
        """Return ``self.model.predict(texts=texts)`` unchanged.

        :param list texts: texts forwarded to the underlying model.
        :return: whatever the underlying model's ``predict`` returns.
        """
        return self.model.predict(texts=texts)
23 changes: 23 additions & 0 deletions pythainlp/coref/core.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2016-2023 PyThaiNLP Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Module-level cache for the loaded model; filled on the first successful call.
model = None


def coreference_resolution(text, model_name="han-coref-v1.0", device="cpu"):
    """Run coreference resolution on ``text`` with the cached model.

    The model is loaded lazily on the first call and stored in the
    module-level ``model`` variable; later calls reuse that instance, so
    ``device`` only takes effect on the call that performs the load.

    :param text: input forwarded as-is to the model's ``predict`` method.
    :param str model_name: model to use; only ``"han-coref-v1.0"`` is
        supported.
    :param str device: device string passed to the model when it is loaded.
    :return: the value returned by the model's ``predict`` method.
    :raises ValueError: if ``model_name`` is not a supported model.
    """
    global model
    # Fail with a clear message instead of calling ``predict`` on ``None``
    # (or silently answering with a different, already-cached model).
    if model_name != "han-coref-v1.0":
        raise ValueError(
            f"Unsupported coreference model: {model_name!r}. "
            "Supported models: 'han-coref-v1.0'."
        )
    if model is None:
        from pythainlp.coref.han_coref import HanCoref
        model = HanCoref(device=device)
    return model.predict(text)
25 changes: 25 additions & 0 deletions pythainlp/coref/han_coref.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2016-2023 PyThaiNLP Project
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from pythainlp.coref._fastcoref import FastCoref
import spacy


class HanCoref(FastCoref):
    """``FastCoref`` preconfigured with the ``pythainlp/han-coref-v1.0`` model.

    :param str device: device string forwarded to ``FastCoref``
        (default ``"cpu"``).
    :param nlp: spaCy pipeline forwarded to ``FastCoref``. When ``None``
        (default), ``spacy.blank("th")`` is created for this instance.
    """

    def __init__(self, device="cpu", nlp=None) -> None:
        # Resolve the default here, not in the signature, so no pipeline is
        # built at import time and instances do not share one object.
        if nlp is None:
            nlp = spacy.blank("th")
        # Zero-argument super(): ``super(self.__class__, self)`` recurses
        # infinitely as soon as this class is itself subclassed.
        super().__init__(
            model_name="pythainlp/han-coref-v1.0",
            device=device,
            nlp=nlp,
        )
6 changes: 6 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,10 @@
"ufal.chu-liu-edmonds>=1.0.2",
"transformers>=4.22.1",
],
"coreference_resolution":{
"spacy>=3.0",
"fastcoref>=2.1.5",
},
"full": [
"PyYAML>=5.3.1",
"attacut>=1.0.4",
Expand Down Expand Up @@ -137,6 +141,8 @@
"thai_nner",
"wunsen>=0.0.3",
"spacy_thai>=0.7.1",
"spacy>=3.0",
"fastcoref>=2.1.5",
"ufal.chu-liu-edmonds>=1.0.2",
],
}
Expand Down

0 comments on commit 4d5d836

Please sign in to comment.