Skip to content

Commit

Permalink
Adding Control for Annotators with One Column (#12997)
Browse files Browse the repository at this point in the history
* SPARKNLP-632 Adding control for annotators with only one expected column on the Python side

* SPARKNLP-632 Adding inputAnnotatorTypes and optionalInputAnnotatorTypes to the Python annotators

* SPARKNLP-632 Removing useless prints to terminal

* SPARKNLP-632 Adding AnnotatorType initialization in Python

* SPARKNLP-632 Removing wrong imports
  • Loading branch information
danilojsl committed Nov 10, 2022
1 parent 288afaf commit 902b07d
Show file tree
Hide file tree
Showing 97 changed files with 520 additions and 10 deletions.
3 changes: 3 additions & 0 deletions python/sparknlp/annotator/audio/wav2vec2_for_ctc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
"""Contains classes concerning Wav2Vec2ForCTC."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class Wav2Vec2ForCTC(AnnotatorModel,
Expand Down Expand Up @@ -86,6 +87,8 @@ class Wav2Vec2ForCTC(AnnotatorModel,
"""
name = "Wav2Vec2ForCTC"

inputAnnotatorTypes = [AnnotatorType.AUDIO]

configProtoBytes = Param(Params._dummy(),
"configProtoBytes",
"ConfigProto from tensorflow, serialized into byte array. Get with "
Expand Down
2 changes: 2 additions & 0 deletions python/sparknlp/annotator/chunker.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.
"""Contains classes for the Chunker."""
from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class Chunker(AnnotatorModel):
Expand Down Expand Up @@ -100,6 +101,7 @@ class Chunker(AnnotatorModel):
--------
PerceptronModel : for Part-Of-Speech tagging
"""
inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.POS]

regexParsers = Param(Params._dummy(),
"regexParsers",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class AlbertForQuestionAnswering(AnnotatorModel,
Expand Down Expand Up @@ -87,6 +88,8 @@ class AlbertForQuestionAnswering(AnnotatorModel,
"""
name = "AlbertForQuestionAnswering"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes concerning AlbertForSequenceClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class AlbertForSequenceClassification(AnnotatorModel,
Expand Down Expand Up @@ -101,6 +102,8 @@ class AlbertForSequenceClassification(AnnotatorModel,
"""
name = "AlbertForSequenceClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes for AlbertForTokenClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class AlbertForTokenClassification(AnnotatorModel,
Expand Down Expand Up @@ -96,6 +97,8 @@ class AlbertForTokenClassification(AnnotatorModel,

name = "AlbertForTokenClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class BertForQuestionAnswering(AnnotatorModel,
Expand Down Expand Up @@ -87,6 +88,8 @@ class BertForQuestionAnswering(AnnotatorModel,
"""
name = "BertForQuestionAnswering"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes for BertForSequenceClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class BertForSequenceClassification(AnnotatorModel,
Expand Down Expand Up @@ -101,6 +102,8 @@ class per document by averaging probabilities in all sentences, by
"""
name = "BertForSequenceClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes for BertForTokenClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class BertForTokenClassification(AnnotatorModel,
Expand Down Expand Up @@ -94,6 +95,8 @@ class BertForTokenClassification(AnnotatorModel,
"""
name = "BertForTokenClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains classes for CamemBertForTokenClassification."""

from sparknlp.common import *


Expand Down Expand Up @@ -91,6 +90,8 @@ class CamemBertForTokenClassification(AnnotatorModel,
"""
name = "CamemBertForTokenClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
4 changes: 4 additions & 0 deletions python/sparknlp/annotator/classifier_dl/classifier_dl.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from sparknlp.annotator.param import EvaluationDLParams, ClassifierEncoder
from sparknlp.base import DocumentAssembler
from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class ClassifierDLApproach(AnnotatorApproach, EvaluationDLParams, ClassifierEncoder):
Expand Down Expand Up @@ -114,6 +115,7 @@ class ClassifierDLApproach(AnnotatorApproach, EvaluationDLParams, ClassifierEnco
MultiClassifierDLApproach : for multi-class classification
SentimentDLApproach : for sentiment analysis
"""
inputAnnotatorTypes = [AnnotatorType.SENTENCE_EMBEDDINGS]

dropout = Param(Params._dummy(), "dropout", "Dropout coefficient", TypeConverters.toFloat)

Expand Down Expand Up @@ -235,6 +237,8 @@ class ClassifierDLModel(AnnotatorModel, HasStorageRef, HasEngine):

name = "ClassifierDLModel"

inputAnnotatorTypes = [AnnotatorType.SENTENCE_EMBEDDINGS]

def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.ClassifierDLModel", java_model=None):
super(ClassifierDLModel, self).__init__(
classname=classname,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class DeBertaForQuestionAnswering(AnnotatorModel,
Expand Down Expand Up @@ -87,6 +88,8 @@ class DeBertaForQuestionAnswering(AnnotatorModel,
"""
name = "DeBertaForQuestionAnswering"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains classes for DeBertaForSequenceClassification."""

from sparknlp.common import *


Expand Down Expand Up @@ -98,6 +97,8 @@ class DeBertaForSequenceClassification(AnnotatorModel,
"""
name = "DeBertaForSequenceClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes for DeBertaForTokenClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class DeBertaForTokenClassification(AnnotatorModel,
Expand Down Expand Up @@ -94,6 +95,8 @@ class DeBertaForTokenClassification(AnnotatorModel,
"""
name = "DeBertaForTokenClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class DistilBertForQuestionAnswering(AnnotatorModel,
Expand Down Expand Up @@ -87,6 +88,8 @@ class DistilBertForQuestionAnswering(AnnotatorModel,
"""
name = "DistilBertForQuestionAnswering"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes for DistilBertForSequenceClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class DistilBertForSequenceClassification(AnnotatorModel,
Expand Down Expand Up @@ -101,6 +102,8 @@ class DistilBertForSequenceClassification(AnnotatorModel,
"""
name = "DistilBertForSequenceClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes for DistilBertForTokenClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class DistilBertForTokenClassification(AnnotatorModel,
Expand Down Expand Up @@ -92,6 +93,8 @@ class DistilBertForTokenClassification(AnnotatorModel,
"""
name = "DistilBertForTokenClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class LongformerForQuestionAnswering(AnnotatorModel,
Expand Down Expand Up @@ -87,6 +88,8 @@ class LongformerForQuestionAnswering(AnnotatorModel,
"""
name = "LongformerForQuestionAnswering"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes for LongformerForSequenceClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class LongformerForSequenceClassification(AnnotatorModel,
Expand Down Expand Up @@ -101,6 +102,8 @@ class LongformerForSequenceClassification(AnnotatorModel,
"""
name = "LongformerForSequenceClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes for LongformerForTokenClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class LongformerForTokenClassification(AnnotatorModel,
Expand Down Expand Up @@ -93,6 +94,8 @@ class LongformerForTokenClassification(AnnotatorModel,

name = "LongformerForTokenClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from sparknlp.annotator.param import EvaluationDLParams, ClassifierEncoder
from sparknlp.annotator.classifier_dl import ClassifierDLModel
from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class MultiClassifierDLApproach(AnnotatorApproach, EvaluationDLParams, ClassifierEncoder):
Expand Down Expand Up @@ -143,6 +144,7 @@ class MultiClassifierDLApproach(AnnotatorApproach, EvaluationDLParams, Classifie
ClassifierDLApproach : for single-class classification
SentimentDLApproach : for sentiment analysis
"""
inputAnnotatorTypes = [AnnotatorType.SENTENCE_EMBEDDINGS]

shufflePerEpoch = Param(Params._dummy(), "shufflePerEpoch", "whether to shuffle the training data on each Epoch",
TypeConverters.toBoolean)
Expand Down Expand Up @@ -290,6 +292,8 @@ class MultiClassifierDLModel(AnnotatorModel, HasStorageRef, HasEngine):
"""
name = "MultiClassifierDLModel"

inputAnnotatorTypes = [AnnotatorType.SENTENCE_EMBEDDINGS]

def __init__(self, classname="com.johnsnowlabs.nlp.annotators.classifier.dl.MultiClassifierDLModel",
java_model=None):
super(MultiClassifierDLModel, self).__init__(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# limitations under the License.

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class RoBertaForQuestionAnswering(AnnotatorModel,
Expand Down Expand Up @@ -87,6 +88,8 @@ class RoBertaForQuestionAnswering(AnnotatorModel,
"""
name = "RoBertaForQuestionAnswering"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.DOCUMENT]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Contains classes for RoBertaForSequenceClassification."""

from sparknlp.common import *
from sparknlp.common.annotator_type import AnnotatorType


class RoBertaForSequenceClassification(AnnotatorModel,
Expand Down Expand Up @@ -101,6 +102,8 @@ class RoBertaForSequenceClassification(AnnotatorModel,
"""
name = "RoBertaForSequenceClassification"

inputAnnotatorTypes = [AnnotatorType.DOCUMENT, AnnotatorType.TOKEN]

maxSentenceLength = Param(Params._dummy(),
"maxSentenceLength",
"Max sentence length to process",
Expand Down
Loading

0 comments on commit 902b07d

Please sign in to comment.