Skip to content

Commit

Permalink
✨ add DPCNN to model zoo
Browse files Browse the repository at this point in the history
  • Loading branch information
alexwwang committed Jul 2, 2019
1 parent 7afa174 commit 18fb5cf
Show file tree
Hide file tree
Showing 2 changed files with 174 additions and 0 deletions.
3 changes: 3 additions & 0 deletions kashgari/tasks/classification/__init__.py
Expand Up @@ -19,6 +19,7 @@
from kashgari.tasks.classification.models import AVRNN_Model
from kashgari.tasks.classification.models import Dropout_BiGRU_Model
from kashgari.tasks.classification.models import Dropout_AVRNN_Model
from kashgari.tasks.classification.dpcnn_model import DPCNN_Model


BLSTMModel = BiLSTM_Model
Expand All @@ -32,3 +33,5 @@
AVRNNModel = AVRNN_Model
DropoutBGRUModel = Dropout_BiGRU_Model
DropoutAVRNNModel = Dropout_AVRNN_Model

DPCNN = DPCNN_Model
171 changes: 171 additions & 0 deletions kashgari/tasks/classification/dpcnn_model.py
@@ -0,0 +1,171 @@
# encoding: utf-8

# author: Alex
# contact: ialexwwang@gmail.com
# version: 0.1
# license: Apache Licence
# file: dpcnn_model.py
# time: 2019-07-02 19:15
# Reference:
# https://ai.tencent.com/ailab/media/publications/ACL3-Brady.pdf
# https://github.com/Cheneng/DPCNN
# https://github.com/miracleyoo/DPCNN-TextCNN-Pytorch-Inception
# https://www.kaggle.com/michaelsnell/conv1d-dpcnn-in-keras

import logging
from math import log2, floor
import tensorflow as tf
from typing import Dict, Any
from kashgari.layers import L, KMaxPoolingLayer
from kashgari.tasks.classification.base_model import BaseClassificationModel


class DPCNN_Model(BaseClassificationModel):
    """Deep Pyramid CNN (DPCNN) text classifier.

    A region-embedding convolution is followed by a stack of
    downsample-then-convolve "pyramid" stages (each halving the sequence
    length) and a dense softmax head, after Johnson & Zhang (ACL 2017).
    """

    @classmethod
    def get_default_hyper_parameters(cls) -> Dict[str, Dict[str, Any]]:
        """Return the default hyper-parameters for each layer group.

        Keys map 1:1 onto the layer groups consumed by
        ``build_model_arc`` / ``conv_block`` / ``resnet_block``.
        """
        pool_type = 'max'
        filters = 250
        activation = 'linear'
        return {
            'region_embedding': {
                'filters': filters,
                'kernel_size': 3,
                'strides': 1,
                'padding': 'same',
                'activation': activation,
                'name': 'region_embedding',
            },
            'region_dropout': {
                'rate': 0.2,
            },
            'conv_block': {
                'filters': filters,
                'kernel_size': 3,
                'activation': activation,
                'shortcut': True,
            },
            'resnet_block': {
                'filters': filters,
                'kernel_size': 3,
                'activation': activation,
                'shortcut': True,
                'pool_type': pool_type,
                'sorted': True,
            },
            'dense': {
                'units': 256,
                'activation': activation,
            },
            'dropout': {
                'rate': 0.5,
            },
            'activation': {
                'activation': 'softmax',
            }
        }

    def downsample(self, inputs, pool_type: str = 'max',
                   sorted: bool = True, stage: int = 1):  # noqa: A002
        """Halve the time dimension of `inputs`.

        Args:
            inputs: rank-3 tensor ``(batch, seq_len, channels)``.
            pool_type: ``'max'`` | ``'k_max'`` | ``'conv'`` | ``None``
                (``None`` is a no-op pass-through).
            sorted: for ``'k_max'`` only — keep the k maxima in original
                order or sorted by value.
            stage: pyramid stage index, used only to name the layer.

        Raises:
            ValueError: on an unrecognised ``pool_type``.
        """
        # NOTE(review): `.shape[...].value` is TF1.x-style and assumes a
        # static (non-None) sequence length — confirm against the
        # embedding models this runs with.
        layers_pool = []
        if pool_type == 'max':
            layers_pool.append(
                L.MaxPooling1D(pool_size=3,
                               strides=2,
                               padding='same',
                               name=f'pool_{stage}'))
        elif pool_type == 'k_max':
            k = int(inputs.shape[1].value / 2)
            layers_pool.append(
                KMaxPoolingLayer(k=k,
                                 sorted=sorted,
                                 name=f'pool_{stage}'))
        elif pool_type == 'conv':
            layers_pool.append(
                L.Conv1D(filters=inputs.shape[-1].value,
                         kernel_size=3,
                         strides=2,
                         padding='same',
                         name=f'pool_{stage}'))
            layers_pool.append(
                L.BatchNormalization())
        elif pool_type is None:
            layers_pool = []
        else:
            raise ValueError(f'unsupported pooling type `{pool_type}`!')

        tensor_out = inputs
        for layer in layers_pool:
            tensor_out = layer(tensor_out)
        return tensor_out

    def conv_block(self, inputs, filters: int, kernel_size: int = 3,
                   activation: str = 'linear', shortcut: bool = True):
        """Apply two pre-activation conv units (BN -> PReLU -> Conv1D),
        with an optional residual shortcut from `inputs`.

        Bug fix: the units are built with *fresh* layer instances for
        each repetition. The previous ``layers_conv_unit * 2`` reused
        the very same layer objects twice, silently sharing weights
        between the two convolutions of the block, which is not what
        the DPCNN architecture specifies.
        """
        layers_conv_block = []
        for _ in range(2):
            layers_conv_block.append(L.BatchNormalization())
            layers_conv_block.append(L.PReLU())
            layers_conv_block.append(
                L.Conv1D(filters=filters,
                         kernel_size=kernel_size,
                         strides=1,
                         padding='same',
                         activation=activation))

        tensor_out = inputs
        for layer in layers_conv_block:
            tensor_out = layer(tensor_out)

        if shortcut:
            # Residual add requires `filters` to match the channel
            # dimension of `inputs`.
            tensor_out = L.Add()([inputs, tensor_out])

        return tensor_out

    def resnet_block(self, inputs, filters: int, kernel_size: int = 3,
                     activation: str = 'linear', shortcut: bool = True,
                     pool_type: str = 'max', sorted: bool = True, stage: int = 1):  # noqa: A002
        """One pyramid stage: downsample `inputs`, then run a conv block."""
        tensor_pool = self.downsample(inputs, pool_type=pool_type, sorted=sorted, stage=stage)
        tensor_out = self.conv_block(tensor_pool, filters=filters, kernel_size=kernel_size,
                                     activation=activation, shortcut=shortcut)
        return tensor_out

    def build_model_arc(self):
        """Assemble the DPCNN graph and store it on ``self.tf_model``."""
        output_dim = len(self.pre_processor.label2idx)
        config = self.hyper_parameters
        embed_model = self.embedding.embed_model

        layers_region = [
            L.Conv1D(**config['region_embedding']),
            L.BatchNormalization(),
            L.PReLU(),
            # Bug fix: use the dropout's own config. Previously
            # `config['region_embedding']` was passed, whose Conv1D
            # kwargs (filters/kernel_size/...) are invalid for Dropout
            # and whose `name` would collide with the conv layer's.
            L.Dropout(**config['region_dropout'])
        ]

        layers_main = [
            L.GlobalMaxPooling1D(),
            L.Dense(**config['dense']),
            L.BatchNormalization(),
            L.PReLU(),
            L.Dropout(**config['dropout']),
            L.Dense(output_dim, **config['activation'])
        ]

        # Bug fix: start from the embedding *output*, not the raw input
        # placeholders (`embed_model.inputs` is a list of un-embedded
        # token-id tensors, which would bypass the embedding entirely).
        tensor_out = embed_model.output

        # build region tensors
        for layer in layers_region:
            tensor_out = layer(tensor_out)

        # build the base pyramid layer
        tensor_out = self.conv_block(tensor_out, **config['conv_block'])

        # build the upper pyramid stages; each stage halves seq_len, so
        # floor(log2(seq_len)) - 2 stages reduce it to a small constant.
        # NOTE(review): assumes a static seq_len — `.value` is None for
        # variable-length inputs; confirm upstream padding guarantees it.
        seq_len = tensor_out.shape[1].value
        for i in range(floor(log2(seq_len)) - 2):
            tensor_out = self.resnet_block(tensor_out, stage=i + 1,
                                           **config['resnet_block'])
        for layer in layers_main:
            tensor_out = layer(tensor_out)

        self.tf_model = tf.keras.Model(embed_model.inputs, tensor_out)

0 comments on commit 18fb5cf

Please sign in to comment.