Skip to content

Commit

Permalink
🚧 Work in progress.
Browse files Browse the repository at this point in the history
  • Loading branch information
BrikerMan committed May 10, 2020
1 parent 4ede342 commit e67c317
Show file tree
Hide file tree
Showing 21 changed files with 1,493 additions and 660 deletions.
3 changes: 0 additions & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,6 @@ def __getattr__(cls, name):
'bert4keras',
'bert4keras.models',
'sklearn',
'seqeval',
'seqeval.metrics',
'seqeval.metrics.sequence_labeling',
'bert4keras.layers'
]
else:
Expand Down
184 changes: 184 additions & 0 deletions examples/classification_banchmark.ipynb

Large diffs are not rendered by default.

270 changes: 270 additions & 0 deletions examples/classification_banchmark_multi_label.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,270 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [
"# Multi-Label Classification Benchmark\n",
"\n",
"- Kashgari: 2.0.0\n",
"- TensorFlow: 2.0.0\n"
]
},
{
"cell_type": "markdown",
"source": [
"## Macros"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%% md\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"CORPUS_PATH = '/Users/brikerman/Downloads/jigsaw-toxic-comment-classification-challenge/train.csv'"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": null,
"outputs": [],
"source": [
"from kashgari.corpus import JigsawToxicCommentCorpus\n",
"corpus = JigsawToxicCommentCorpus(CORPUS_PATH)\n",
"\n",
"train_x, train_y = corpus.load_data()\n",
"test_x, test_y = corpus.load_data('test')\n",
"valid_x, valid_y = corpus.load_data('valid')"
],
"metadata": {
"collapsed": false,
"pycharm": {
"name": "#%%\n"
}
}
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Preparing text vocab dict: 100%|██████████| 3510/3510 [00:00<00:00, 43976.22it/s]\n",
"INFO:root:------ Build vocab dict finished, Top 10 token ------\n",
"INFO:root:Token: [PAD] -> 0\n",
"INFO:root:Token: [UNK] -> 1\n",
"INFO:root:Token: [BOS] -> 2\n",
"INFO:root:Token: [EOS] -> 3\n",
"INFO:root:Token: . -> 4\n",
"INFO:root:Token: the -> 5\n",
"INFO:root:Token: , -> 6\n",
"INFO:root:Token: \" -> 7\n",
"INFO:root:Token: to -> 8\n",
"INFO:root:Token: i -> 9\n",
"INFO:root:------ Build vocab dict finished, Top 10 token ------\n",
"Preparing classification label vocab dict: 100%|██████████| 3510/3510 [00:00<00:00, 938723.91it/s]\n",
"Calculating sequence length: 100%|██████████| 3510/3510 [00:00<00:00, 871846.92it/s]\n",
"WARNING:root:Calculated sequence length = 282\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: \"model_7\"\n",
"_________________________________________________________________\n",
"Layer (type) Output Shape Param # \n",
"=================================================================\n",
"input (InputLayer) [(None, None)] 0 \n",
"_________________________________________________________________\n",
"layer_embedding (Embedding) (None, None, 100) 666700 \n",
"_________________________________________________________________\n",
"bidirectional_3 (Bidirection (None, 256) 234496 \n",
"_________________________________________________________________\n",
"dense_3 (Dense) (None, 6) 1542 \n",
"_________________________________________________________________\n",
"activation_3 (Activation) (None, 6) 0 \n",
"=================================================================\n",
"Total params: 902,738\n",
"Trainable params: 902,738\n",
"Non-trainable params: 0\n",
"_________________________________________________________________\n",
"Train for 54 steps, validate for 11 steps\n",
"Epoch 1/2\n",
"53/54 [============================>.] - ETA: 0s - loss: 0.2300 - accuracy: 0.9547"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:root:Sequence length is None, will use the max length of the samples, which is 1038\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
" toxic 0.0000 0.0000 0.0000 81\n",
" obscene 0.0000 0.0000 0.0000 44\n",
" insult 0.0000 0.0000 0.0000 46\n",
" identity_hate 0.0000 0.0000 0.0000 10\n",
" severe_toxic 0.0000 0.0000 0.0000 8\n",
" threat 0.0000 0.0000 0.0000 4\n",
" macro avg 0.0000 0.0000 0.0000 193\n",
"\n",
"\n",
"epoch: 0 precision: 0.000000, recall: 0.000000, f1-score: 0.000000\n",
"54/54 [==============================] - 46s 855ms/step - loss: 0.2279 - accuracy: 0.9551 - val_loss: 0.1627 - val_accuracy: 0.9567\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/brikerman/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 2/2\n",
"53/54 [============================>.] - ETA: 0s - loss: 0.1440 - accuracy: 0.9619 precision recall f1-score support\n",
" toxic 0.0000 0.0000 0.0000 81\n",
" obscene 0.0000 0.0000 0.0000 44\n",
" insult 0.0000 0.0000 0.0000 46\n",
" identity_hate 0.0000 0.0000 0.0000 10\n",
" severe_toxic 0.0000 0.0000 0.0000 8\n",
" threat 0.0000 0.0000 0.0000 4\n",
" macro avg 0.0000 0.0000 0.0000 193\n",
"\n",
"\n",
"epoch: 1 precision: 0.000000, recall: 0.000000, f1-score: 0.000000\n",
"54/54 [==============================] - 40s 736ms/step - loss: 0.1422 - accuracy: 0.9624 - val_loss: 0.1673 - val_accuracy: 0.9573\n"
]
}
],
"source": [
"import logging\n",
"from kashgari.tasks.classification import BiLSTM_Model\n",
"from kashgari.callbacks import EvalCallBack\n",
"\n",
"logging.basicConfig(level='DEBUG')\n",
"\n",
"model = BiLSTM_Model(multi_label=True)\n",
"\n",
"eval_callback = EvalCallBack(model,\n",
" test_x,\n",
" test_y,\n",
" step=1)\n",
"\n",
"x = model.fit(train_x, train_y, valid_x, valid_y,\n",
" epochs=2,\n",
" callbacks=[eval_callback])"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
" toxic 0.0000 0.0000 0.0000 351\n",
" obscene 0.0000 0.0000 0.0000 182\n",
" insult 0.0000 0.0000 0.0000 181\n",
" identity_hate 0.0000 0.0000 0.0000 34\n",
" severe_toxic 0.0000 0.0000 0.0000 33\n",
" threat 0.0000 0.0000 0.0000 13\n",
" macro avg 0.0000 0.0000 0.0000 794\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/brikerman/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n"
]
},
{
"data": {
"text/plain": [
"{'precision': 0.0,\n",
" 'recall': 0.0,\n",
" 'f1-score': 0.0,\n",
" 'support': 794,\n",
" 'detail': {'toxic': {'precision': 0.0,\n",
" 'recall': 0.0,\n",
" 'f1': 0.0,\n",
" 'support': 351},\n",
" 'obscene': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 182},\n",
" 'insult': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 181},\n",
" 'identity_hate': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 34},\n",
" 'severe_toxic': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 33},\n",
" 'threat': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 13}}}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.evaluate(train_x, train_y)\n",
"\n"
]
}
],
"metadata": {
"kernelspec": {
"name": "python37464bitvenvvenv1fddca7c786445709a8f03a12aa91dfc",
"language": "python",
"display_name": "Python 3.7.4 64-bit ('venv': venv)"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}
Loading

0 comments on commit e67c317

Please sign in to comment.