🚧 Work in progress.

BrikerMan · May 10, 2020 · e67c317 · e67c317
1 parent 4ede342
commit e67c317
Show file tree

Hide file tree

Showing 21 changed files with 1,493 additions and 660 deletions.
diff --git a/docs/conf.py b/docs/conf.py
@@ -48,9 +48,6 @@ def __getattr__(cls, name):
         'bert4keras',
         'bert4keras.models',
         'sklearn',
-        'seqeval',
-        'seqeval.metrics',
-        'seqeval.metrics.sequence_labeling',
         'bert4keras.layers'
     ]
 else:

diff --git a/examples/classification_banchmark.ipynb b/examples/classification_banchmark.ipynb
diff --git a/examples/classification_banchmark_multi_label.ipynb b/examples/classification_banchmark_multi_label.ipynb
@@ -0,0 +1,270 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
+   "source": [
+    "# Multi-Label Classification Benchmark\n",
+    "\n",
+    "- Kashgari: 2.0.0\n",
+    "- TensorFlow: 2.0.0\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "source": [
+    "## Macros"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "CORPUS_PATH = '/Users/brikerman/Downloads/jigsaw-toxic-comment-classification-challenge/train.csv'"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "outputs": [],
+   "source": [
+    "from kashgari.corpus import JigsawToxicCommentCorpus\n",
+    "corpus = JigsawToxicCommentCorpus(CORPUS_PATH)\n",
+    "\n",
+    "train_x, train_y = corpus.load_data()\n",
+    "test_x,  test_y  = corpus.load_data('test')\n",
+    "valid_x, valid_y = corpus.load_data('valid')"
+   ],
+   "metadata": {
+    "collapsed": false,
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   }
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "Preparing text vocab dict: 100%|██████████| 3510/3510 [00:00<00:00, 43976.22it/s]\n",
+      "INFO:root:------ Build vocab dict finished, Top 10 token ------\n",
+      "INFO:root:Token: [PAD]    -> 0\n",
+      "INFO:root:Token: [UNK]    -> 1\n",
+      "INFO:root:Token: [BOS]    -> 2\n",
+      "INFO:root:Token: [EOS]    -> 3\n",
+      "INFO:root:Token: .        -> 4\n",
+      "INFO:root:Token: the      -> 5\n",
+      "INFO:root:Token: ,        -> 6\n",
+      "INFO:root:Token: \"        -> 7\n",
+      "INFO:root:Token: to       -> 8\n",
+      "INFO:root:Token: i        -> 9\n",
+      "INFO:root:------ Build vocab dict finished, Top 10 token ------\n",
+      "Preparing classification label vocab dict: 100%|██████████| 3510/3510 [00:00<00:00, 938723.91it/s]\n",
+      "Calculating sequence length: 100%|██████████| 3510/3510 [00:00<00:00, 871846.92it/s]\n",
+      "WARNING:root:Calculated sequence length = 282\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model: \"model_7\"\n",
+      "_________________________________________________________________\n",
+      "Layer (type)                 Output Shape              Param #   \n",
+      "=================================================================\n",
+      "input (InputLayer)           [(None, None)]            0         \n",
+      "_________________________________________________________________\n",
+      "layer_embedding (Embedding)  (None, None, 100)         666700    \n",
+      "_________________________________________________________________\n",
+      "bidirectional_3 (Bidirection (None, 256)               234496    \n",
+      "_________________________________________________________________\n",
+      "dense_3 (Dense)              (None, 6)                 1542      \n",
+      "_________________________________________________________________\n",
+      "activation_3 (Activation)    (None, 6)                 0         \n",
+      "=================================================================\n",
+      "Total params: 902,738\n",
+      "Trainable params: 902,738\n",
+      "Non-trainable params: 0\n",
+      "_________________________________________________________________\n",
+      "Train for 54 steps, validate for 11 steps\n",
+      "Epoch 1/2\n",
+      "53/54 [============================>.] - ETA: 0s - loss: 0.2300 - accuracy: 0.9547"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "WARNING:root:Sequence length is None, will use the max length of the samples, which is 1038\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                      precision    recall  f1-score   support\n",
+      "               toxic     0.0000    0.0000    0.0000        81\n",
+      "             obscene     0.0000    0.0000    0.0000        44\n",
+      "              insult     0.0000    0.0000    0.0000        46\n",
+      "       identity_hate     0.0000    0.0000    0.0000        10\n",
+      "        severe_toxic     0.0000    0.0000    0.0000         8\n",
+      "              threat     0.0000    0.0000    0.0000         4\n",
+      "           macro avg     0.0000    0.0000    0.0000       193\n",
+      "\n",
+      "\n",
+      "epoch: 0 precision: 0.000000, recall: 0.000000, f1-score: 0.000000\n",
+      "54/54 [==============================] - 46s 855ms/step - loss: 0.2279 - accuracy: 0.9551 - val_loss: 0.1627 - val_accuracy: 0.9567\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/brikerman/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, msg_start, len(result))\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Epoch 2/2\n",
+      "53/54 [============================>.] - ETA: 0s - loss: 0.1440 - accuracy: 0.9619                      precision    recall  f1-score   support\n",
+      "               toxic     0.0000    0.0000    0.0000        81\n",
+      "             obscene     0.0000    0.0000    0.0000        44\n",
+      "              insult     0.0000    0.0000    0.0000        46\n",
+      "       identity_hate     0.0000    0.0000    0.0000        10\n",
+      "        severe_toxic     0.0000    0.0000    0.0000         8\n",
+      "              threat     0.0000    0.0000    0.0000         4\n",
+      "           macro avg     0.0000    0.0000    0.0000       193\n",
+      "\n",
+      "\n",
+      "epoch: 1 precision: 0.000000, recall: 0.000000, f1-score: 0.000000\n",
+      "54/54 [==============================] - 40s 736ms/step - loss: 0.1422 - accuracy: 0.9624 - val_loss: 0.1673 - val_accuracy: 0.9573\n"
+     ]
+    }
+   ],
+   "source": [
+    "import logging\n",
+    "from kashgari.tasks.classification import BiLSTM_Model\n",
+    "from kashgari.callbacks import EvalCallBack\n",
+    "\n",
+    "logging.basicConfig(level='DEBUG')\n",
+    "\n",
+    "model = BiLSTM_Model(multi_label=True)\n",
+    "\n",
+    "eval_callback = EvalCallBack(model,\n",
+    "                            test_x,\n",
+    "                            test_y,\n",
+    "                            step=1)\n",
+    "\n",
+    "x = model.fit(train_x, train_y, valid_x, valid_y,\n",
+    "          epochs=2,\n",
+    "          callbacks=[eval_callback])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "                      precision    recall  f1-score   support\n",
+      "               toxic     0.0000    0.0000    0.0000       351\n",
+      "             obscene     0.0000    0.0000    0.0000       182\n",
+      "              insult     0.0000    0.0000    0.0000       181\n",
+      "       identity_hate     0.0000    0.0000    0.0000        34\n",
+      "        severe_toxic     0.0000    0.0000    0.0000        33\n",
+      "              threat     0.0000    0.0000    0.0000        13\n",
+      "           macro avg     0.0000    0.0000    0.0000       794\n",
+      "\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/Users/brikerman/Desktop/python/Kashgari2/venv/lib/python3.7/site-packages/sklearn/metrics/_classification.py:1272: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
+      "  _warn_prf(average, modifier, msg_start, len(result))\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "{'precision': 0.0,\n",
+       " 'recall': 0.0,\n",
+       " 'f1-score': 0.0,\n",
+       " 'support': 794,\n",
+       " 'detail': {'toxic': {'precision': 0.0,\n",
+       "   'recall': 0.0,\n",
+       "   'f1': 0.0,\n",
+       "   'support': 351},\n",
+       "  'obscene': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 182},\n",
+       "  'insult': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 181},\n",
+       "  'identity_hate': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 34},\n",
+       "  'severe_toxic': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 33},\n",
+       "  'threat': {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'support': 13}}}"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "model.evaluate(train_x, train_y)\n",
+    "\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "name": "python37464bitvenvvenv1fddca7c786445709a8f03a12aa91dfc",
+   "language": "python",
+   "display_name": "Python 3.7.4 64-bit ('venv': venv)"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}