## Creates TensorFlow Graphs for Spark NLP NerDLApproach
TensorFlow: `1.15.0`


In [5]:
import numpy as np
import os
import tensorflow as tf
import string
import random
import math
import sys
import shutil

sys.path.append('../lib/ner')
from ner_model import NerModel
from dataset_encoder import DatasetEncoder
from ner_model_saver import NerModelSaver
from pathlib import Path

In [6]:
# Show the TensorFlow version and the version of its bundled Keras, one per line.
print(tf.__version__, tf.keras.__version__, sep='\n')

# Set the TensorFlow logger threshold to INFO.
tf_logger = tf.get_logger()
tf_logger.setLevel('INFO')

1.15.0
2.2.4-tf


## SETTINGS

In [7]:
# Index of the GPU handed to NerModel (as `use_gpu_device`) by create_graph.
# The first GPU (index 0) is used by default; change the value here to pick another one.
gpu_device = 0

In [8]:
def create_graph(ntags, embeddings_dim, nchars, lstm_size = 128):
    """Build a NerDL graph and write it to the current directory as a binary `.pb` file.

    The file is named ``blstm_{ntags}_{embeddings_dim}_{lstm_size}_{nchars}.pb``.
    When the interpreter or the installed TensorFlow does not match the
    supported versions, a message is printed and nothing is built or written.

    Args:
        ntags: max number of tags; forwarded to ``NerModel.add_context_repr``.
        embeddings_dim: embeddings dimension; forwarded to
            ``NerModel.add_pretrained_word_embeddings``.
        nchars: max number of characters processed; forwarded to both
            character-representation layers.
        lstm_size: LSTM size; forwarded to ``NerModel.add_context_repr``.

    Returns:
        None, both on success and when a version check fails.
    """
    name_prefix = 'blstm'

    # The gate accepts Python 3.0-3.6 only, so the message must name 3.7 as the
    # first rejected version.
    if sys.version_info[0] != 3 or sys.version_info[1] >= 7:
        print('Python 3.7 or above not supported by tensorflow')
        return

    # The message has to name the same version the check requires.
    if tf.__version__ != '1.15.0':
        print('Spark NLP is compiled with TensorFlow 1.15.0, Please use such version.')
        print('Current TensorFlow version: ', tf.__version__)
        return

    tf.compat.v1.disable_v2_behavior()
    tf.compat.v1.reset_default_graph()
    model_name = name_prefix+'_{}_{}_{}_{}'.format(ntags, embeddings_dim, lstm_size, nchars)

    # The session is closed by the `with` block on exit, so no explicit
    # session.close() is needed.
    with tf.compat.v1.Session():
        # NerModel is given session=None and exposes its own `session` attribute,
        # which is the one whose graph gets exported below.
        ner = NerModel(session=None, use_contrib=False, use_gpu_device=gpu_device)
        try:
            # NOTE(review): the literals 25, 30, 3, True and 5 are forwarded as-is;
            # their meaning is defined by NerModel, which is not visible here.
            ner.add_cnn_char_repr(nchars, 25, 30)
            ner.add_bilstm_char_repr(nchars, 25, 30)
            ner.add_pretrained_word_embeddings(embeddings_dim)
            ner.add_context_repr(ntags, lstm_size, 3)
            ner.add_inference_layer(True)
            ner.add_training_op(5)
            ner.init_variables()

            # Constructing a Saver adds save/restore ops to the current default
            # graph (presumably the graph exported below - confirm). The object
            # itself is not needed afterwards, so it is not bound to a name.
            tf.compat.v1.train.Saver()

            file_name = model_name + '.pb'
            # as_text=False -> binary protobuf.
            tf.io.write_graph(ner.session.graph, './', file_name, False)
        finally:
            # Release the model's resources even if a build step raised.
            ner.close()

### `create_graph` arguments
- 1st argument: maximum number of tags (must be at least the number of unique labels, including `O` if the labels use the IOB scheme)
- 2nd argument: embeddings dimension
- 3rd argument: maximum number of characters processed (must be at least the largest possible number of characters)
- 4th argument: LSTM size (optional, defaults to 128)

In [9]:
# Reference presets (left disabled), written as create_graph(ntags, embeddings_dim, nchars):
#   CoNLL 2003 - English - GloVe 100d   -> create_graph(9, 100, 90)
#   CoNLL 2003 - English - GloVe 200d   -> create_graph(9, 200, 90)
#   CoNLL 2003 - English - GloVe 300d   -> create_graph(9, 300, 90)
#   CoNLL 2003 - English - BERT Base    -> create_graph(9, 768, 90)
#   CoNLL 2003 - English - BERT Large   -> create_graph(9, 1024, 90)
#   ...and so on for other tag counts / embedding sizes.

# Graphs actually generated by this cell, in order: (ntags, embeddings_dim, nchars).
_graph_specs = (
    (10, 100, 100),
    (10, 300, 100),
    (10, 768, 100),
    (10, 1024, 100),
    (25, 100, 100),
    (25, 300, 100),
    (30, 100, 100),
    (30, 300, 100),
)

for _graph_spec in _graph_specs:
    create_graph(*_graph_spec)



Instructions for updating:
non-resource variables are not supported in the long term
Device mapping:
/job:localhost/replica:0/task:0/device:XLA_CPU:0 -> device: XLA_CPU device

Instructions for updating:
If using Keras pass *_constraint arguments to layers.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If

In [None]:
# Terminate the Python process immediately with exit status 0.
# os._exit skips cleanup handlers, so in a notebook this kills the kernel.
# NOTE(review): presumably done to release resources held by TensorFlow - confirm.
os._exit(0)