Set up for training

In [1]:
import tensorflow as tf

2023-05-19 23:19:19.292183: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [9]:
max_len = 4  # Every encoded sequence is padded/truncated to this many tokens.
vocab_data = ["navigate", "to", "click", "on", "enter","data","in","the","field"]

# Build the vectorization layer from an in-memory vocabulary. (The
# `vocabulary` argument would also accept the path of a file holding one
# vocabulary word per line.)
vectorize_layer = tf.keras.layers.TextVectorization(
    vocabulary=vocab_data,
    max_tokens=100,
    output_mode='int',
    output_sequence_length=max_len,
)

# No adapt() call is required: the vocabulary was supplied up front. The
# layer reports it with the padding token ('') and the OOV token ('[UNK]')
# placed ahead of the words we passed in.
print(vectorize_layer.get_vocabulary())

# Wrap the layer in a model:
#   1. an explicit string input of shape (1,), i.e. exactly one string per
#      sample in the batch;
#   2. the vectorization layer, which yields a (batch_size, max_len) tensor
#      of vocabulary indices.
model = tf.keras.models.Sequential([
    tf.keras.Input(shape=(1,), dtype=tf.string),
    vectorize_layer,
])

# Example usage:
#input_data = [["earth no data fire"], ["yes earth"]]
#model.predict(input_data)


['', '[UNK]', 'navigate', 'to', 'click', 'on', 'enter', 'data', 'in', 'the', 'field']


In [2]:
#Define state: this will be natural language
import random
import numpy as np
# Each training sample is a single-element list holding one command string.
nlp_training_set = [
    ["navigate to webpage"],
    ["click on button"],
    ["enter data in"],
]
# Scratch checks kept for reference:
#res = np.asarray(model.predict(nlp_training_set))
#print(res)
#print(random.choice(nlp_training_set))

In [17]:
# nlp_action_dict: one entry per command. "nl" is the natural-language
# command text and "action" is the integer action associated with it.
nlp_action_dict = {
    0: {"nl": "navigate to webpage", "action": 0},
    1: {"nl": "click on button", "action": 1},
    2: {"nl": "enter data in", "action": 2},
}

In [42]:
import random


class EnvNlp:
    """Environment that asks an agent to map a natural-language command to an action.

    reset() draws one command from the training data and encodes it with a
    TextVectorization model; env_behaviour() rewards the action that
    nlp_action_dict associates with that command.
    """

    def __init__(self, training_data, vocabulary, max_tokens, output_sequence_length, nlp_action_dict):

        """
        Params:

        training_data: list of single-element lists, each holding one input string
        vocabulary: list of tokens handed to the text-vectorization layer
        max_tokens: max_tokens value passed to the text-vectorization layer
        output_sequence_length: length of every encoded command (and of the state vector)
        nlp_action_dict: dictionary whose values specify an input string ("nl")
                         and the corresponding action ("action")

        """
        self.training_data = training_data
        self.vocabulary = vocabulary
        self.max_tokens = max_tokens
        self.output_sequence_length = output_sequence_length
        self.nlp_action_dict = nlp_action_dict

        # NOTE(review): kept equal to max_tokens for backward compatibility, but
        # the state vectors produced by this class have length
        # output_sequence_length, not max_tokens.
        self.state_size = max_tokens

        # Create the layer from the vocabulary given to this instance, rather
        # than from whatever `vocab_data` global happens to exist in the kernel.
        self.vectorize_layer = tf.keras.layers.TextVectorization(
            max_tokens=self.max_tokens,
            output_mode='int',
            output_sequence_length=self.output_sequence_length,
            vocabulary=self.vocabulary,
        )

        # Create the model that uses the vectorize text layer
        self.model = tf.keras.models.Sequential()

        # create input layer: exactly one string per sample
        self.model.add(tf.keras.Input(shape=(1,), dtype=tf.string))

        # first layer in our model is the vectorization layer.
        self.model.add(self.vectorize_layer)

        self.state = None
        self.curr_data = None
        self.curr_action = None

    def reset(self):
        """Start a new episode with a randomly chosen training command.

        Stores the sampled command in curr_data, the action registered for it
        in curr_action and its encoding in state.

        Returns:
            The encoded command (the new state).

        Raises:
            ValueError: if the sampled command has no entry in nlp_action_dict.
                Previously this case silently returned the state of the
                preceding episode (or None on the first call).
        """
        self.curr_data = random.choice(self.training_data)
        command = self.curr_data[0]

        matching_actions = [
            entry['action']
            for entry in self.nlp_action_dict.values()
            if entry['nl'] == command
        ]
        if not matching_actions:
            raise ValueError(
                "no entry in nlp_action_dict for training command {!r}".format(command)
            )

        # If a command is listed more than once the last entry wins, as before.
        self.curr_action = matching_actions[-1]
        # Encode once, however many dictionary entries matched.
        self.state = self.get_token(self.curr_data)

        return self.state

    def env_behaviour(self, action):
        """Score `action` against the action expected for the current command.

        Returns:
            (next_state, reward, done):
            - correct action: a vector of -1.0 with output_sequence_length
              entries, reward 0.1, done True;
            - wrong action: the unchanged current state, reward -0.1, done False.
        """
        if self.curr_action == action:
            reward = 0.1
            done = True
            # Terminal marker sized like a regular state (was hard-coded to 5).
            next_state = np.full((self.output_sequence_length, ), -1.0)

        else:
            reward = -0.1
            done = False
            next_state = self.state

        return next_state, reward, done

    def get_token(self, data):
        """Encode `data` with the vectorization model and return the first row.

        Params:
            data: list of strings; only the encoding of the first one is returned.
        """
        # verbose=0 keeps Keras from printing a progress bar on every call.
        return np.asarray(self.model.predict(data, verbose=0))[0]
        


In [43]:
##set system path to include relevent modules
import sys
import pathlib
import os
# Two levels above the notebook's working directory.
root_folder = pathlib.Path(os.getcwd()).parent.parent.resolve()
script_dir = os.path.join(root_folder, "DQN")
# Make the directory that contains the DQN package importable. The membership
# check keeps sys.path from growing every time this cell is re-run.
if os.path.dirname(script_dir) not in sys.path:
    sys.path.append(os.path.dirname(script_dir))

In [44]:
from DQN.dqn_training_nlp import Training
from DQN.dqn_execution_nlp import DQNExecution

In [45]:
# Instantiate the environment. Encoded commands are padded/truncated to
# output_sequence_length tokens.
output_sequence_length = 5
vocab_data = ["navigate", "to", "click", "on", "enter","data","in","the","field"]
env_nlp = EnvNlp(
    training_data=nlp_training_set,
    vocabulary=vocab_data,
    max_tokens=100,
    output_sequence_length=output_sequence_length,
    nlp_action_dict=nlp_action_dict,
)



In [8]:
# Checkpoint file used for both training and testing
checkpoint_path = 'checkpoint_nlp_action.pth'

In [46]:

training_required = True
# Instantiate the trainer only when a training run is requested
if training_required:
    train = Training(state_size = output_sequence_length, action_size = 3, env = env_nlp)

# Training parameters for the DQN
n_episodes = 1000000   # (int) maximum number of training episodes
max_t = 1              # (int) maximum number of timesteps per episode
eps_start = 0.9        # (float) starting value of epsilon, for epsilon-greedy action selection
eps_end = 0.15         # (float) minimum value of epsilon
eps_decay = 0.99       # (float) multiplicative factor (per episode) for decreasing epsilon
score_window = 100     # (int) number of most recent scores that are stored
terminal_score = 0.09  # (float) training terminates when the mean of the scores in
                       # score_window reaches this value; dependent on the
                       # environment's reward system



In [47]:
# Run DQN training (only when requested); the per-episode scores are kept in `scores`.
if training_required:
    scores = train.dqn(
        n_episodes, max_t, eps_start, eps_end, eps_decay,
        score_window, terminal_score, checkpoint_path,
    )

action:  1
action:  2
action:  0
action:  2
action:  1
action:  2
action:  1
action:  1
action:  0
action:  1
action:  2
action:  0
action:  1
action:  2
action:  1
action:  1
action:  1
action:  1
action:  1
action:  1
action:  1
action:  1
action:  1
action:  1
action:  2
action:  1
action:  1
action:  1
action:  1
action:  2
action:  2
action:  1
action:  1
action:  2
action:  1
action:  1
action:  1
action:  2
action:  0
action:  1
action:  1
action:  1
action:  1
action:  2
action:  1
action:  0
action:  0
action:  0
action:  0
action:  2
action:  0
action:  0
action:  2
action:  1
action:  1
action:  2
action:  1
action:  1
action:  1
action:  1
action:  1
action:  1
action:  0
action:  1
action:  1
action:  1
action:  1
action:  2
action:  1
action:  1
action:  1
action:  2
action:  1
action:  1
action:  1
action:  1
action:  2
action:  1
action:  1
action:  1
action:  2
action:  2
action:  2
action:  2
action:  1
action:  1
action:  1
action:  1
action:  1
action:  1
action:  2

KeyboardInterrupt: 

In [48]:
# Instantiate DQN execution from the saved checkpoint and collect the action
# it selects for every command of the training set.
execute = DQNExecution(state_size = output_sequence_length, action_size = 3,env = env_nlp, checkpoint_path = checkpoint_path)
dqn_actions = {}

for entry in nlp_action_dict.values():
    command = entry['nl']
    # get_token expects a list of strings and returns the encoding of the first one.
    state = env_nlp.get_token([command])
    print(state)
    action = execute.dqn_execute(state)
    dqn_actions[command] = action

print("computed actions: ",dqn_actions)

[2 3 1 0 0]
[4 5 1 0 0]
[6 7 8 0 0]
computed actions:  {'navigate to webpage': 0, 'click on button': 1, 'enter data in': 2}


In [1]:
import spacy

# Label that the entity recognizer is extended with.
entity_label = "CUSTOM1"

# Start from a blank English pipeline and copy only the trained "ner"
# component over from en_core_web_sm.
source_nlp = spacy.load("en_core_web_sm")
nlp = spacy.blank("en")

print(nlp.pipe_names)
ner = nlp.add_pipe("ner", source=source_nlp)  # add_pipe returns the added component
print(nlp.pipe_names)

# Register the custom label; the call's return value is shown as the cell result.
ner.add_label(entity_label)

2023-05-22 11:36:14.659364: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


[]
['ner']


1

In [2]:
""" 
TRAIN_DATA = [
    ("Apple is looking to buy a U.K. startup for $1 billion", {"entities": [(0, 5, "CUST1")]}),
    ("Microsoft acquires another AI startup", {"entities": [(0, 9, "CUST1")]}),
    # more examples...
]
"""

# Each entity is (start_char, end_char, label) with end_char exclusive, so
# text[start_char:end_char] must be exactly the entity text. The previous
# offsets cut "iPhone" short, pointed at "comin" in the fourth example and ran
# past the end of the "MacBook Pro" sentence.
TRAIN_DATA = [
    ("iPhone is coming soon", {"entities": [(0, 6, "CUSTOM1")]}),
    ("iPhone is coming soon", {"entities": [(0, 6, "CUSTOM1")]}),
    ("iPhone is coming soon", {"entities": [(0, 6, "CUSTOM1")]}),
    ("coming soon iPhone", {"entities": [(12, 18, "CUSTOM1")]}),
    ("I love my MacBook Pro", {"entities": [(10, 21, "PRODUCT")]}),
    # more examples...
]

# Single pass over TRAIN_DATA: one update call per annotated example.
for text, annotations in TRAIN_DATA:
    # Build the Doc from the raw text, then pair it with its gold annotations.
    doc = nlp.make_doc(text)
    example = spacy.training.Example.from_dict(doc, annotations)
    # A fresh losses dict is passed on every step and not kept afterwards.
    nlp.update([example], losses={})



In [3]:
# Save the updated pipeline to ./trained_model (relative to the working directory).
nlp.to_disk("./trained_model")

In [4]:
# Reload the pipeline that was saved to disk and run it on a sample sentence.
nlp = spacy.load("./trained_model")

#text = "Cust12 is looking to acquire a U.K. startup."
text = "iPhone is coming soon"
doc = nlp(text)

# NOTE: spaCy's Span objects have no `score` attribute, so the previous filter
# `ent.score >= 0.2` raised AttributeError as soon as the model predicted at
# least one entity (it only appeared to work while doc.ents was empty).
# All predicted entities are therefore reported unfiltered.
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [8]:
# List the labels known to the "ner" component of the loaded pipeline.
entity_labels = nlp.get_pipe("ner").labels
print(entity_labels)

# Print the Doc processed in the previous cell.
print(doc)

('CARDINAL', 'CUSTOM1', 'DATE', 'EVENT', 'FAC', 'GPE', 'LANGUAGE', 'LAW', 'LOC', 'MONEY', 'NORP', 'ORDINAL', 'ORG', 'PERCENT', 'PERSON', 'PRODUCT', 'QUANTITY', 'TIME', 'WORK_OF_ART')
iPhone is coming soon
