In [1]:
from keras.preprocessing.sequence import pad_sequences
from keras.layers import Embedding, LSTM, Dense, Dropout
from keras.preprocessing.text import Tokenizer
from keras.callbacks import EarlyStopping
from keras.models import Sequential
import keras.utils as ku 
import numpy as np 

import scipy.stats as sts

import tensorflow as tf

Using TensorFlow backend.


In [2]:
# Each corpus gets its own Tokenizer instance, fitted later in dataset_preparation.
frosty_tokenizer = Tokenizer()
# Read the whole corpus; the context manager closes the file handle as soon
# as the text is in memory instead of leaving it open for the kernel's lifetime.
with open('robert_frost.txt') as frosty_file:
    frosty_data = frosty_file.read()

slim_tokenizer = Tokenizer()
with open('slim_shady.txt') as slim_file:
    slim_data = slim_file.read()

In [3]:
def dataset_preparation(data, tokenizer):
    """Convert raw text into next-word training arrays.

    Args:
        data: the full corpus as one string, lines separated by "\n".
        tokenizer: object exposing fit_on_texts, word_index and
            texts_to_sequences; it is fitted on the corpus in place.

    Returns:
        (predictors, label, max_sequence_len, total_words) where predictors
        holds every padded sequence minus its last token, label is the
        one-hot encoding of that last token, max_sequence_len is the length
        of the longest n-gram sequence and total_words is
        len(tokenizer.word_index) + 1.
    """
    # One lower-cased corpus entry per line of the input.
    lines = data.lower().split("\n")

    # Fit the vocabulary on the corpus.
    tokenizer.fit_on_texts(lines)
    total_words = 1 + len(tokenizer.word_index)

    # Every prefix of at least two tokens of every line becomes one sample.
    input_sequences = []
    for text in lines:
        tokens = tokenizer.texts_to_sequences([text])[0]
        input_sequences.extend(tokens[:end] for end in range(2, len(tokens) + 1))

    # Left-pad all samples to the length of the longest one.
    max_sequence_len = max(map(len, input_sequences))
    padded = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')
    input_sequences = np.array(padded)

    # The last column is the word to predict; everything before it is the input.
    predictors = input_sequences[:, :-1]
    label = ku.to_categorical(input_sequences[:, -1], num_classes=total_words)

    return predictors, label, max_sequence_len, total_words

In [4]:
# Build the training arrays for each corpus with its own tokenizer.
(slim_predictors,
 slim_label,
 slim_max_sequence_len,
 slim_total_words) = dataset_preparation(slim_data, slim_tokenizer)

(frosty_predictors,
 frosty_label,
 frosty_max_sequence_len,
 frosty_total_words) = dataset_preparation(frosty_data, frosty_tokenizer)

In [5]:
def create_model(predictors, label, max_sequence_len, total_words, epochs=100, patience=5):
    """Build, train and return a two-layer LSTM next-word model.

    Args:
        predictors: training inputs, one padded token sequence per row
            (rows have max_sequence_len - 1 entries).
        label: one-hot targets with total_words columns.
        max_sequence_len: length of the padded n-gram sequences including
            the target word; the network input length is one less.
        total_words: size of the output layer / embedding vocabulary.
        epochs: maximum number of training epochs (default 100).
        patience: epochs without improvement of the training loss before
            training is stopped early (default 5).

    Returns:
        The fitted Sequential model.
    """
    model = Sequential()
    model.add(Embedding(total_words, 10, input_length=max_sequence_len-1))
    model.add(LSTM(150, return_sequences=True))
    model.add(LSTM(100))
    model.add(Dense(total_words, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    # fit() receives no validation data, so a 'val_loss' metric never exists
    # and a callback monitoring it can never trigger. Monitor the training
    # loss instead so early stopping actually takes effect.
    earlystop = EarlyStopping(monitor='loss', min_delta=0, patience=patience, verbose=0, mode='auto')
    model.fit(predictors, label, epochs=epochs, verbose=1, callbacks=[earlystop])

    # summary() prints the table itself and returns None; wrapping it in
    # print() only added a stray "None" line to the output.
    model.summary()

    return model

In [6]:
# Train the Robert Frost language model on its prepared arrays.
frosty_model = create_model(
    predictors=frosty_predictors,
    label=frosty_label,
    max_sequence_len=frosty_max_sequence_len,
    total_words=frosty_total_words,
)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
 160/9519 [..............................] - ETA: 9s - loss: 6.3138 - acc: 0.0375   



Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
embedding_1 (Embedding)      (None, 11, 10)            22730     
_________________________________________________________________
lstm_1 (LSTM)                (None, 11, 150)           96600     
_________________________________________________________________
lstm_2 (LSTM)                (None, 100)               100400    
_________________________________________________________________
dense_1 (Dense)              (None, 2273)              229573    
Total params: 449,303
Trainable params: 449,303
Non-trainable params: 0
_________________________________________________________________
None


In [7]:
# Train the Slim Shady language model on its prepared arrays.
slim_model = create_model(
    predictors=slim_predictors,
    label=slim_label,
    max_sequence_len=slim_max_sequence_len,
    total_words=slim_total_words,
)

Epoch 1/100
 50976/307631 [===>..........................] - ETA: 3:03:34 - loss: 7.2434 - acc: 0.0292

KeyboardInterrupt: 

In [None]:
def _tokenizer_for_model(model):
    """Return the notebook-level tokenizer paired with `model`.

    Looks for a global '<name>_model' that is `model` and returns the
    matching '<name>_tokenizer' global.
    """
    scope = globals()
    for prefix in ('frosty', 'slim'):
        if model is not None and scope.get(prefix + '_model') is model:
            return scope[prefix + '_tokenizer']
    raise ValueError(
        "generate_text needs a tokenizer: pass tokenizer=... or use "
        "frosty_model / slim_model"
    )


def generate_text(model, seed_text, next_words, max_sequence_len, tokenizer=None):
    """Extend `seed_text` with `next_words` greedily predicted words.

    Args:
        model: trained next-word model exposing predict().
        seed_text: text to continue.
        next_words: number of words to append; 0 returns seed_text unchanged.
        max_sequence_len: sequence length the model was prepared with; the
            input is padded/truncated to max_sequence_len - 1 tokens.
        tokenizer: tokenizer fitted for `model`. When omitted, the tokenizer
            belonging to frosty_model / slim_model is looked up from the
            notebook globals (previously an undefined global `tokenizer`
            was read, which raised NameError).

    Returns:
        seed_text followed by the generated words, space separated. A
        predicted index with no vocabulary entry appends an empty word.

    Raises:
        ValueError: if no tokenizer is given and none can be resolved.
    """
    if tokenizer is None:
        tokenizer = _tokenizer_for_model(model)

    # Build the index -> word table once instead of scanning the whole
    # vocabulary for every generated word.
    index_to_word = {index: word for word, index in tokenizer.word_index.items()}

    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
        # predict_classes() is gone from recent Keras releases; taking the
        # argmax over the predicted distribution is the equivalent.
        probabilities = model.predict(token_list, verbose=0)
        predicted = int(np.argmax(probabilities, axis=-1)[0])

        seed_text += " " + index_to_word.get(predicted, "")
    return seed_text

In [None]:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# List of all the state variables in the system and their initial values

prompt_string = "Double down divergent road rap"

initial_conditions = {
    'transcript': prompt_string, # collaborative text
    'slim_says' : "",
    # Must be spelt 'frosty_says': that is the state key written by
    # store_frosty_suggestion and read by update_transcript.
    'frosty_says': "",
    'slim': (0.0,0.0, 0.0), #slim's opinions: scores of the suggestions, (fullprior,+slim,+frosty) 
    'frosty':(0.0,0.0, 0.0) #frosty opinions: scores of the suggestions, (fullprior,+slim,+frosty) 
}
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

In [None]:
#extra utility, may not be needed
def new_line(transcript, max_sequence_len=12):
    """Tell whether the last line of `transcript` is longer than allowed.

    Args:
        transcript: text whose lines are separated by '\n'.
        max_sequence_len: maximum number of words on a line (default 12).

    Returns:
        True when the last line holds more than max_sequence_len
        whitespace-separated words, otherwise False.
    """
    current_line = transcript.split('\n')[-1]
    # Compare the word COUNT with the limit; comparing the list of words
    # itself with an int raises TypeError.
    word_count = len(current_line.split())

    return word_count > max_sequence_len


In [None]:
def get_slim_suggestions(params, step, sL, s):
    """Policy: let the slim model continue the last line of the transcript.

    Args:
        params, step, sL: policy arguments that are not used here.
        s: current state; only s['transcript'] is read.

    Returns:
        {'slim_says': text}, where text is the last transcript line extended
        by a Poisson(3)-distributed number of generated words.
    """
    transcript = s['transcript']
    current_line = transcript.split('\n')[-1]

    # Number of words to add is drawn from a Poisson distribution.
    lam = 3
    rv = sts.poisson.rvs(lam)

    # Pad to the sequence length the slim model was prepared with; passing
    # the frosty length would feed it inputs of the wrong size.
    string = generate_text(slim_model, current_line, rv, slim_max_sequence_len)

    return {'slim_says': string}

def get_frosty_suggestion(params, step, sL, s):
    """Policy: let the frosty model continue the last line of the transcript.

    Only s['transcript'] is read; the remaining arguments are unused.
    Returns {'frosty_says': text}, where text is the last transcript line
    extended by a Poisson(3)-distributed number of generated words.
    """
    # Text after the final newline, or the whole transcript if there is none.
    last_line = s['transcript'].rsplit('\n', 1)[-1]

    # Randomly sized continuation.
    n_words = sts.poisson.rvs(3)

    suggestion = generate_text(frosty_model, last_line, n_words, frosty_max_sequence_len)
    return {'frosty_says': suggestion}

def store_slim_suggestion(params, step, sL, s, _input):
    """State update: copy the 'slim_says' policy output into the state.

    Returns the (state key, new value) pair ('slim_says', _input['slim_says']).
    """
    return ('slim_says', _input['slim_says'])

def store_frosty_suggestion(params, step, sL, s, _input):
    """State update: copy the 'frosty_says' policy output into the state.

    Returns the (state key, new value) pair
    ('frosty_says', _input['frosty_says']).
    """
    return ('frosty_says', _input['frosty_says'])

# IMPROVE - Tune
def collaborate(params, step, sL, s):
    """Policy: score the transcript and both suggestions from each poet's view.

    Args:
        params, step, sL: policy arguments that are not used here.
        s: current state; reads 'transcript', 'slim_says' and 'frosty_says'.

    Returns:
        {'slim': (prior, with_slim, with_frosty),
         'frosty': (prior, with_slim, with_frosty)}
        where each entry is the opinion returned by get_slim_opinion /
        get_frosty_opinion for the transcript alone, with slim's suggestion
        and with frosty's suggestion respectively.
    """
    slim_says = s['slim_says']
    # 'frosty_says' is the key written by store_frosty_suggestion; fall back
    # to the misspelt 'frost_says' for a state that only carries that key.
    frosty_says = s['frosty_says'] if 'frosty_says' in s else s['frost_says']
    prior_transcript = s['transcript']

    def opinions_of(get_opinion):
        # (existing transcript, + slim's words, + frosty's words)
        return (
            get_opinion(prior_transcript, ""),
            get_opinion(prior_transcript, slim_says),
            get_opinion(prior_transcript, frosty_says),
        )

    opinion_dict = {
        'slim': opinions_of(get_slim_opinion),
        'frosty': opinions_of(get_frosty_opinion),
    }

    return opinion_dict

In [None]:
# UPDATE - call evaluate function
def get_slim_opinion(transcript, suggested_word):
    """Score `transcript` extended by `suggested_word` from slim's point of view.

    Higher should be better.

    TODO: evaluate the text with slim_model. Until that is implemented a
    neutral 0.0 is returned, so that callers receive a number instead of
    hitting a NameError on an undefined `score`.
    """
    # Placeholder score, same value as the initial opinions in the state.
    score = 0.0

    return score

def get_frosty_opinion(transcript, suggested_word):
    """Score `transcript` extended by `suggested_word` from frosty's point of view.

    Higher should be better.

    TODO: evaluate the text with frosty_model. Until that is implemented a
    neutral 0.0 is returned, so that callers receive a number instead of
    hitting a NameError on an undefined `score`.
    """
    # Placeholder score, same value as the initial opinions in the state.
    score = 0.0

    return score

def choose(transcript, suggestions, opinions):
    """Pick one of the suggestions.

    `suggestions` and `opinions` are ordered (slim, frosty). The selection
    heuristic is still a stub: `transcript` and `opinions` are ignored and
    the first suggestion is always returned.
    """
    # Index of the winning suggestion: 0 (slim) for now, 1 would be frosty.
    selected = 0
    return suggestions[selected]

In [None]:
def update_transcript(params, step, sL, s, _input):
    """State update: replace the transcript with the chosen suggestion.

    Reads the previous transcript and both suggestions from the state `s`
    and both opinion tuples from the policy output `_input`, then returns
    ('transcript', choose(transcript, suggestions, opinions)).
    """
    previous = s['transcript']
    # Both pairs are ordered (slim, frosty), as choose() expects.
    opinion_pair = (_input['slim'], _input['frosty'])
    suggestion_pair = (s['slim_says'], s['frosty_says'])

    return ('transcript', choose(previous, suggestion_pair, opinion_pair))

def store_slim_opinion(params, step, sL, s, _input):
    """State update: copy the 'slim' policy output into the state.

    Returns the (state key, new value) pair ('slim', _input['slim']).
    """
    return ('slim', _input['slim'])

def store_frosty_opinion(params, step, sL, s, _input):
    """State update: copy the 'frosty' policy output into the state.

    Returns the (state key, new value) pair ('frosty', _input['frosty']).
    """
    return ('frosty', _input['frosty'])


In [None]:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# In the Partial State Update Blocks, the user specifies if state update functions will be run in series or in parallel
# Two blocks, in order: first both poets make a suggestion, then the
# suggestions are scored and the transcript is updated.
partial_state_update_blocks = [
    { 
        'policies': {
            # The policy is defined under the plural name get_slim_suggestions.
            'slim': get_slim_suggestions,
            'frosty': get_frosty_suggestion 
        },
        'variables': { # The following state variables will be updated simultaneously
            'slim_says': store_slim_suggestion,
            'frosty_says': store_frosty_suggestion
        }
    },
    { 
        'policies': {
            'collaborate': collaborate # Improve
        },
        'variables': { # The following state variables will be updated simultaneously
            'transcript': update_transcript,
            # The opinion update functions are defined with singular names.
            'slim': store_slim_opinion,
            'frosty': store_frosty_opinion
        }
    }
]
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

# Simulation Configuration Parameters
Lastly, we define the number of timesteps and the number of Monte Carlo runs of the simulation. These parameters must be passed in a dictionary under the keys `T` and `N`, respectively. In our example, we'll run the simulation for 10 timesteps. And because we are dealing with a deterministic system, it makes no sense to have multiple Monte Carlo runs, so we set `N=1`. We'll ignore the `M` key for now and set it to an empty `dict`.

In [None]:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# Settings of general simulation parameters, unrelated to the system itself
# `T` is a range with the number of discrete units of time the simulation will run for;
# `N` is the number of times the simulation will be run (Monte Carlo runs)
# In this example, we'll run the simulation once (N=1) and its duration will be of 10 timesteps
# We'll cover the `M` key in a future article. For now, let's leave it empty
# T: timesteps to simulate, N: number of Monte Carlo runs, M: left empty.
simulation_parameters = dict(
    T=range(10),
    N=1,
    M={},
)
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 

# Putting it all together
We have defined the state variables of our system and their initial conditions, as well as the policies and state update functions, which have been grouped into partial state update blocks. We have also specified the parameters of the simulation (number of timesteps and runs). We are now ready to put all those pieces together in a `Configuration` object.

In [None]:
from cadCAD.configuration import Configuration

# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
# The configurations above are then packaged into a `Configuration` object
# Bundle the initial state, the update blocks and the run settings defined
# in the cells above into one Configuration object.
config = Configuration(initial_state=initial_conditions, # dict mapping state variable names to their initial values
                       partial_state_update_blocks=partial_state_update_blocks, # list of dicts, each holding 'policies' and 'variables' (state update functions)
                       sim_config=simulation_parameters # dict with the simulation parameters 'T', 'N' and 'M'
                      )

# Running the engine
We are now ready to run the engine with the configuration defined above. Instantiate `ExecutionMode`, `ExecutionContext` and `Executor` objects, passing the `Configuration` object to the latter. Then run the `main()` method of the `Executor` object, which returns the results of the experiment in the first element of a tuple.

In [None]:
%%capture
from cadCAD.engine import ExecutionMode, ExecutionContext, Executor
# Build the execution context from the `single_proc` attribute of the mode object.
exec_mode = ExecutionMode()
exec_context = ExecutionContext(exec_mode.single_proc)
executor = Executor(exec_context, [config]) # Pass the configuration object inside a list
raw_result, tensor = executor.main() # The `main()` method returns a tuple; its first element contains the raw results

# Analyzing the results
We can now convert the raw results into a DataFrame for analysis

In [None]:
%matplotlib inline
import pandas as pd
# Load the raw simulation results into a DataFrame.
df = pd.DataFrame(raw_result)
# Shown as the cell output only: the re-indexed frame is not assigned, so `df` itself is left as built above.
df.set_index(['run', 'timestep', 'substep'])

---

_About BlockScience_  
[BlockScience](http://bit.ly/github_articles_M_1) is a research and engineering firm specialized in complex adaptive systems and applying practical methodologies from engineering design, development and testing to projects in emerging technologies such as blockchain. Follow us on [Medium](http://bit.ly/bsci-medium) or [Twitter](http://bit.ly/bsci-twitter) to stay in touch.