In [1]:
import dynet as dy
import numpy as np

In [2]:
m = dy.ParameterCollection()
pW = m.add_parameters((8,2))
pV = m.add_parameters((1,8))
pb = m.add_parameters((8))

dy.renew_cg() # new computation graph. not strictly needed here, but good practice.

# associate the parameters with cg Expressions
W = dy.parameter(pW)
V = dy.parameter(pV)
b = dy.parameter(pb)

In [3]:
b.value()

[0.39957764744758606,
 -0.13521379232406616,
 -0.3813660740852356,
 -0.37368789315223694,
 0.5874922871589661,
 0.02694915048778057,
 0.266682893037796,
 0.5391783118247986]

In [4]:
x = dy.vecInput(2) # an input vector of size 2. Also an expression.
output = dy.logistic(V*(dy.tanh((W*x)+b)))


In [5]:
x.set([0,0])
output.value()


0.47301405668258667

In [6]:
y = dy.scalarInput(0) # this will hold the correct answer
loss = dy.binary_log_loss(output, y)
loss.value()

0.6405814290046692

In [7]:
x.set([1,0])
y.set(0)
print (loss.value())

y.set(1)
print (loss.value())

x.set([0,0])
y.set(0)
print (loss.value())

0.7440601587295532
0.6447012424468994
0.6405814290046692


In [8]:
trainer = dy.SimpleSGDTrainer(m)

In [9]:
x.set([1,0])
y.set(1)
loss_value = loss.value() # this performs a forward through the network.
print ("the loss before step is:",loss_value)

# now do an optimization step
loss.backward()  # compute the gradients
trainer.update()

# see how it affected the loss:
loss_value = loss.value(recalculate=True) # recalculate=True means "don't use precomputed value"
print ("the loss after step is:",loss_value)

the loss before step is: 0.6447012424468994
the loss after step is: 0.5604482889175415


In [10]:
def create_xor_instances(num_rounds=2000):
    questions = []
    answers = []
    for round in range(num_rounds):
        for x1 in 0,1:
            for x2 in 0,1:
                answer = 0 if x1==x2 else 1
                questions.append((x1,x2))
                answers.append(answer)
    return questions, answers

questions, answers = create_xor_instances()

In [11]:
total_loss = 0
seen_instances = 0
for question, answer in zip(questions, answers):
    x.set(question)
    y.set(answer)
    seen_instances += 1
    total_loss += loss.value()
    loss.backward()
    trainer.update()
    if (seen_instances > 1 and seen_instances % 500 == 0):
        print ("average loss is:",total_loss / seen_instances)


average loss is: 0.43171693159639835
average loss is: 0.24627967279590665
average loss is: 0.1721019630841911
average loss is: 0.13259905643644743
average loss is: 0.10804088731147349
average loss is: 0.09127307015533248
average loss is: 0.07908279789738092
average loss is: 0.069812831700081
average loss is: 0.06252118222420622
average loss is: 0.056632498042588124
average loss is: 0.05177531759843061
average loss is: 0.047698996481854314
average loss is: 0.04422821692488371
average loss is: 0.0412366271018483
average loss is: 0.03863083712146617
average loss is: 0.03634031140322622


In [12]:
x.set([0,1])
print ("0,1",output.value())

x.set([1,0])
print ("1,0",output.value())

x.set([0,0])
print ("0,0",output.value())

x.set([1,1])
print ("1,1",output.value())


0,1 0.997960090637207
1,0 0.9977418184280396
0,0 0.0010158593067899346
1,1 0.0022931243292987347


In [13]:
W.value()

array([[ 0.53565913,  0.69983494],
       [ 2.96737266, -3.60019517],
       [ 1.21056926,  1.22047853],
       [-1.27657628,  0.60889524],
       [ 3.27553821, -2.57272363],
       [-1.26496565,  0.51268899],
       [-0.11418127, -0.21923806],
       [ 2.33958912,  2.33326292]])

In [14]:
# create a network for the xor problem given input and output
def create_xor_network(pW, pV, pb, inputs, expected_answer):
    dy.renew_cg() # new computation graph
    W = dy.parameter(pW) # add parameters to graph as expressions
    V = dy.parameter(pV)
    b = dy.parameter(pb)
    x = dy.vecInput(len(inputs))
    x.set(inputs)
    y = dy.scalarInput(expected_answer)
    output = dy.logistic(V*(dy.tanh((W*x)+b)))
    loss =  dy.binary_log_loss(output, y)
    return loss

m2 = dy.ParameterCollection()
pW = m2.add_parameters((8,2))
pV = m2.add_parameters((1,8))
pb = m2.add_parameters((8))
trainer = dy.SimpleSGDTrainer(m2)

seen_instances = 0
total_loss = 0
for question, answer in zip(questions, answers):
    loss = create_xor_network(pW, pV, pb, question, answer)
    seen_instances += 1
    total_loss += loss.value()
    loss.backward()
    trainer.update()
    if (seen_instances > 1 and seen_instances % 100 == 0):
        print ("average loss is:",total_loss / seen_instances)



average loss is: 0.7094781422615051
average loss is: 0.6628954063355923
average loss is: 0.5842473302284876
average loss is: 0.49666533261537554
average loss is: 0.4237849298417568
average loss is: 0.3673067914818724
average loss is: 0.3234326565957495
average loss is: 0.28870834081666547
average loss is: 0.26065978465187883
average loss is: 0.2375765295624733
average loss is: 0.2182665043264966
average loss is: 0.20188277295247342
average loss is: 0.18781058445931054
average loss is: 0.1755943974001067
average loss is: 0.16489014871201169
average loss is: 0.15543344435398468
average loss is: 0.14701792572597588
average loss is: 0.13948026080098416
average loss is: 0.13268953273636533
average loss is: 0.12653960262238978
average loss is: 0.1209435277614033
average loss is: 0.11582941535479305
average loss is: 0.11113730798900613
average loss is: 0.10681680545977239
average loss is: 0.10282523674741387
average loss is: 0.09912623139277388
average loss is: 0.0956885992727001
average loss

In [15]:
m = dy.ParameterCollection()

# add parameters to parameter collection
pW = m.add_parameters((10,30))
pB = m.add_parameters(10)
lookup = m.add_lookup_parameters((500, 10))
print ("added")

# create trainer
trainer = dy.SimpleSGDTrainer(m)

# Regularization is set via the --dynet-l2 commandline flag.
# Learning rate parameters can be passed to the trainer:
# alpha = 0.1  # learning rate
# trainer = dy.SimpleSGDTrainer(m, e0=alpha)

# function for graph creation
def create_network_return_loss(inputs, expected_output):
    """
    inputs is a list of numbers
    """
    dy.renew_cg()
    W = dy.parameter(pW) # from parameters to expressions
    b = dy.parameter(pB)
    emb_vectors = [lookup[i] for i in inputs]
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax( (W*net_input) + b)
    loss = -dy.log(dy.pick(net_output, expected_output))
    return loss

# function for prediction
def create_network_return_best(inputs):
    """
    inputs is a list of numbers
    """
    dy.renew_cg()
    W = dy.parameter(pW)
    b = dy.parameter(pB)
    emb_vectors = [lookup[i] for i in inputs]
    net_input = dy.concatenate(emb_vectors)
    net_output = dy.softmax( (W*net_input) + b)
    return np.argmax(net_output.npvalue())


# train network
for epoch in range(5):
    for inp,lbl in ( ([1,2,3],1), ([3,2,4],2) ):
        print (inp, lbl)
        loss = create_network_return_loss(inp, lbl)
        print (loss.value()) # need to run loss.value() for the forward prop
        loss.backward()
        trainer.update()

print (create_network_return_best([1,2,3]))


added
[1, 2, 3] 1
1.6750271320343018
[3, 2, 4] 2
3.186349868774414
[1, 2, 3] 1
1.329147458076477
[3, 2, 4] 2
2.654533624649048
[1, 2, 3] 1
1.0684216022491455
[3, 2, 4] 2
2.1631932258605957
[1, 2, 3] 1
0.868177056312561
[3, 2, 4] 2
1.7094173431396484
[1, 2, 3] 1
0.7108930349349976
[3, 2, 4] 2
1.304443359375
1


In [45]:
dy.renew_cg()
pc = dy.ParameterCollection()
NUM_LAYERS=3
INPUT_DIM=50
HIDDEN_DIM1=10
HIDDEN_DIM2=20
builder = dy.LSTMBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM1, pc)
# or:
# builder = dy.SimpleRNNBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM, pc)


TypeError: Argument 'model' has incorrect type (expected _dynet.ParameterCollection, got int)

In [37]:
s0 = builder.initial_state()

In [38]:

x1 = dy.vecInput(INPUT_DIM)

In [39]:
s1=s0.add_input(x1)
y1 = s1.output()
# here, we add x1 to the RNN, and the output we get from the top is y (a HIDEN_DIM-dim vector)

In [40]:
y1.npvalue().shape

(10,)

In [41]:
s2=s1.add_input(x1) # we can add another input
y2=s2.output()
y2.npvalue().shape

(10,)

In [42]:
print( s2.h())

(expression 63/8016, expression 78/8016, expression 93/8016)


In [43]:
# create a simple rnn builder
rnnbuilder=dy.SimpleRNNBuilder(NUM_LAYERS, INPUT_DIM, HIDDEN_DIM, pc)

# initialize a new graph, and a new sequence
rs0 = rnnbuilder.initial_state()

# add inputs
rs1 = rs0.add_input(x1)
ry1 = rs1.output()
print ("all layers:", s1.h())

all layers: (expression 22/8016, expression 35/8016, expression 48/8016)


In [34]:

print (s1.s())

(expression 14/8015, expression 16/8015)


In [35]:
rnn_h  = rs1.h()
rnn_s  = rs1.s()
print ("RNN h:", rnn_h)
print ("RNN s:", rnn_s)


lstm_h = s1.h()
lstm_s = s1.s()
print ("LSTM h:", lstm_h)
print ("LSTM s:", lstm_s)


RNN h: (expression 36/8015,)
RNN s: (expression 36/8015,)
LSTM h: (expression 16/8015,)
LSTM s: (expression 14/8015, expression 16/8015)
