# FUNCTION GENERATOR using Policy Gradient

Useful links:
Policy Gradient Explanation: http://karpathy.github.io/2016/05/31/rl/ <br>
Example of Policy Gradient: https://github.com/keon/policy-gradient

In [1]:
import numpy as np
from keras.models import Sequential, Model
from keras.layers import TimeDistributed, Dense, Reshape, Flatten, GRU, Input, Embedding
from keras.optimizers import Adam
from keras.layers.convolutional import Convolution2D
from PolicyGradientModel import PolicyGradientModel
from RewardCalculator import RewardCalculator

Using TensorFlow backend.


In [2]:
ALLOWED_PARAMETERS = list('XY')
ALLOWED_SYMBOLS = ALLOWED_PARAMETERS + list('0123456789+-*/#')
NUM_SYMBOLS = len(ALLOWED_SYMBOLS)
MAX_LENGTH = 30 # Max length of the output expression
CORRECT_EXPRESSION = "3*X+2*Y"

### DEFINE MODEL

In [3]:
def getModel():
    # Trying to neglect input
    input1 = Input(shape=(1,1))
    # TODO: Add noise layer to make output vary
    x = GRU(32)(input1)
    out = Dense(NUM_SYMBOLS, activation='sigmoid')(x)
    model = Model(inputs=input1, outputs=out)
    model.compile(optimizer=Adam(),
                loss='categorical_crossentropy')
    return model

In [4]:
rewardCalculator = RewardCalculator(correctExpression=CORRECT_EXPRESSION,
                                    parameters=ALLOWED_PARAMETERS,
                                    usingFunctionDifferenceReward=False,
                                    usingCompilableReward=True,
                                    usingLengthReward=True,
                                    usingFoundSymbolReward=True,
                                    usingFile=False)

In [None]:
model = getModel()
pgModel = PolicyGradientModel(model=model,
                              allowedSymbol=ALLOWED_SYMBOLS,
                              numSymbol=NUM_SYMBOLS,
                              maxLength=MAX_LENGTH,
                              rewardCalculator=rewardCalculator,
                              learningRate=0.0001,
                              fileName="Model1.hdf5")

## TRAINING

In [None]:
pgModel.train(input=np.ones((1,1,1)))

Epoch: 0	Loss: 23.8873876572	Example Output: 4-X*4Y471618155388599	Example Reward:  -1
Saving Weight
Epoch: 1	Loss: 23.8992904663	Example Output: 	Example Reward:  -1.0
Epoch: 2	Loss: 23.9254896164	Example Output: /X05-8678556*5710*X*X/63/16/+8	Example Reward:  -1
Epoch: 3	Loss: 23.9479198456	Example Output: 307-3+9310	Example Reward:  0.84
Epoch: 4	Loss: 23.95912323	Example Output: 906YY*9827*070*11++	Example Reward:  -1
Epoch: 5	Loss: 23.9626302719	Example Output: 17-72	Example Reward:  0.92
Epoch: 6	Loss: 23.96334095	Example Output: 	Example Reward:  -1.0
Epoch: 7	Loss: 23.9755802155	Example Output: 621/1/888138/570	Example Reward:  0.74
Epoch: 8	Loss: 23.9911588669	Example Output: 14+*8*/58/	Example Reward:  -1
Epoch: 9	Loss: 23.9982307434	Example Output: 18Y6++8+9X4*00835+Y/5*	Example Reward:  -1
Epoch: 10	Loss: 24.0075727463	Example Output: 91/3+/+73	Example Reward:  -1
Saving Weight
Epoch: 11	Loss: 24.010027504	Example Output: 4/175/Y2+986-	Example Reward:  -1
Epoch: 12	Loss: 24

Epoch: 94	Loss: 24.1516242981	Example Output: +*-430-77727Y9	Example Reward:  -1
Epoch: 95	Loss: 24.1438280106	Example Output: 4039*1320X5-**58266-88/5-Y18+9	Example Reward:  -1
Epoch: 96	Loss: 24.1428466797	Example Output: 4764-187/79X**09Y-Y3Y19210**6-	Example Reward:  -1
Epoch: 97	Loss: 24.1522584915	Example Output: 3+267095581+3*4650664761-1/6Y3	Example Reward:  -1
Epoch: 98	Loss: 24.1515504837	Example Output: /33708X16/53Y504200123833/9YY3	Example Reward:  -1
Epoch: 99	Loss: 24.137622261	Example Output: 5508-68793251379*/61*5746*-680	Example Reward:  -1
Epoch: 100	Loss: 24.1313196182	Example Output: X+5Y+038/13-Y4/-91930-61X8199Y	Example Reward:  -1
Saving Weight
Epoch: 101	Loss: 24.1376609802	Example Output: 88+21672+031*62*8616563730671+	Example Reward:  -1
Epoch: 102	Loss: 24.1362560272	Example Output: 5543861288-X51739*845+67+1***Y	Example Reward:  -1
Epoch: 103	Loss: 24.1268539429	Example Output: /0Y10-28694	Example Reward:  -1
Epoch: 104	Loss: 24.1210950851	Example Output: /

Epoch: 183	Loss: 22.9988363266	Example Output: **0-0+54*0+69843801541-2664+*2	Example Reward:  -1
Epoch: 184	Loss: 22.9645456314	Example Output: 3511296653825641+45987-5*05083	Example Reward:  -1
Epoch: 185	Loss: 22.9474550247	Example Output: 1/47+113+6/6++261Y*694-3300530	Example Reward:  -1
Epoch: 186	Loss: 22.9277519226	Example Output: 2/216312552214	Example Reward:  0.74
Epoch: 187	Loss: 22.8853118896	Example Output: 52X95-2-64096034350+7675135327	Example Reward:  -1
Epoch: 188	Loss: 22.87302742	Example Output: 5-36595483*+23/-10348X1651-463	Example Reward:  -1
Epoch: 189	Loss: 22.9153194427	Example Output: 866105104081Y798351/391446605-	Example Reward:  -1
Epoch: 190	Loss: 22.9273931503	Example Output: Y4-813X0115Y1-3859*34730801+60	Example Reward:  -1
Saving Weight
Epoch: 191	Loss: 22.9066164017	Example Output: /0Y58896260653496X4843383164Y-	Example Reward:  -1
Epoch: 192	Loss: 22.8847316742	Example Output: 8+469844-626-642-8228-68*02790	Example Reward:  -1
Epoch: 193	Loss: 22.91