# FUNCTION GENERATOR using Policy Gradient

Useful links:
Policy Gradient Explanation: http://karpathy.github.io/2016/05/31/rl/ <br>
Example of Policy Gradient: https://github.com/keon/policy-gradient

In [7]:
import numpy as np
from keras.models import Sequential, Model
from keras.layers import TimeDistributed, Dense, Reshape, Flatten, GRU, Input, Embedding
from keras.optimizers import Adam
from keras.layers.convolutional import Convolution2D
from PolicyGradientModel import PolicyGradientModel
from RewardCalculator import RewardCalculator

In [8]:
ALLOWED_PARAMETERS = list('XY')
ALLOWED_SYMBOLS = ALLOWED_PARAMETERS + list('0123456789+-*/#')
NUM_SYMBOLS = len(ALLOWED_SYMBOLS)
MAX_LENGTH = 30 # Max length of the output expression
CORRECT_EXPRESSION = "3*X+2*Y"

### DEFINE MODEL

In [9]:
def getModel():
    # Trying to neglect input
    input1 = Input(shape=(1,1))
    # TODO: Add noise layer to make output vary
    x = GRU(32)(input1)
    out = Dense(NUM_SYMBOLS, activation='sigmoid')(x)
    model = Model(inputs=input1, outputs=out)
    model.compile(optimizer=Adam(),
                loss='categorical_crossentropy')
    return model

In [10]:
rewardCalculator = RewardCalculator(correctExpression=CORRECT_EXPRESSION,
                                    parameters=ALLOWED_PARAMETERS,
                                    usingFunctionDifferenceReward=False,
                                    usingCompilableReward=True,
                                    usingLengthReward=True,
                                    usingFile=False)

In [11]:
model = getModel()
pgModel = PolicyGradientModel(model=model,
                              allowedSymbol=ALLOWED_SYMBOLS,
                              numSymbol=NUM_SYMBOLS,
                              maxLength=MAX_LENGTH,
                              rewardCalculator=rewardCalculator,
                              learningRate=0.0001,
                              fileName="Model1.hdf5")

## TRAINING

In [None]:
pgModel.train(input=np.ones((1,1,1)))

Epoch: 0	Loss: 24.1669408798	Example Output: //77*7YY/4Y0-9**
Saving Weight
Epoch: 1	Loss: 24.1928524017	Example Output: 09X2Y4765880X3/
Epoch: 2	Loss: 24.2049629211	Example Output: 0-54
Epoch: 3	Loss: 24.2170661926	Example Output: 572X7-6306406107895/X6Y728X40/
Epoch: 4	Loss: 24.229199791	Example Output: 5464Y8132167
Epoch: 5	Loss: 24.2364273071	Example Output: 80
Epoch: 6	Loss: 24.2461334229	Example Output: 00139*78/X-01YX075X--2+Y-123Y7
Epoch: 7	Loss: 24.2559427261	Example Output: /-65X514X440/X3+65
Epoch: 8	Loss: 24.2579299927	Example Output: 001*8119
Epoch: 9	Loss: 24.2590667725	Example Output: 96*095Y70369*+Y50572/015+Y4802
Epoch: 10	Loss: 24.2606086731	Example Output: 6X/X17
Saving Weight
Epoch: 11	Loss: 24.2570659637	Example Output: X28+203+*8321538Y7Y5-3-7
Epoch: 12	Loss: 24.246553421	Example Output: /69395/X/7-985705885*-Y6Y829-1
Epoch: 13	Loss: 24.2363046646	Example Output: 9+388*45*24X18595
Epoch: 14	Loss: 24.2356575012	Example Output: 48326X+*-X-Y-*
Epoch: 15	Loss: 24.2316

Epoch: 129	Loss: 24.168060112	Example Output: 44-982//+1X54646*YYX522*4-1-35
Epoch: 130	Loss: 24.1712482452	Example Output: 4*++X90*37-+8*++81*8*
Saving Weight
Epoch: 131	Loss: 24.1758321762	Example Output: 0023031Y8X30719+9-
Epoch: 132	Loss: 24.1855085373	Example Output: 64X10--Y4
Epoch: 133	Loss: 24.1985923767	Example Output: 
Epoch: 134	Loss: 24.2171516418	Example Output: 1
Epoch: 135	Loss: 24.2341245651	Example Output: 1X+*478054*8402386+2*X*-89+X7+
Epoch: 136	Loss: 24.2384189606	Example Output: 5
Epoch: 137	Loss: 24.2253696442	Example Output: X05-Y28+6X5/282Y+9066XY5389846
Epoch: 138	Loss: 24.2176866531	Example Output: 482658/13843-5X6+-0/X9-258
Epoch: 139	Loss: 24.2176574707	Example Output: 1*+58X5/-0752765-47*X5
Epoch: 140	Loss: 24.2155532837	Example Output: 39X*9
Saving Weight
Epoch: 141	Loss: 24.2133615494	Example Output: 717522413+6589258314YX9-*4188-
Epoch: 142	Loss: 24.2134820938	Example Output: /-5/03*-
Epoch: 143	Loss: 24.2075265884	Example Output: 13
Epoch: 144	Loss: 24.