# FUNCTION GENERATOR using Policy Gradient

Useful links:
Policy Gradient Explanation: http://karpathy.github.io/2016/05/31/rl/ <br>
Example of Policy Gradient: https://github.com/keon/policy-gradient

In [1]:
import numpy as np
from keras.models import Sequential, Model
from keras.layers import TimeDistributed, Dense, Reshape, Flatten, GRU, Input, Embedding, LSTM
from keras.optimizers import Adam
from keras.layers.convolutional import Convolution2D
from PolicyGradientModel import PolicyGradientModel
from RewardCalculator import RewardCalculator

Using TensorFlow backend.


In [2]:
# Variables that may appear in a generated expression.
ALLOWED_PARAMETERS = ['X', 'Y']

# Full vocabulary, in index order: the parameters, the ten digits, then the
# four arithmetic operators and '#'.
# NOTE(review): '#' is presumably an end-of-expression marker -- confirm in
# PolicyGradientModel.
ALLOWED_SYMBOLS = list(ALLOWED_PARAMETERS)
ALLOWED_SYMBOLS.extend('0123456789')
ALLOWED_SYMBOLS.extend('+-*/#')
NUM_SYMBOLS = len(ALLOWED_SYMBOLS)

MAX_LENGTH = 10  # Max length of the output expression
CORRECT_EXPRESSION = "3*X+2*Y"  # Target expression handed to the reward calculator

## DEFINE MODEL

In [3]:
def getModel():
    """Build and compile the dense policy network.

    The network takes an input of shape (MAX_LENGTH, NUM_SYMBOLS), flattens
    it, passes it through a 5-unit ReLU layer followed by three 100-unit ReLU
    layers, and ends in a softmax with NUM_SYMBOLS outputs.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer (lr=0.0001) and
        categorical cross-entropy loss.
    """
    # The first axis is taken from MAX_LENGTH instead of a literal 10 so the
    # network cannot drift out of sync with the configuration cell.
    # NOTE(review): assumes this axis is the expression length -- confirm
    # against PolicyGradientModel.
    input1 = Input(shape=(MAX_LENGTH, NUM_SYMBOLS))
    x = Flatten()(input1)

    # Trying to neglect input (original author's note).
    # NOTE(review): presumably the narrow 5-unit layer is what limits the
    # influence of the input -- confirm intent before resizing it.
    x = Dense(5, activation='relu')(x)

    # Three identical hidden layers.
    for _ in range(3):
        x = Dense(100, activation='relu')(x)

    # One probability per entry of the symbol vocabulary.
    out = Dense(NUM_SYMBOLS, activation='softmax')(x)

    model = Model(inputs=input1, outputs=out)
    model.compile(optimizer=Adam(lr=0.0001),
                  loss='categorical_crossentropy')
    return model

In [4]:
# Reward source used during training. The weights and offset are passed to
# RewardCalculator unchanged; their exact effect is defined in
# RewardCalculator.py, which is not part of this notebook.
rewardCalculator = RewardCalculator(
    correctExpression=CORRECT_EXPRESSION,
    parameters=ALLOWED_PARAMETERS,
    # The two unit weights are the positive terms; length is the only
    # negative weight.
    functionDifferenceRewardWeight=1,
    compilableRewardWeight=1,
    lengthRewardWeight=-0.001,
    rewardOffset=0.2,
    usingFile=False,
)

In [5]:
# Build the network, then hand it to the policy-gradient wrapper together
# with the vocabulary, the length limit and the reward calculator.
model = getModel()
pgModel = PolicyGradientModel(
    model=model,
    allowedSymbol=ALLOWED_SYMBOLS,
    numSymbol=NUM_SYMBOLS,
    maxLength=MAX_LENGTH,
    rewardCalculator=rewardCalculator,
    learningRate=1e-7,
    fileName="Model1.hdf5",
)

# Start from previously saved weights rather than a random initialisation.
# NOTE(review): presumably this file must exist next to the notebook --
# confirm how loadWeight resolves the path.
pgModel.loadWeight("CorrectSyntaxModel.hdf5")

## TRAINING

In [6]:
# Train from an all-zero input. Its shape is derived from MAX_LENGTH and
# NUM_SYMBOLS instead of a literal 10, so it stays in step with the
# configuration cell.
# NOTE(review): assumes the model's input layer is (MAX_LENGTH, NUM_SYMBOLS);
# at the current MAX_LENGTH = 10 this matches the original (1, 10, NUM_SYMBOLS).
pgModel.train(input=np.zeros((1, MAX_LENGTH, NUM_SYMBOLS)))

Epoch: 0	Loss: 2.17651643753	Example Output: 7	Example Reward:  0.131931772361
Prob
[[  2.50328124e-01   2.49789536e-01   4.99797761e-02   5.02386913e-02
    4.98908795e-02   4.99475673e-02   4.99993227e-02   4.99253757e-02
    5.00673912e-02   5.00247590e-02   4.99067828e-02   4.99014668e-02
    4.98546875e-08   4.86570926e-08   4.87266547e-08   4.83522591e-08
    6.65712179e-08]]
[[  4.52521137e-12   4.45633764e-12   2.85560098e-02   2.86212396e-02
    2.85980050e-02   2.85721738e-02   2.85476334e-02   2.85988543e-02
    2.85777301e-02   2.86266748e-02   2.85705253e-02   2.85791978e-02
    1.42695755e-01   1.42626792e-01   1.42880112e-01   1.42809689e-01
    1.43139556e-01]]
Gradient
[[ -3.30262333e-02  -3.29551734e-02  -6.59392029e-03  -6.62807934e-03
   -6.58219168e-03  -6.58967067e-03  -6.59649912e-03  -6.58674305e-03
   -6.60547940e-03   1.25331908e-01  -6.58428995e-03  -6.58358866e-03
   -6.57741683e-09  -6.41941611e-09  -6.42859366e-09  -6.37919895e-09
   -8.78285800e-09]
 [ -5

Epoch: 51	Loss: 2.03719419241	Example Output: Y-1-32-0*4	Example Reward:  0.130236222237
Epoch: 52	Loss: 2.1632060051	Example Output: 1+X*0*6	Example Reward:  0.128836623144
Epoch: 53	Loss: 2.10578591824	Example Output: 2-5/1+5-Y	Example Reward:  0.111891779643
Epoch: 54	Loss: 2.06901984215	Example Output: 8	Example Reward:  0.132448610884
Epoch: 55	Loss: 2.05816692114	Example Output: 59/X-3/Y	Example Reward:  -0.574772684578
Epoch: 56	Loss: 1.99774571657	Example Output: X	Example Reward:  0.149482313769
Epoch: 57	Loss: 1.99911437035	Example Output: 2/4+7+Y*75	Example Reward:  -0.122986375527
Epoch: 58	Loss: 1.95416837931	Example Output: 3+X/Y*7490	Example Reward:  -0.529490479863
Epoch: 59	Loss: 1.9781873703	Example Output: Y	Example Reward:  0.149482313769
Epoch: 60	Loss: 1.9751075387	Example Output: X-X	Example Reward:  0.128320278918
Saving Weight
Epoch: 61	Loss: 2.01217333078	Example Output: Y-3/X*Y+92	Example Reward:  -0.506703362379
Epoch: 62	Loss: 1.95640056133	Example Output: 

Epoch: 101	Loss: 1.93484873772	Example Output: X/Y	Example Reward:  -0.506753102546
Epoch: 102	Loss: 1.88033443689	Example Output: X+5	Example Reward:  0.152708018409
Epoch: 103	Loss: 1.8310448885	Example Output: Y+X-X	Example Reward:  0.149482313769
Epoch: 104	Loss: 1.90819569826	Example Output: 3+2-X	Example Reward:  0.113183868508
Epoch: 105	Loss: 1.9307580471	Example Output: 4-0+889+69	Example Reward:  -0.00883880763773
Epoch: 106	Loss: 1.90201817751	Example Output: 2	Example Reward:  0.129355794056
Epoch: 107	Loss: 1.91579706669	Example Output: X+X/5+X/65	Example Reward:  0.154731493654
Epoch: 108	Loss: 1.90254473686	Example Output: Y	Example Reward:  0.149482313769
Epoch: 109	Loss: 1.79204146862	Example Output: Y/X*Y	Example Reward:  -0.506909525076
Epoch: 110	Loss: 1.85087299347	Example Output: 8-6/X	Example Reward:  -0.511375167733
Saving Weight
Epoch: 111	Loss: 1.88643219471	Example Output: 2	Example Reward:  0.129355794056
Epoch: 112	Loss: 1.75402597189	Example Output: Y+Y*Y/

Epoch: 151	Loss: 1.79890284538	Example Output: 6/X-Y+8	Example Reward:  -0.511395487522
Epoch: 152	Loss: 1.71538938284	Example Output: 9	Example Reward:  0.13296335402
Epoch: 153	Loss: 1.80149878263	Example Output: 3	Example Reward:  0.129873053254
Epoch: 154	Loss: 1.74310665131	Example Output: X/Y	Example Reward:  -0.506753102546
Epoch: 155	Loss: 1.86755074263	Example Output: 3	Example Reward:  0.129873053254
Epoch: 156	Loss: 1.74033875465	Example Output: X/Y	Example Reward:  -0.506753102546
Epoch: 157	Loss: 1.90243786573	Example Output: 7	Example Reward:  0.131931772361
Epoch: 158	Loss: 1.8412207365	Example Output: X	Example Reward:  0.149482313769
Epoch: 159	Loss: 1.83126043081	Example Output: 0	Example Reward:  0.128320278918
Epoch: 160	Loss: 1.76761763096	Example Output: 2-9+X/4-X	Example Reward:  0.111968257681
Saving Weight
Epoch: 161	Loss: 1.83508688211	Example Output: 0+0/X/5/35	Example Reward:  0.128320278918
Epoch: 162	Loss: 1.7942025423	Example Output: Y-X+Y-3-84	Example Re

Epoch: 220	Loss: 0.861354660988	Example Output: X-Y+X-X-Y	Example Reward:  0.111029637462
Saving Weight
Epoch: 221	Loss: 0.886267662048	Example Output: X	Example Reward:  0.149482313769
Epoch: 222	Loss: 0.776093888283	Example Output: X-Y+Y-Y-Y	Example Reward:  0.111029637462
Epoch: 223	Loss: 0.827812415361	Example Output: X	Example Reward:  0.149482313769
Epoch: 224	Loss: 0.852806210518	Example Output: X	Example Reward:  0.149482313769
Epoch: 225	Loss: 0.848731517792	Example Output: X-Y-X-X-Y	Example Reward:  0.0837470294395
Epoch: 226	Loss: 0.852146083117	Example Output: Y-Y-X-X-Y	Example Reward:  0.0837470294395
Epoch: 227	Loss: 0.795522385836	Example Output: Y+Y-Y-X+X	Example Reward:  0.149482313769
Epoch: 228	Loss: 0.816908198595	Example Output: Y-X-Y-X-X	Example Reward:  0.0837470294395
Epoch: 229	Loss: 0.943204200268	Example Output: X-Y-Y-X	Example Reward:  0.0964106084126
Epoch: 230	Loss: 0.954858523607	Example Output: Y-X	Example Reward:  0.128320278918
Saving Weight
Epoch: 231

Epoch: 252	Loss: 0.839576023817	Example Output: Y+Y-X+X-X	Example Reward:  0.149482313769
Epoch: 253	Loss: 0.764317363501	Example Output: X+Y+X+X+Y	Example Reward:  10000000
Epoch: 254	Loss: 0.813181108236	Example Output: X+/Y+Y+X+X	Example Reward:  0
Epoch: 255	Loss: 0.848919075727	Example Output: /*Y+X+X+X	Example Reward:  0
Epoch: 256	Loss: 1.03358764648	Example Output: +X/Y+X+0+X	Example Reward:  -0.506708467786
Epoch: 257	Loss: 1.02674072385	Example Output: 	Example Reward:  0
Epoch: 258	Loss: 1.01505813003	Example Output: +Y+/*7+X-X	Example Reward:  0
Epoch: 259	Loss: 0.848819881678	Example Output: +X+*X+X+X	Example Reward:  0
Epoch: 260	Loss: 1.01946855187	Example Output: *++X+X+Y/5	Example Reward:  0
Saving Weight
Epoch: 261	Loss: 0.896503436565	Example Output: +Y+/X+X+Y	Example Reward:  0
Epoch: 262	Loss: 0.84994969964	Example Output: +X+*Y-Y+X	Example Reward:  0
Epoch: 263	Loss: 0.961226594448	Example Output: +X+*Y+X+X	Example Reward:  0
Epoch: 264	Loss: 0.86614819169	Example

Epoch: 305	Loss: 0.821521276236	Example Output: +X+/Y+X+X	Example Reward:  0
Epoch: 306	Loss: 0.770644289255	Example Output: -++X+Y+X+X	Example Reward:  0.176764921791
Epoch: 307	Loss: 0.851483935118	Example Output: -	Example Reward:  0
Epoch: 308	Loss: 0.877632075548	Example Output: -*+X+Y+X+X	Example Reward:  0
Epoch: 309	Loss: 0.903982615471	Example Output: -+	Example Reward:  0
Epoch: 310	Loss: 0.819541311264	Example Output: +X++X/Y/Y	Example Reward:  -0.511354650413
Saving Weight
Epoch: 311	Loss: 0.883188652992	Example Output: -++X	Example Reward:  0.111029637462
Epoch: 312	Loss: 0.814718103409	Example Output: -+-Y-X+X+Y	Example Reward:  0.176764921791
Epoch: 313	Loss: 0.915164422989	Example Output: +Y+/0-Y+Y	Example Reward:  0
Epoch: 314	Loss: 0.89117500186	Example Output: +Y+-Y/Y/Y	Example Reward:  -0.511355977383
Epoch: 315	Loss: 0.825214534998	Example Output: +Y++Y+X-Y	Example Reward:  0.176764921791
Epoch: 316	Loss: 0.965437912941	Example Output: *+*X+X+5X	Example Reward:  0


Epoch: 361	Loss: 0.895900911093	Example Output: +X++X/Y+X	Example Reward:  -0.506708467786
Epoch: 362	Loss: 0.93460136652	Example Output: +X-+X+X+Y	Example Reward:  0.176764921791
Epoch: 363	Loss: 0.890813559294	Example Output: +X+/Y+Y+Y	Example Reward:  0
Epoch: 364	Loss: 0.776958054304	Example Output: +Y++Y-Y+X	Example Reward:  0.176764921791
Epoch: 365	Loss: 0.741386419535	Example Output: -++X+Y-X+Y	Example Reward:  0.128320278918
Epoch: 366	Loss: 0.844072079659	Example Output: --*Y+X+Y+3	Example Reward:  0
Epoch: 367	Loss: 0.963482129574	Example Output: 	Example Reward:  0
Epoch: 368	Loss: 0.860030937195	Example Output: +Y-+X*Y+Y	Example Reward:  -0.0680919630061
Epoch: 369	Loss: 0.968266499043	Example Output: /	Example Reward:  0
Epoch: 370	Loss: 0.810627913475	Example Output: +X+	Example Reward:  0
Saving Weight
Epoch: 371	Loss: 0.890411758423	Example Output: +Y+*X+X+X	Example Reward:  0
Epoch: 372	Loss: 0.957786470652	Example Output: -++X+Y	Example Reward:  0.128320278918
Epoch:

Epoch: 417	Loss: 0.869811433554	Example Output: -++Y+X+Y+X	Example Reward:  0.176764921791
Epoch: 418	Loss: 0.872667729855	Example Output: +X+*Y+X-Y	Example Reward:  0
Epoch: 419	Loss: 0.820518296957	Example Output: +Y-+X+X+X	Example Reward:  0.176764921791
Epoch: 420	Loss: 0.852868551016	Example Output: -++X+Y+X+X	Example Reward:  0.176764921791
Saving Weight
Epoch: 421	Loss: 0.899009048939	Example Output: +X*+X+X+Y	Example Reward:  -0.112378308995
Epoch: 422	Loss: 0.945110493898	Example Output: +Y+-Y+9*40	Example Reward:  0.126798290923
Epoch: 423	Loss: 0.853504490852	Example Output: +Y++Y+X+Y	Example Reward:  0.230337444003
Epoch: 424	Loss: 0.858950740099	Example Output: +X+*Y+X+X	Example Reward:  0
Epoch: 425	Loss: 0.815460878611	Example Output: +X++X+Y+X	Example Reward:  0.280952882427
Epoch: 426	Loss: 0.876090174913	Example Output: +Y+/Y+Y+X	Example Reward:  0
Epoch: 427	Loss: 0.82605304718	Example Output: +Y+-Y+X+Y	Example Reward:  0.176764921791
Epoch: 428	Loss: 0.837913244963	

Epoch: 501	Loss: 0.912296921015	Example Output: +Y++Y+X+X	Example Reward:  0.280952882427
Epoch: 502	Loss: 0.850932860374	Example Output: -/+Y+X+X+X	Example Reward:  0
Epoch: 503	Loss: 0.784189230204	Example Output: +Y++X*X+Y	Example Reward:  -0.111679455727
Epoch: 504	Loss: 0.774786096811	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 505	Loss: 0.810210138559	Example Output: +Y++X+X+X	Example Reward:  0.280952882427
Epoch: 506	Loss: 0.778162419796	Example Output: +X++X+X*X	Example Reward:  -0.113233856679
Epoch: 507	Loss: 0.813006412983	Example Output: -++Y*X+Y+X	Example Reward:  -0.0680919630061
Epoch: 508	Loss: 0.788542801142	Example Output: -+-X+X+X+Y	Example Reward:  0.280952882427
Epoch: 509	Loss: 0.867569470406	Example Output: 	Example Reward:  0
Epoch: 510	Loss: 0.828250420094	Example Output: +X+*Y+X-X	Example Reward:  0
Saving Weight
Epoch: 511	Loss: 0.808998060226	Example Output: +X+/Y/Y+X	Example Reward:  0
Epoch: 512	Loss: 0.891411429644	Example Output: +X

Epoch: 551	Loss: 0.82069504261	Example Output: -++X	Example Reward:  0.111029637462
Epoch: 552	Loss: 0.855004703999	Example Output: +Y++Y+X+Y	Example Reward:  0.230337444003
Epoch: 553	Loss: 0.788414788246	Example Output: /+X+X+Y-X	Example Reward:  0
Epoch: 554	Loss: 0.851867073774	Example Output: /+Y+X+X+Y	Example Reward:  0
Epoch: 555	Loss: 0.799450063705	Example Output: *++X+X*X+X	Example Reward:  0
Epoch: 556	Loss: 0.812441927195	Example Output: +X+-Y/Y+X	Example Reward:  0.175906671873
Epoch: 557	Loss: 0.86842366457	Example Output: --	Example Reward:  0
Epoch: 558	Loss: 0.857730406523	Example Output: -+	Example Reward:  0
Epoch: 559	Loss: 0.875788211823	Example Output: /	Example Reward:  0
Epoch: 560	Loss: 0.851946091652	Example Output: +Y++X+Y+X	Example Reward:  0.280952882427
Saving Weight
Epoch: 561	Loss: 0.862360554934	Example Output: +X+*X-Y+X	Example Reward:  0
Epoch: 562	Loss: 0.815535205603	Example Output: -+	Example Reward:  0
Epoch: 563	Loss: 0.897815483809	Example Outpu

Epoch: 607	Loss: 0.924607038498	Example Output: *++Y+Y+Y+Y	Example Reward:  0
Epoch: 608	Loss: 0.866369986534	Example Output: +X+/X+Y+X	Example Reward:  0
Epoch: 609	Loss: 0.786703306437	Example Output: +X+*Y/X+Y	Example Reward:  0
Epoch: 610	Loss: 0.85973918438	Example Output: +Y+-X+X+Y	Example Reward:  0.176764921791
Saving Weight
Epoch: 611	Loss: 0.81821603775	Example Output: +X++X*X+X	Example Reward:  -0.113233856679
Epoch: 612	Loss: 0.846918529272	Example Output: +Y/*X+X+X	Example Reward:  0
Epoch: 613	Loss: 0.912327802181	Example Output: -	Example Reward:  0
Epoch: 614	Loss: 0.848094201088	Example Output: 	Example Reward:  0
Epoch: 615	Loss: 0.939700472355	Example Output: 	Example Reward:  0
Epoch: 616	Loss: 0.877762818336	Example Output: +X+-Y-X+X	Example Reward:  0.128320278918
Epoch: 617	Loss: 0.808618170023	Example Output: *++X+Y+X+X	Example Reward:  0
Epoch: 618	Loss: 0.71027675271	Example Output: *++X+X/X+X	Example Reward:  0
Epoch: 619	Loss: 0.791664654016	Example Output: 

Epoch: 651	Loss: 0.836760175228	Example Output: +X++Y/0/X	Example Reward:  0
Epoch: 652	Loss: 0.818721204996	Example Output: +X*+X-Y+Y	Example Reward:  -0.110351368261
Epoch: 653	Loss: 0.960815554857	Example Output: -+	Example Reward:  0
Epoch: 654	Loss: 0.789763903618	Example Output: /+Y+X+X+Y	Example Reward:  0
Epoch: 655	Loss: 0.813946712017	Example Output: *+*X+X+X+Y	Example Reward:  0
Epoch: 656	Loss: 0.893847817183	Example Output: +Y*+3+X+X	Example Reward:  0.281600229437
Epoch: 657	Loss: 0.810281044245	Example Output: +Y+-X+X+Y	Example Reward:  0.176764921791
Epoch: 658	Loss: 0.753057014942	Example Output: +Y++X+Y-X	Example Reward:  0.176764921791
Epoch: 659	Loss: 0.833450609446	Example Output: +Y+-X+Y+X	Example Reward:  0.176764921791
Epoch: 660	Loss: 0.793846768141	Example Output: +X++Y	Example Reward:  0.176764921791
Saving Weight
Epoch: 661	Loss: 0.895499247313	Example Output: *++X+X+X+X	Example Reward:  0
Epoch: 662	Loss: 0.838953202963	Example Output: -+*X*X+Y+X	Example Re

Epoch: 745	Loss: 0.917506104708	Example Output: *++X+X*X+X	Example Reward:  0
Epoch: 746	Loss: 0.883190900087	Example Output: +X++9Y+X+Y	Example Reward:  0
Epoch: 747	Loss: 0.868054288626	Example Output: -+-X	Example Reward:  0.149482313769
Epoch: 748	Loss: 0.779207146168	Example Output: /+Y+X+Y+X	Example Reward:  0
Epoch: 749	Loss: 0.824992960691	Example Output: -+	Example Reward:  0
Epoch: 750	Loss: 0.816462016106	Example Output: *++Y+X+X+X	Example Reward:  0
Prob
[[  3.06929900e-22   4.31521607e-22   2.05341270e-20   3.34194280e-20
    1.64541911e-20   2.54532716e-20   2.44401046e-20   1.96279151e-20
    1.93077704e-20   2.37643675e-20   2.80915674e-20   2.32836624e-20
    5.72709620e-01   1.86425894e-01   1.11242726e-01   8.34755450e-02
    4.61461321e-02]]
[[ 0.          0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.85224324
   0.08620486  0.02716149  0.0189966   0.01539378]]
[[  3.39391019e-35   7.2

Epoch: 801	Loss: 0.790633887053	Example Output: *++Y+X+X+Y	Example Reward:  0
Epoch: 802	Loss: 0.830367225409	Example Output: -++X*Y+X+X	Example Reward:  -0.0680919630061
Epoch: 803	Loss: 0.771029442549	Example Output: +X+/X-X+X	Example Reward:  0
Epoch: 804	Loss: 0.794743722677	Example Output: 	Example Reward:  0
Epoch: 805	Loss: 0.777832388878	Example Output: +Y+/X+Y+X	Example Reward:  0
Epoch: 806	Loss: 0.789889669418	Example Output: -+-X+Y+X+X	Example Reward:  0.280952882427
Epoch: 807	Loss: 0.767662835121	Example Output: +Y+*X+X+X	Example Reward:  0
Epoch: 808	Loss: 0.8267552495	Example Output: /+Y+X+X+Y	Example Reward:  0
Epoch: 809	Loss: 0.841507011652	Example Output: +X+/Y/Y*Y	Example Reward:  0
Epoch: 810	Loss: 0.787693357468	Example Output: +X++Y-X+X	Example Reward:  0.176764921791
Saving Weight
Epoch: 811	Loss: 0.79841054678	Example Output: +X+/X+X+Y	Example Reward:  0
Epoch: 812	Loss: 0.818051040173	Example Output: +Y*-X+X+Y	Example Reward:  -0.0680919630061
Epoch: 813	Loss

Epoch: 851	Loss: 0.821312779188	Example Output: +X++Y-X+X	Example Reward:  0.176764921791
Epoch: 852	Loss: 0.774288272858	Example Output: +Y*+X+X+X	Example Reward:  -0.0539090397123
Epoch: 853	Loss: 0.820669221878	Example Output: +Y++X+Y	Example Reward:  0.215217598098
Epoch: 854	Loss: 0.787457132339	Example Output: -++Y+Y+X+Y	Example Reward:  0.176764921791
Epoch: 855	Loss: 0.751285821199	Example Output: *++X+6X+X	Example Reward:  0
Epoch: 856	Loss: 0.886086457968	Example Output: +Y+*Y+Y+X	Example Reward:  0
Epoch: 857	Loss: 0.827840614319	Example Output: +X++Y+Y+X	Example Reward:  0.280952882427
Epoch: 858	Loss: 0.770022952557	Example Output: */-Y+X+X+X	Example Reward:  0
Epoch: 859	Loss: 0.871753311157	Example Output: +X+*Y+X-Y	Example Reward:  0
Epoch: 860	Loss: 0.818245446682	Example Output: +X+-Y+Y+Y	Example Reward:  0.176764921791
Saving Weight
Epoch: 861	Loss: 0.858611226082	Example Output: +Y-+Y+X+X	Example Reward:  0.176764921791
Epoch: 862	Loss: 0.870772612095	Example Output

Epoch: 907	Loss: 0.777422302961	Example Output: +Y+*X+X+Y	Example Reward:  0
Epoch: 908	Loss: 0.827003872395	Example Output: +Y++Y+X+Y	Example Reward:  0.230337444003
Epoch: 909	Loss: 0.839789158106	Example Output: /	Example Reward:  0
Epoch: 910	Loss: 0.680471360683	Example Output: /+Y+X+X+Y	Example Reward:  0
Saving Weight
Epoch: 911	Loss: 0.764910119772	Example Output: +X+/Y+Y/Y	Example Reward:  0
Epoch: 912	Loss: 0.755010712147	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Epoch: 913	Loss: 0.811079877615	Example Output: /+Y+X-Y+X	Example Reward:  0
Epoch: 914	Loss: 0.8073528409	Example Output: -/+X+Y+Y+Y	Example Reward:  0
Epoch: 915	Loss: 0.828517687321	Example Output: +X++X*X+Y	Example Reward:  -0.112378308995
Epoch: 916	Loss: 0.769909638166	Example Output: +X++X-X+X	Example Reward:  0.176764921791
Epoch: 917	Loss: 0.827045482397	Example Output: +X+*X-X+X	Example Reward:  0
Epoch: 918	Loss: 0.810102266073	Example Output: +X+/Y+X+Y	Example Reward:  0
Epoch: 919	Loss: 0

Epoch: 951	Loss: 0.756184381247	Example Output: -*+Y+X+X+Y	Example Reward:  0
Epoch: 952	Loss: 0.771424347162	Example Output: +Y++X+X+Y	Example Reward:  0.280952882427
Epoch: 953	Loss: 0.766260671616	Example Output: +X+-Y-X+X	Example Reward:  0.128320278918
Epoch: 954	Loss: 0.793460375071	Example Output: *++X+X-Y+Y	Example Reward:  0
Epoch: 955	Loss: 0.81261908412	Example Output: -*+X*X+X+X	Example Reward:  0
Epoch: 956	Loss: 0.819607412815	Example Output: -*	Example Reward:  0
Epoch: 957	Loss: 0.88352701664	Example Output: -++X/Y+Y+Y	Example Reward:  -0.506713154386
Epoch: 958	Loss: 0.776006430387	Example Output: +Y++Y+X+X	Example Reward:  0.280952882427
Epoch: 959	Loss: 0.804962462187	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 960	Loss: 0.80617967844	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Saving Weight
Epoch: 961	Loss: 0.792906993628	Example Output: *++X/X*X+Y	Example Reward:  0
Epoch: 962	Loss: 0.841342282295	Example Output: +X++X*X+X	Example

Epoch: 1012	Loss: 0.754076743126	Example Output: +Y--X+Y+X	Example Reward:  0.280952882427
Epoch: 1013	Loss: 0.697275209427	Example Output: -++Y+X+X+X	Example Reward:  0.176764921791
Epoch: 1014	Loss: 0.768397086859	Example Output: +X++Y-X+X	Example Reward:  0.176764921791
Epoch: 1015	Loss: 0.784109151363	Example Output: +X+-Y+Y+X	Example Reward:  0.176764921791
Epoch: 1016	Loss: 0.718748050928	Example Output: +Y++Y+X-X	Example Reward:  0.176764921791
Epoch: 1017	Loss: 0.75655490756	Example Output: -++X*Y+X-Y	Example Reward:  -0.0727569674652
Epoch: 1018	Loss: 0.768940103054	Example Output: *++X*X*Y+Y	Example Reward:  0
Epoch: 1019	Loss: 0.744841623306	Example Output: -++X*X+X+Y	Example Reward:  -0.120317423547
Epoch: 1020	Loss: 0.784736138582	Example Output: +X*+X+X+Y	Example Reward:  -0.112378308995
Saving Weight
Epoch: 1021	Loss: 0.905612260103	Example Output: -++X+X+Y+Y	Example Reward:  0.176764921791
Epoch: 1022	Loss: 0.892811173201	Example Output: /+X+X+X+X	Example Reward:  0
Epo

Epoch: 1051	Loss: 0.710204362869	Example Output: +X++X+Y+Y	Example Reward:  0.280952882427
Epoch: 1052	Loss: 0.759190279245	Example Output: +Y+-Y+Y+X	Example Reward:  0.176764921791
Epoch: 1053	Loss: 0.82133281827	Example Output: +X++X+X-X	Example Reward:  0.176764921791
Epoch: 1054	Loss: 0.762554484606	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Epoch: 1055	Loss: 0.717957180738	Example Output: +X++X-X+X	Example Reward:  0.176764921791
Epoch: 1056	Loss: 0.693018805981	Example Output: /+X+X+X+X	Example Reward:  0
Epoch: 1057	Loss: 0.75009585619	Example Output: -++X+X/X+Y	Example Reward:  0.128836623144
Epoch: 1058	Loss: 0.750043904781	Example Output: +Y++X+X+Y	Example Reward:  0.280952882427
Epoch: 1059	Loss: 0.741926348209	Example Output: +X+-X/Y+X	Example Reward:  -0.506715386512
Epoch: 1060	Loss: 0.755517423153	Example Output: +Y++X+X+Y	Example Reward:  0.280952882427
Saving Weight
Epoch: 1061	Loss: 0.739269220829	Example Output: +Y++X+X+Y	Example Reward:  0.28095288242

Epoch: 1102	Loss: 0.796995949745	Example Output: -++X+X+Y+X	Example Reward:  0.176764921791
Epoch: 1103	Loss: 0.710628217459	Example Output: +Y-+X-Y+X	Example Reward:  0.128320278918
Epoch: 1104	Loss: 0.712171369791	Example Output: +X+/X+X-X	Example Reward:  0
Epoch: 1105	Loss: 0.766311526299	Example Output: -++X+X+Y+X	Example Reward:  0.176764921791
Epoch: 1106	Loss: 0.749664109945	Example Output: *++X/Y+X+X	Example Reward:  0
Epoch: 1107	Loss: 0.797418063879	Example Output: -+-X+X+Y+X	Example Reward:  0.280952882427
Epoch: 1108	Loss: 0.700790649652	Example Output: +Y++X+Y+X	Example Reward:  0.280952882427
Epoch: 1109	Loss: 0.791004890203	Example Output: +X+-Y	Example Reward:  0.128320278918
Epoch: 1110	Loss: 0.745303946733	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Saving Weight
Epoch: 1111	Loss: 0.783937513828	Example Output: **+X+X+X+X	Example Reward:  0
Epoch: 1112	Loss: 0.807702898979	Example Output: -++X	Example Reward:  0.111029637462
Epoch: 1113	Loss: 0.84986569

Epoch: 1151	Loss: 0.776534509659	Example Output: *++X/X/Y+Y	Example Reward:  0
Epoch: 1152	Loss: 0.736150711775	Example Output: *++X-X+X+Y	Example Reward:  0
Epoch: 1153	Loss: 0.832792371511	Example Output: +X*+Y+X/X	Example Reward:  -0.0499248180602
Epoch: 1154	Loss: 0.791435742378	Example Output: +Y++X+X+X	Example Reward:  0.280952882427
Epoch: 1155	Loss: 0.870732021332	Example Output: 	Example Reward:  0
Epoch: 1156	Loss: 0.733650314808	Example Output: +X-+X+X+X	Example Reward:  0.176764921791
Epoch: 1157	Loss: 0.735792320967	Example Output: +X	Example Reward:  0.149482313769
Epoch: 1158	Loss: 0.814335954189	Example Output: -++Y+X+X+X	Example Reward:  0.176764921791
Epoch: 1159	Loss: 0.788621687889	Example Output: -+-X+Y+Y+X	Example Reward:  0.280952882427
Epoch: 1160	Loss: 0.677509701252	Example Output: +Y++X-Y+Y	Example Reward:  0.176764921791
Saving Weight
Epoch: 1161	Loss: 0.7353505373	Example Output: /+X+X+Y+X	Example Reward:  0
Epoch: 1162	Loss: 0.744652163982	Example Output: 

Epoch: 1201	Loss: 0.76342689395	Example Output: -/-Y+X+X+Y	Example Reward:  0
Epoch: 1202	Loss: 0.802160251141	Example Output: +X++Y+Y+Y	Example Reward:  0.230337444003
Epoch: 1203	Loss: 0.766786772013	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 1204	Loss: 0.849601584673	Example Output: 	Example Reward:  0
Epoch: 1205	Loss: 0.741816067696	Example Output: -++X-Y+Y+Y	Example Reward:  0.128320278918
Epoch: 1206	Loss: 0.76663672328	Example Output: *	Example Reward:  0
Epoch: 1207	Loss: 0.79278280735	Example Output: +Y++X+X+Y	Example Reward:  0.280952882427
Epoch: 1208	Loss: 0.79183884263	Example Output: +X++X-X	Example Reward:  0.149482313769
Epoch: 1209	Loss: 0.814690685272	Example Output: 	Example Reward:  0
Epoch: 1210	Loss: 0.746147215366	Example Output: +X+/X-Y+Y	Example Reward:  0
Saving Weight
Epoch: 1211	Loss: 0.833687698841	Example Output: 	Example Reward:  0
Epoch: 1212	Loss: 0.755640912056	Example Output: *++Y+Y+X+Y	Example Reward:  0
Epoch: 1213	Loss: 0.715

Epoch: 1259	Loss: 0.716642767191	Example Output: +X++Y-X+Y	Example Reward:  0.176764921791
Epoch: 1260	Loss: 0.78531383872	Example Output: +X++Y+X-X	Example Reward:  0.176764921791
Saving Weight
Epoch: 1261	Loss: 0.814739722013	Example Output: *++X*Y-X+Y	Example Reward:  0
Epoch: 1262	Loss: 0.777970314026	Example Output: *++X+X+X+Y	Example Reward:  0
Epoch: 1263	Loss: 0.725309354067	Example Output: +Y-+Y*X+Y	Example Reward:  -0.0680919630061
Epoch: 1264	Loss: 0.757417184114	Example Output: *-+Y+Y+Y+X	Example Reward:  0
Epoch: 1265	Loss: 0.925791198015	Example Output: +Y++X+Y+X	Example Reward:  0.280952882427
Epoch: 1266	Loss: 0.838901609182	Example Output: 	Example Reward:  0
Epoch: 1267	Loss: 0.804701960087	Example Output: +X++X	Example Reward:  0.176764921791
Epoch: 1268	Loss: 0.746378397942	Example Output: +X++X-Y-X	Example Reward:  0.128320278918
Epoch: 1269	Loss: 0.816377991438	Example Output: -++X+X+X+X	Example Reward:  0.176764921791
Epoch: 1270	Loss: 0.747868227959	Example Outp

Epoch: 1350	Loss: 0.776088398695	Example Output: /+Y+Y+X+Y	Example Reward:  0
Prob
[[  4.01024219e-22   5.62567635e-22   2.12383017e-20   3.46240712e-20
    1.70729432e-20   2.65410173e-20   2.54657972e-20   2.05541878e-20
    2.00941396e-20   2.46700592e-20   2.91061593e-20   2.42219858e-20
    5.72122157e-01   1.86473250e-01   1.11198910e-01   8.33910331e-02
    4.68146801e-02]]
[[  2.00999692e-23   3.14069823e-23   3.55705665e-24   6.82012062e-24
    2.76326221e-24   4.97860333e-24   4.63846268e-24   3.51923866e-24
    3.58274039e-24   4.28263316e-24   5.67549410e-24   4.44812356e-24
    6.11030102e-01   1.68120638e-01   8.98072571e-02   6.18044659e-02
    6.92375228e-02]]
[[  6.53380871e-01   3.46619129e-01   5.63243115e-12   7.44704385e-12
    4.19988653e-12   6.00475572e-12   5.96333616e-12   3.98292337e-12
    4.30511312e-12   5.38753807e-12   6.63562651e-12   5.49325299e-12
    1.35788021e-11   4.96533500e-12   2.99320529e-12   1.73494986e-12
    9.72261768e-12]]
[[ 0.         

Epoch: 1401	Loss: 0.756017786264	Example Output: +Y++Y/Y+X	Example Reward:  0.177627063972
Epoch: 1402	Loss: 0.86221729517	Example Output: +X*+X+Y-X	Example Reward:  -0.109445588249
Epoch: 1403	Loss: 0.780121952295	Example Output: *++X+X+X+Y	Example Reward:  0
Epoch: 1404	Loss: 0.750482821465	Example Output: +Y++Y+Y+Y	Example Reward:  0.189241565828
Epoch: 1405	Loss: 0.768195056915	Example Output: +X++X+X-X	Example Reward:  0.176764921791
Epoch: 1406	Loss: 0.767679351568	Example Output: +X++X-X+X	Example Reward:  0.176764921791
Epoch: 1407	Loss: 0.825449979305	Example Output: +X+-Y-X-X	Example Reward:  0.0964106084126
Epoch: 1408	Loss: 0.760017961264	Example Output: 	Example Reward:  0
Epoch: 1409	Loss: 0.745993262529	Example Output: *-*Y+Y+X+Y	Example Reward:  0
Epoch: 1410	Loss: 0.784543710947	Example Output: *-+X+Y-X+X	Example Reward:  0
Saving Weight
Epoch: 1411	Loss: 0.802752912045	Example Output: +X+*X+Y+X	Example Reward:  0
Epoch: 1412	Loss: 0.795490127802	Example Output: +X++X/

Epoch: 1461	Loss: 0.773502761126	Example Output: +Y+++X-Y+X	Example Reward:  0.176764921791
Epoch: 1462	Loss: 0.730215245485	Example Output: -++X+X*X+Y	Example Reward:  -0.109445588249
Epoch: 1463	Loss: 0.687718069553	Example Output: *++X+X+X+X	Example Reward:  0
Epoch: 1464	Loss: 0.748378092051	Example Output: +X-+Y+Y+Y	Example Reward:  0.176764921791
Epoch: 1465	Loss: 0.778125709295	Example Output: -+	Example Reward:  0
Epoch: 1466	Loss: 0.795866030455	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 1467	Loss: 0.73009672761	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 1468	Loss: 0.761947369576	Example Output: +Y+++4X+Y	Example Reward:  0
Epoch: 1469	Loss: 0.824233359098	Example Output: -++X	Example Reward:  0.111029637462
Epoch: 1470	Loss: 0.743227726221	Example Output: +Y+++X+Y+X	Example Reward:  0.280952882427
Saving Weight
Epoch: 1471	Loss: 0.771288740635	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 1472	Loss: 0.82188205719	

Epoch: 1545	Loss: 0.717509424686	Example Output: +Y++-X+X+Y	Example Reward:  0.176764921791
Epoch: 1546	Loss: 0.659877258539	Example Output: -++Y+Y+X+X	Example Reward:  0.176764921791
Epoch: 1547	Loss: 0.857616943121	Example Output: 	Example Reward:  0
Epoch: 1548	Loss: 0.700766682625	Example Output: +X+-Y+Y+X	Example Reward:  0.176764921791
Epoch: 1549	Loss: 0.792170774937	Example Output: -/+Y-X+Y+X	Example Reward:  0
Epoch: 1550	Loss: 0.812540149689	Example Output: +Y++-X+X+X	Example Reward:  0.176764921791
Prob
[[  3.52247977e-23   5.12777890e-23   1.62050528e-21   2.66287877e-21
    1.29983961e-21   2.03434530e-21   1.95641826e-21   1.56200174e-21
    1.53646002e-21   1.92584449e-21   2.24635840e-21   1.82066997e-21
    5.81501245e-01   1.84448555e-01   1.08111516e-01   8.05161968e-02
    4.54224646e-02]]
[[  6.54417157e-01   3.45582843e-01   8.19525044e-15   1.18600774e-14
    5.89818010e-15   9.28400880e-15   8.78935257e-15   5.29423925e-15
    5.80204355e-15   8.26371781e-15   1

Epoch: 1575	Loss: 0.674802345037	Example Output: +Y++	Example Reward:  0
Epoch: 1576	Loss: 0.672350817919	Example Output: +Y++-Y+X+X	Example Reward:  0.176764921791
Epoch: 1577	Loss: 0.775418108702	Example Output: +X++X-Y+Y	Example Reward:  0.176764921791
Epoch: 1578	Loss: 0.765744823217	Example Output: *++X+X+Y+X	Example Reward:  0
Epoch: 1579	Loss: 0.634640210867	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 1580	Loss: 0.733379268646	Example Output: *++X+X+X+X	Example Reward:  0
Saving Weight
Epoch: 1581	Loss: 0.714722174406	Example Output: 	Example Reward:  0
Epoch: 1582	Loss: 0.709714651108	Example Output: -++X+X-X+X	Example Reward:  0.128320278918
Epoch: 1583	Loss: 0.686267846823	Example Output: +X++X-X+Y	Example Reward:  0.176764921791
Epoch: 1584	Loss: 0.756277167797	Example Output: +Y++/X+X+X	Example Reward:  0
Epoch: 1585	Loss: 0.681048309803	Example Output: +X-+X+Y+Y	Example Reward:  0.176764921791
Epoch: 1586	Loss: 0.626738542318	Example Output: +X++X-Y+X	

Epoch: 1651	Loss: 0.837286043167	Example Output: +Y+-+Y-Y+X	Example Reward:  0.128320278918
Epoch: 1652	Loss: 0.626505386829	Example Output: +Y+++X+Y+Y	Example Reward:  0.230337444003
Epoch: 1653	Loss: 0.647941440344	Example Output: +Y+++Y+X+X	Example Reward:  0.280952882427
Epoch: 1654	Loss: 0.76124740243	Example Output: **+X+X+X+Y	Example Reward:  0
Epoch: 1655	Loss: 0.877324610949	Example Output: *+/X+Y/X+X	Example Reward:  0
Epoch: 1656	Loss: 0.654003626108	Example Output: +X++X/X+X	Example Reward:  0.177627063972
Epoch: 1657	Loss: 0.717637515068	Example Output: //X+X+X+Y	Example Reward:  0
Epoch: 1658	Loss: 0.843241173029	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 1659	Loss: 0.701010131836	Example Output: +Y+++Y+X+X	Example Reward:  0.280952882427
Epoch: 1660	Loss: 0.61807205677	Example Output: +Y--+X+X+Y	Example Reward:  0.280952882427
Saving Weight
Epoch: 1661	Loss: 0.709166640043	Example Output: +X++X+Y+Y	Example Reward:  0.280952882427
Epoch: 1662	Loss: 0

Epoch: 1702	Loss: 0.70727314353	Example Output: +X++Y+X+Y	Example Reward:  0.280952882427
Epoch: 1703	Loss: 0.684573966265	Example Output: +X++Y	Example Reward:  0.176764921791
Epoch: 1704	Loss: 0.603041243553	Example Output: +Y+++X+Y+X	Example Reward:  0.280952882427
Epoch: 1705	Loss: 0.739484661818	Example Output: +X++Y+X+Y	Example Reward:  0.280952882427
Epoch: 1706	Loss: 0.667546832561	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 1707	Loss: 0.819780951738	Example Output: +Y+++Y+X+Y	Example Reward:  0.230337444003
Epoch: 1708	Loss: 0.699821066856	Example Output: --	Example Reward:  0
Epoch: 1709	Loss: 0.727097564936	Example Output: -++X+X+X+X	Example Reward:  0.176764921791
Epoch: 1710	Loss: 0.608308291435	Example Output: *++Y+X+Y+X	Example Reward:  0
Saving Weight
Epoch: 1711	Loss: 0.63735640645	Example Output: +Y+++X+Y+X	Example Reward:  0.280952882427
Epoch: 1712	Loss: 0.690093785524	Example Output: +X+-X+X+Y	Example Reward:  0.176764921791
Epoch: 1713	Loss: 0

Epoch: 1751	Loss: 0.624205678701	Example Output: /+X+Y+Y+Y	Example Reward:  0
Epoch: 1752	Loss: 0.654004269838	Example Output: +Y++	Example Reward:  0
Epoch: 1753	Loss: 0.664066106081	Example Output: +Y++-X+Y+X	Example Reward:  0.176764921791
Epoch: 1754	Loss: 0.649939334393	Example Output: -	Example Reward:  0
Epoch: 1755	Loss: 0.743746292591	Example Output: +Y++-X+Y+X	Example Reward:  0.176764921791
Epoch: 1756	Loss: 0.80195299387	Example Output: -++Y+Y+X+Y	Example Reward:  0.176764921791
Epoch: 1757	Loss: 0.790103268623	Example Output: 	Example Reward:  0
Epoch: 1758	Loss: 0.634663921595	Example Output: +X++Y-Y+X	Example Reward:  0.176764921791
Epoch: 1759	Loss: 0.7653021276	Example Output: --	Example Reward:  0
Epoch: 1760	Loss: 0.609311538935	Example Output: +X+/Y+Y+X	Example Reward:  0
Saving Weight
Epoch: 1761	Loss: 0.587702316046	Example Output: +Y+++Y+X+Y	Example Reward:  0.230337444003
Epoch: 1762	Loss: 0.725361818075	Example Output: +Y++	Example Reward:  0
Epoch: 1763	Loss: 

Epoch: 1835	Loss: 0.716388487816	Example Output: +Y+++X+X+X	Example Reward:  0.280952882427
Epoch: 1836	Loss: 0.637074434757	Example Output: +X++X*Y+X	Example Reward:  -0.0539090397123
Epoch: 1837	Loss: 0.670115989447	Example Output: *+-X-X+Y+X	Example Reward:  0
Epoch: 1838	Loss: 0.619879937172	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Epoch: 1839	Loss: 0.643485206366	Example Output: +X++X+Y+Y	Example Reward:  0.280952882427
Epoch: 1840	Loss: 0.629771173	Example Output: *++X+X+X+X	Example Reward:  0
Saving Weight
Epoch: 1841	Loss: 0.728654819727	Example Output: +Y+++X+X+Y	Example Reward:  0.280952882427
Epoch: 1842	Loss: 0.65842730999	Example Output: +X++Y/X*Y	Example Reward:  -0.506888485226
Epoch: 1843	Loss: 0.69040415287	Example Output: -	Example Reward:  0
Epoch: 1844	Loss: 0.684066677094	Example Output: -+	Example Reward:  0
Epoch: 1845	Loss: 0.792130851746	Example Output: 	Example Reward:  0
Epoch: 1846	Loss: 0.623824840784	Example Output: /+X+X+X+Y	Example Rewar

Epoch: 1919	Loss: 0.769810599089	Example Output: -+	Example Reward:  0
Epoch: 1920	Loss: 0.710145103931	Example Output: 	Example Reward:  0
Saving Weight
Epoch: 1921	Loss: 0.835576987267	Example Output: 	Example Reward:  0
Epoch: 1922	Loss: 0.823607248068	Example Output: -	Example Reward:  0
Epoch: 1923	Loss: 0.842988806963	Example Output: 	Example Reward:  0
Epoch: 1924	Loss: 0.696998518705	Example Output: /	Example Reward:  0
Epoch: 1925	Loss: 0.613377648592	Example Output: +Y++-X+Y+Y	Example Reward:  0.162254875525
Epoch: 1926	Loss: 0.623488813639	Example Output: *++X+X+X+Y	Example Reward:  0
Epoch: 1927	Loss: 0.721245729923	Example Output: +X++X+Y+X	Example Reward:  0.280952882427
Epoch: 1928	Loss: 0.698945993185	Example Output: -+/X	Example Reward:  0
Epoch: 1929	Loss: 0.642459100485	Example Output: +X++X+X+Y	Example Reward:  0.280952882427
Epoch: 1930	Loss: 0.649652707577	Example Output: +Y+++Y+X+Y	Example Reward:  0.230337444003
Saving Weight
Epoch: 1931	Loss: 0.681360447407	Exa

Epoch: 1952	Loss: 0.665123909712	Example Output: /+X+X+X+X	Example Reward:  0
Epoch: 1953	Loss: 0.724508726597	Example Output: +Y+++Y+Y+X	Example Reward:  0.230337444003
Epoch: 1954	Loss: 0.674668866396	Example Output: +X+-Y+X/X	Example Reward:  0.128836623144
Epoch: 1955	Loss: 0.786504650116	Example Output: +X++X-Y+Y	Example Reward:  0.176764921791
Epoch: 1956	Loss: 0.846340441704	Example Output: +Y++-X+X+X	Example Reward:  0.176764921791
Epoch: 1957	Loss: 0.723889243603	Example Output: -+	Example Reward:  0
Epoch: 1958	Loss: 0.77730820179	Example Output: +X+-Y+X+X	Example Reward:  0.176764921791
Epoch: 1959	Loss: 0.874742537737	Example Output: +X+*X+X+X	Example Reward:  0
Epoch: 1960	Loss: 0.676402002573	Example Output: **+X+Y+Y+X	Example Reward:  0
Saving Weight
Epoch: 1961	Loss: 0.840653502941	Example Output: +Y+++X+Y+X	Example Reward:  0.280952882427
Epoch: 1962	Loss: 0.686679244041	Example Output: +Y	Example Reward:  0.149482313769
Epoch: 1963	Loss: 0.745053839684	Example Output:

Epoch: 2001	Loss: 0.763615018129	Example Output: -++Y-X+X+X	Example Reward:  0.128320278918
Epoch: 2002	Loss: 0.84531738162	Example Output: 	Example Reward:  0
Epoch: 2003	Loss: 0.727536559105	Example Output: +X+-X*Y+X	Example Reward:  -0.0680919630061
Epoch: 2004	Loss: 0.659375166893	Example Output: +X++X+X*Y	Example Reward:  -0.0539090397123
Epoch: 2005	Loss: 0.743872183561	Example Output: /+X+X+Y+Y	Example Reward:  0
Epoch: 2006	Loss: 0.734200882912	Example Output: 	Example Reward:  0
Epoch: 2007	Loss: 0.769704085588	Example Output: +X-	Example Reward:  0
Epoch: 2008	Loss: 0.826288360357	Example Output: +Y+++X+X+Y	Example Reward:  0.280952882427
Epoch: 2009	Loss: 0.637879240513	Example Output: +Y+++Y+X+X	Example Reward:  0.280952882427
Epoch: 2010	Loss: 0.80568883419	Example Output: 	Example Reward:  0
Saving Weight
Epoch: 2011	Loss: 0.879734396935	Example Output: +Y	Example Reward:  0.149482313769
Epoch: 2012	Loss: 0.737647217512	Example Output: +X++X*X+X	Example Reward:  -0.113233

Epoch: 2080	Loss: 0.675630652905	Example Output: +X++Y+Y+X	Example Reward:  0.280952882427
Saving Weight
Epoch: 2081	Loss: 0.647482550144	Example Output: +Y+++X+X+Y	Example Reward:  0.280952882427
Epoch: 2082	Loss: 0.752911019325	Example Output: -	Example Reward:  0
Epoch: 2083	Loss: 0.873685818911	Example Output: +X++Y+Y+X	Example Reward:  0.280952882427
Epoch: 2084	Loss: 0.85901016593	Example Output: +X++Y+Y+X	Example Reward:  0.280952882427
Epoch: 2085	Loss: 0.706684070826	Example Output: -	Example Reward:  0
Epoch: 2086	Loss: 0.794669061899	Example Output: *-	Example Reward:  0
Epoch: 2087	Loss: 0.742212098837	Example Output: +X++Y+X+Y	Example Reward:  0.280952882427
Epoch: 2088	Loss: 0.591158086061	Example Output: /*Y+X+X+X	Example Reward:  0
Epoch: 2089	Loss: 0.847390371561	Example Output: +X++Y+X	Example Reward:  0.215217598098
Epoch: 2090	Loss: 0.654253786802	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Saving Weight
Epoch: 2091	Loss: 0.815529036522	Example Output:

Epoch: 2151	Loss: 0.716069597006	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Epoch: 2152	Loss: 0.720899164677	Example Output: /+Y-X+Y+X	Example Reward:  0
Epoch: 2153	Loss: 0.784408503771	Example Output: *++X+X+X+Y	Example Reward:  0
Epoch: 2154	Loss: 0.633671045303	Example Output: -+	Example Reward:  0
Epoch: 2155	Loss: 0.814560669661	Example Output: 	Example Reward:  0
Epoch: 2156	Loss: 0.646144545078	Example Output: +X++Y-Y+X	Example Reward:  0.176764921791
Epoch: 2157	Loss: 0.752798002958	Example Output: +X+/X+X+X	Example Reward:  0
Epoch: 2158	Loss: 0.691143846512	Example Output: +X++X+Y+Y	Example Reward:  0.280952882427
Epoch: 2159	Loss: 0.637258708477	Example Output: +X-+X+X+X	Example Reward:  0.176764921791
Epoch: 2160	Loss: 0.686606895924	Example Output: -++X+X+X+Y	Example Reward:  0.176764921791
Saving Weight
Epoch: 2161	Loss: 0.679075568914	Example Output: +X++X+Y+X	Example Reward:  0.280952882427
Epoch: 2162	Loss: 0.649449139833	Example Output: +X++X-X+X	Examp

Epoch: 2201	Loss: 0.646817857027	Example Output: *++Y+X+X+X	Example Reward:  0
Epoch: 2202	Loss: 0.739777266979	Example Output: -++X+X+Y+Y	Example Reward:  0.176764921791
Epoch: 2203	Loss: 0.714877098799	Example Output: 	Example Reward:  0
Epoch: 2204	Loss: 0.768107461929	Example Output: -++X	Example Reward:  0.111029637462
Epoch: 2205	Loss: 0.801196217537	Example Output: -+	Example Reward:  0
Epoch: 2206	Loss: 0.717756545544	Example Output: +X-*Y+X+Y	Example Reward:  0
Epoch: 2207	Loss: 0.729664158821	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 2208	Loss: 0.783028155565	Example Output: +X++X+Y+Y	Example Reward:  0.280952882427
Epoch: 2209	Loss: 0.692439347506	Example Output: -+	Example Reward:  0
Epoch: 2210	Loss: 0.939997369051	Example Output: 	Example Reward:  0
Saving Weight
Epoch: 2211	Loss: 0.892407774925	Example Output: +Y+++Y+X+Y	Example Reward:  0.230337444003
Epoch: 2212	Loss: 0.709590518475	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Epoch:

Epoch: 2293	Loss: 0.814732003212	Example Output: *++Y+Y+Y+Y	Example Reward:  0
Epoch: 2294	Loss: 0.663927596807	Example Output: +Y+++X*Y+Y	Example Reward:  -0.0543981702588
Epoch: 2295	Loss: 0.570319241285	Example Output: /+X+Y+Y+X	Example Reward:  0
Epoch: 2296	Loss: 0.759547591209	Example Output: /-Y	Example Reward:  0
Epoch: 2297	Loss: 0.7672704041	Example Output: +Y+++Y+X+X	Example Reward:  0.280952882427
Epoch: 2298	Loss: 0.697409969568	Example Output: --+Y	Example Reward:  0.149482313769
Epoch: 2299	Loss: 0.722885781527	Example Output: +Y+++X+X+Y	Example Reward:  0.280952882427
Epoch: 2300	Loss: 0.745141518116	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Prob
[[  4.37450549e-21   8.43944995e-21   4.17231562e-23   7.28258339e-23
    3.15138793e-23   5.49485544e-23   5.33335921e-23   4.40738134e-23
    3.91662813e-23   4.97216662e-23   6.11181157e-23   4.90311668e-23
    5.85531354e-01   1.84051916e-01   9.96765941e-02   6.55605420e-02
    6.51795492e-02]]
[[  5.326095

Epoch: 2323	Loss: 0.745503425598	Example Output: +X++X+Y+Y	Example Reward:  0.280952882427
Epoch: 2324	Loss: 0.741539764404	Example Output: -	Example Reward:  0
Epoch: 2325	Loss: 0.791625988483	Example Output: -+-Y	Example Reward:  0.149482313769
Epoch: 2326	Loss: 0.728810673952	Example Output: *-+X+X+X+X	Example Reward:  0
Epoch: 2327	Loss: 0.748871302605	Example Output: -	Example Reward:  0
Epoch: 2328	Loss: 0.740195530653	Example Output: -++X	Example Reward:  0.111029637462
Epoch: 2329	Loss: 0.739994859695	Example Output: +Y+++X+Y+X	Example Reward:  0.280952882427
Epoch: 2330	Loss: 0.765656757355	Example Output: -+	Example Reward:  0
Saving Weight
Epoch: 2331	Loss: 0.742574346066	Example Output: +X++X+Y+Y	Example Reward:  0.280952882427
Epoch: 2332	Loss: 0.721082788706	Example Output: +Y++	Example Reward:  0
Epoch: 2333	Loss: 0.692752081156	Example Output: +Y+++X+X+Y	Example Reward:  0.280952882427
Epoch: 2334	Loss: 0.780437123775	Example Output: -+	Example Reward:  0
Epoch: 2335	Lo

Epoch: 2400	Loss: 0.691964530945	Example Output: -	Example Reward:  0
Prob
[[  3.01931485e-21   6.90340140e-21   1.58082898e-23   2.81127118e-23
    1.19289913e-23   2.11940546e-23   2.05995012e-23   1.69668093e-23
    1.48893141e-23   1.90073605e-23   2.36025842e-23   1.88837050e-23
    5.83919883e-01   1.78498074e-01   9.50130820e-02   6.09029122e-02
    8.16660896e-02]]
[[  1.08028466e-23   4.16050385e-23   5.27435381e-32   1.53510675e-31
    3.89705441e-32   8.74317182e-32   8.02257380e-32   4.47331583e-32
    5.13134317e-32   7.33663993e-32   1.17098809e-31   7.63903291e-32
    5.28569639e-01   7.63151050e-02   3.17387544e-02   1.30480984e-02
    3.50328505e-01]]
Gradient
[[-0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0.  0. -0. -0. -0.]
 [-0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0. -0.  0.]]
Saving Weight
Epoch: 2401	Loss: 0.849089056253	Example Output: 	Example Reward:  0
Epoch: 2402	Loss: 0.689685600996	Example Output: *++Y+Y+Y+X	Example Reward:  0
Epoch: 24

Epoch: 2486	Loss: 0.715155386925	Example Output: +Y+++X+X+Y	Example Reward:  0.280952882427
Epoch: 2487	Loss: 0.75178591609	Example Output: *++X+Y+X+X	Example Reward:  0
Epoch: 2488	Loss: 0.884220045805	Example Output: 	Example Reward:  0
Epoch: 2489	Loss: 0.865243297815	Example Output: *++Y+X	Example Reward:  0
Epoch: 2490	Loss: 0.892238694429	Example Output: +X++Y	Example Reward:  0.176764921791
Saving Weight
Epoch: 2491	Loss: 0.644990563393	Example Output: +X++X+Y+Y	Example Reward:  0.280952882427
Epoch: 2492	Loss: 0.73998953104	Example Output: +Y++*Y+X+X	Example Reward:  0
Epoch: 2493	Loss: 0.769985687733	Example Output: -++Y+Y+Y+Y	Example Reward:  0.176764921791
Epoch: 2494	Loss: 0.820636558533	Example Output: +X+-Y+Y+X	Example Reward:  0.176764921791
Epoch: 2495	Loss: 0.779694902897	Example Output: *++Y+Y+Y+Y	Example Reward:  0
Epoch: 2496	Loss: 0.888038754463	Example Output: -+	Example Reward:  0
Epoch: 2497	Loss: 0.72741625309	Example Output: +X	Example Reward:  0.149482313769


Epoch: 2551	Loss: 0.638368165493	Example Output: +Y+++X+Y+Y	Example Reward:  0.230337444003
Epoch: 2552	Loss: 0.752550363541	Example Output: 	Example Reward:  0
Epoch: 2553	Loss: 0.658931225538	Example Output: +X-+Y+Y+X	Example Reward:  0.176764921791
Epoch: 2554	Loss: 0.86246445775	Example Output: 	Example Reward:  0
Epoch: 2555	Loss: 0.685301375389	Example Output: +X++Y+X+Y	Example Reward:  0.280952882427
Epoch: 2556	Loss: 0.673939520121	Example Output: +X-+X+X+X	Example Reward:  0.176764921791
Epoch: 2557	Loss: 0.697420322895	Example Output: +X++Y+Y+Y	Example Reward:  0.230337444003
Epoch: 2558	Loss: 0.590895867348	Example Output: +Y+++Y+X+Y	Example Reward:  0.230337444003
Epoch: 2559	Loss: 0.716246563196	Example Output: *++Y+X+Y+Y	Example Reward:  0
Epoch: 2560	Loss: 0.668356150389	Example Output: +Y-+*Y+X+X	Example Reward:  0
Saving Weight
Epoch: 2561	Loss: 0.611738920212	Example Output: /*Y+X+Y+Y	Example Reward:  0
Epoch: 2562	Loss: 0.642520755529	Example Output: +Y++-X+Y+Y	Examp

Epoch: 2607	Loss: 0.67800205946	Example Output: +Y++-Y+X+Y	Example Reward:  0.176764921791
Epoch: 2608	Loss: 0.64460798502	Example Output: -++Y+X+X+Y	Example Reward:  0.176764921791
Epoch: 2609	Loss: 0.617993503809	Example Output: +Y+++Y+Y+Y	Example Reward:  0.189241565828
Epoch: 2610	Loss: 0.628250175714	Example Output: +X++Y+Y+Y	Example Reward:  0.230337444003
Saving Weight
Epoch: 2611	Loss: 0.765618795156	Example Output: -++X+X+X+X	Example Reward:  0.176764921791
Epoch: 2612	Loss: 0.69615727067	Example Output: --+Y+X+Y+Y	Example Reward:  0.230337444003
Epoch: 2613	Loss: 0.875788319111	Example Output: 	Example Reward:  0
Epoch: 2614	Loss: 0.666455882788	Example Output: -++Y+X+X+Y	Example Reward:  0.176764921791
Epoch: 2615	Loss: 0.651235157251	Example Output: /+X+X+X+Y	Example Reward:  0
Epoch: 2616	Loss: 0.762707173824	Example Output: 	Example Reward:  0
Epoch: 2617	Loss: 0.802347207069	Example Output: /+X+Y+X+Y	Example Reward:  0
Epoch: 2618	Loss: 0.623285621405	Example Output: +Y+

Epoch: 2670	Loss: 0.662854623795	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Saving Weight
Epoch: 2671	Loss: 0.716821819544	Example Output: +X++X+Y+Y	Example Reward:  0.280952882427
Epoch: 2672	Loss: 0.693058753014	Example Output: +X++Y+X+Y	Example Reward:  0.280952882427
Epoch: 2673	Loss: 0.707573288679	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Epoch: 2674	Loss: 0.788185346127	Example Output: 	Example Reward:  0
Epoch: 2675	Loss: 0.777687847614	Example Output: -+	Example Reward:  0
Epoch: 2676	Loss: 0.796322321892	Example Output: +Y++/X+Y+X	Example Reward:  0
Epoch: 2677	Loss: 0.648656231165	Example Output: +X++Y+X+X	Example Reward:  0.280952882427
Epoch: 2678	Loss: 0.785594761372	Example Output: +Y++	Example Reward:  0
Epoch: 2679	Loss: 0.72128329277	Example Output: +Y+++X+Y+Y	Example Reward:  0.230337444003
Epoch: 2680	Loss: 0.802848118544	Example Output: +X++Y+Y+Y	Example Reward:  0.230337444003
Saving Weight
Epoch: 2681	Loss: 0.628946971893	Example Ou

Epoch: 2701	Loss: 0.606366592646	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 2702	Loss: 0.685405790806	Example Output: +X++Y*X+X	Example Reward:  -0.0539090397123
Epoch: 2703	Loss: 0.780188399553	Example Output: +X++X+Y+X	Example Reward:  0.280952882427
Epoch: 2704	Loss: 0.663288640976	Example Output: -+	Example Reward:  0
Epoch: 2705	Loss: 0.696864289045	Example Output: *++X+Y+X+X	Example Reward:  0
Epoch: 2706	Loss: 0.819992256165	Example Output: 	Example Reward:  0
Epoch: 2707	Loss: 0.683212596178	Example Output: +X++X-X+X	Example Reward:  0.176764921791
Epoch: 2708	Loss: 0.763326513767	Example Output: +X+-Y+Y+X	Example Reward:  0.176764921791
Epoch: 2709	Loss: 0.718722730875	Example Output: -++X+X+X+Y	Example Reward:  0.176764921791
Epoch: 2710	Loss: 0.701501405239	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Saving Weight
Epoch: 2711	Loss: 0.679734915495	Example Output: 	Example Reward:  0
Epoch: 2712	Loss: 0.624369186163	Example Output: +Y++-X+X+

Epoch: 2789	Loss: 0.655314189196	Example Output: +Y++*Y+Y+Y	Example Reward:  0
Epoch: 2790	Loss: 0.601775020361	Example Output: +Y++-X+X+Y	Example Reward:  0.176764921791
Saving Weight
Epoch: 2791	Loss: 0.697834384441	Example Output: -*/X+Y+Y+Y	Example Reward:  0
Epoch: 2792	Loss: 0.718160319328	Example Output: +Y++-Y+Y+X	Example Reward:  0.176764921791
Epoch: 2793	Loss: 0.661582052708	Example Output: +Y++-Y+Y+Y	Example Reward:  0.176764921791
Epoch: 2794	Loss: 0.669867932796	Example Output: /+X+X+Y+Y	Example Reward:  0
Epoch: 2795	Loss: 0.786767047644	Example Output: +X+-Y+Y+Y	Example Reward:  0.176764921791
Epoch: 2796	Loss: 0.67867590189	Example Output: +X++X+X+X	Example Reward:  0.230337444003
Epoch: 2797	Loss: 0.807147806883	Example Output: +X++X+X+Y	Example Reward:  0.280952882427
Epoch: 2798	Loss: 0.710157173872	Example Output: +X++Y+Y+X	Example Reward:  0.280952882427
Epoch: 2799	Loss: 0.657432615757	Example Output: +Y+++X+X+Y	Example Reward:  0.280952882427
Epoch: 2800	Loss: 0

Epoch: 2816	Loss: 0.717607676983	Example Output: /-X+X+X+X	Example Reward:  0
Epoch: 2817	Loss: 0.696080499887	Example Output: -++X+X+X+X	Example Reward:  0.176764921791
Epoch: 2818	Loss: 0.700756943226	Example Output: -*+X+Y+X+X	Example Reward:  0
Epoch: 2819	Loss: 0.809022259712	Example Output: +Y++	Example Reward:  0
Epoch: 2820	Loss: 0.812471669912	Example Output: -++Y+Y+Y+Y	Example Reward:  0.176764921791
Saving Weight
Epoch: 2821	Loss: 0.895830982924	Example Output: +X/+X+Y+X	Example Reward:  0.177627063972
Epoch: 2822	Loss: 0.645312440395	Example Output: +Y+++X+Y+X	Example Reward:  0.280952882427
Epoch: 2823	Loss: 0.716858124733	Example Output: +X++X+Y-Y	Example Reward:  0.176764921791
Epoch: 2824	Loss: 0.819239032269	Example Output: -	Example Reward:  0
Epoch: 2825	Loss: 0.811160844564	Example Output: +X++Y+Y+X	Example Reward:  0.280952882427
Epoch: 2826	Loss: 0.856865674257	Example Output: +Y++-Y+Y+X	Example Reward:  0.176764921791
Epoch: 2827	Loss: 0.879642236233	Example Outp

Epoch: 2901	Loss: 1.13918699026	Example Output: 	Example Reward:  0
Epoch: 2902	Loss: 0.976668286324	Example Output: +Y++-Y-Y+Y	Example Reward:  0.128320278918
Epoch: 2903	Loss: 1.17164772749	Example Output: /	Example Reward:  0
Epoch: 2904	Loss: 0.995420354605	Example Output: *+	Example Reward:  0
Epoch: 2905	Loss: 1.03943977356	Example Output: +Y++	Example Reward:  0
Epoch: 2906	Loss: 0.908508211374	Example Output: +X++Y-Y+X	Example Reward:  0.176764921791
Epoch: 2907	Loss: 1.0251033783	Example Output: 	Example Reward:  0
Epoch: 2908	Loss: 1.05675914884	Example Output: --	Example Reward:  0
Epoch: 2909	Loss: 1.11021310687	Example Output: 	Example Reward:  0
Epoch: 2910	Loss: 1.20135603547	Example Output: 	Example Reward:  0
Saving Weight
Epoch: 2911	Loss: 0.982574558258	Example Output: /	Example Reward:  0
Epoch: 2912	Loss: 1.10338976979	Example Output: +X+	Example Reward:  0
Epoch: 2913	Loss: 0.95917532444	Example Output: +X+-Y+Y+X	Example Reward:  0.176764921791
Epoch: 2914	Loss: 1

Epoch: 3002	Loss: 1.12778643966	Example Output: -+	Example Reward:  0
Epoch: 3003	Loss: 1.02294555902	Example Output: +X++X+Y+X	Example Reward:  0.280952882427
Epoch: 3004	Loss: 1.04275317192	Example Output: -	Example Reward:  0
Epoch: 3005	Loss: 1.12456051111	Example Output: 	Example Reward:  0
Epoch: 3006	Loss: 0.954007226229	Example Output: +X--Y-X+X	Example Reward:  0.176764921791
Epoch: 3007	Loss: 0.871132951975	Example Output: +X++Y-X+Y	Example Reward:  0.176764921791
Epoch: 3008	Loss: 1.11709440947	Example Output: +Y-++Y-Y-Y	Example Reward:  0.0964106084126
Epoch: 3009	Loss: 1.18190754056	Example Output: -	Example Reward:  0
Epoch: 3010	Loss: 0.921716147661	Example Output: +Y++*X+X-X	Example Reward:  0
Saving Weight
Epoch: 3011	Loss: 1.10473755002	Example Output: +Y++-X-Y+Y	Example Reward:  0.128320278918
Epoch: 3012	Loss: 1.08390665054	Example Output: +X/+X+Y-X	Example Reward:  0.128836623144
Epoch: 3013	Loss: 0.910770404339	Example Output: +Y+++X+Y+X	Example Reward:  0.2809528

Epoch: 3102	Loss: 0.8443313241	Example Output: +X++X	Example Reward:  0.176764921791
Epoch: 3103	Loss: 0.885227054358	Example Output: +Y--+X+Y+X	Example Reward:  0.280952882427
Epoch: 3104	Loss: 1.10361620784	Example Output: 	Example Reward:  0
Epoch: 3105	Loss: 0.844861459732	Example Output: +Y+++Y+Y+X	Example Reward:  0.230337444003
Epoch: 3106	Loss: 0.936943727732	Example Output: *	Example Reward:  0
Epoch: 3107	Loss: 0.834695470333	Example Output: 	Example Reward:  0
Epoch: 3108	Loss: 0.807815659046	Example Output: -	Example Reward:  0
Epoch: 3109	Loss: 1.11942293048	Example Output: 	Example Reward:  0
Epoch: 3110	Loss: 0.872751545906	Example Output: *	Example Reward:  0
Saving Weight
Epoch: 3111	Loss: 1.00029428601	Example Output: *+	Example Reward:  0
Epoch: 3112	Loss: 1.06865751743	Example Output: 	Example Reward:  0
Epoch: 3113	Loss: 0.95299089551	Example Output: -	Example Reward:  0
Epoch: 3114	Loss: 1.03250277042	Example Output: 	Example Reward:  0
Epoch: 3115	Loss: 1.0648741

Epoch: 3201	Loss: 0.858797866106	Example Output: +Y++	Example Reward:  0
Epoch: 3202	Loss: 1.0209921658	Example Output: -	Example Reward:  0
Epoch: 3203	Loss: 1.17466056347	Example Output: 	Example Reward:  0
Epoch: 3204	Loss: 1.00842552781	Example Output: 	Example Reward:  0
Epoch: 3205	Loss: 0.97148861289	Example Output: *	Example Reward:  0
Epoch: 3206	Loss: 1.02145224214	Example Output: 	Example Reward:  0
Epoch: 3207	Loss: 0.89487131238	Example Output: +X+	Example Reward:  0
Epoch: 3208	Loss: 1.03024564385	Example Output: +Y+++Y+Y+Y	Example Reward:  0.189241565828
Epoch: 3209	Loss: 0.805017131567	Example Output: +Y-+	Example Reward:  0
Epoch: 3210	Loss: 0.841810852289	Example Output: +X+	Example Reward:  0
Saving Weight
Epoch: 3211	Loss: 1.04402893782	Example Output: -	Example Reward:  0
Epoch: 3212	Loss: 0.88379316926	Example Output: +Y++	Example Reward:  0
Epoch: 3213	Loss: 0.88417712152	Example Output: -	Example Reward:  0
Epoch: 3214	Loss: 1.01662717462	Example Output: 	Exampl

Epoch: 3303	Loss: 0.960740602016	Example Output: 	Example Reward:  0
Epoch: 3304	Loss: 0.920056706667	Example Output: 	Example Reward:  0
Epoch: 3305	Loss: 0.92255166769	Example Output: +Y++	Example Reward:  0
Epoch: 3306	Loss: 0.854103696346	Example Output: 	Example Reward:  0
Epoch: 3307	Loss: 0.956970202923	Example Output: +X+	Example Reward:  0
Epoch: 3308	Loss: 0.892285662889	Example Output: 	Example Reward:  0
Epoch: 3309	Loss: 0.912570023537	Example Output: +Y++	Example Reward:  0
Epoch: 3310	Loss: 1.00436478257	Example Output: 	Example Reward:  0
Saving Weight
Epoch: 3311	Loss: 0.835899174213	Example Output: --	Example Reward:  0
Epoch: 3312	Loss: 0.983339691162	Example Output: 	Example Reward:  0
Epoch: 3313	Loss: 0.916798645258	Example Output: +Y++	Example Reward:  0
Epoch: 3314	Loss: 0.803154402971	Example Output: 	Example Reward:  0
Epoch: 3315	Loss: 0.812766420841	Example Output: /	Example Reward:  0
Epoch: 3316	Loss: 0.815947109461	Example Output: +X+	Example Reward:  0
E

Epoch: 3400	Loss: 0.735560202599	Example Output: +X-	Example Reward:  0
Prob
[[  4.51061421e-26   1.13117292e-25   3.82152258e-29   7.29731967e-29
    2.74987347e-29   5.64616396e-29   5.30808742e-29   4.22304226e-29
    3.61530273e-29   4.52890070e-29   6.54696713e-29   4.93961038e-29
    5.65003633e-01   2.25247040e-01   4.23833951e-02   3.08185108e-02
    1.36547476e-01]]
[[  4.66759413e-01   5.33240557e-01   1.15056274e-18   1.38758948e-18
    7.37353761e-19   1.31505701e-18   1.08020234e-18   7.70949960e-19
    7.33275295e-19   1.03722080e-18   1.36002462e-18   1.13392679e-18
    7.44884462e-21   5.38065560e-21   1.47054053e-21   5.92391373e-22
    4.14673318e-19]]
[[ 0.          0.          0.          0.          0.          0.          0.
   0.          0.          0.          0.          0.          0.88081723
   0.1044144   0.00511403  0.00418478  0.00546968]]
[[  7.12120792e-14   1.86831074e-13   3.93240661e-28   9.84910366e-28
    2.38492479e-28   6.47760826e-28   6.1604976

Epoch: 3470	Loss: 0.716974914074	Example Output: +Y+++X+X+X	Example Reward:  0.280952882427
Saving Weight
Epoch: 3471	Loss: 0.700578916073	Example Output: +X+	Example Reward:  0
Epoch: 3472	Loss: 0.715643298626	Example Output: +X+	Example Reward:  0
Epoch: 3473	Loss: 0.693285101652	Example Output: +Y++-X+Y+X	Example Reward:  0.176764921791
Epoch: 3474	Loss: 0.816904628277	Example Output: 	Example Reward:  0
Epoch: 3475	Loss: 0.911615568399	Example Output: -	Example Reward:  0
Epoch: 3476	Loss: 0.910496968031	Example Output: 	Example Reward:  0
Epoch: 3477	Loss: 0.736984670162	Example Output: +X-	Example Reward:  0
Epoch: 3478	Loss: 0.781915843487	Example Output: -	Example Reward:  0
Epoch: 3479	Loss: 0.917113411427	Example Output: 	Example Reward:  0
Epoch: 3480	Loss: 0.788183307648	Example Output: /+X+X+Y+X	Example Reward:  0
Saving Weight
Epoch: 3481	Loss: 0.758575189114	Example Output: /	Example Reward:  0
Epoch: 3482	Loss: 0.695167738199	Example Output: +Y+++Y/Y+Y	Example Reward:  

KeyboardInterrupt: 