In [36]:
import random
import gym
import numpy as np
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import Reshape
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
import time

In [37]:
# Number of discrete actions: passed to cur_model as num_action and used to size each per-state value vector in reinforcement().
action_space = 16

In [38]:
def cur_model(num_Conv2D,num_filters,num_kernel,num_lstm_units,num_action, num_frames):
  """Build and compile a Dense -> LSTM -> Dense Keras model.

  Args:
    num_Conv2D: accepted but not used by this implementation.
    num_filters: accepted but not used by this implementation.
    num_kernel: accepted but not used by this implementation.
    num_lstm_units: number of units in the LSTM layer.
    num_action: width of both Dense layers (one output per action).
    num_frames: accepted but not used by this implementation.

  Returns:
    The compiled Sequential model (mean-squared-error loss, Adam optimizer).
  """
  # Layers are listed in the order in which they are stacked.
  layer_stack = [
      Dense(num_action),
      LSTM(num_lstm_units),
      Dense(num_action),
  ]
  network = Sequential(layer_stack)
  network.compile(optimizer='Adam', loss='mean_squared_error')
  return network

In [39]:
# Bounds of the continuous action range that gets discretized.
space_high = 1
space_low = -1
# One evenly spaced action value per model output. Sized from action_space (instead of a
# duplicated literal) so the action table and the model output width cannot drift apart.
discrete_space = np.linspace(space_low, space_high, action_space)

In [40]:
def reinforcement(env, model, n_games=400, epochs=16, epsilon_decrease=0.05, backsight=8):
  """Play ``n_games`` episodes with an epsilon-greedy policy and refit ``model`` after each one.

  For every frame the function records the window of the last ``backsight`` observations
  that the action was chosen from, the chosen action index, and a value vector. Each reward
  received is added to the chosen-action slot of every frame recorded so far in the same
  game, so a slot ends up holding the undiscounted sum of rewards from that frame onward.
  After each game the model is fitted on all states/values collected so far.

  Args:
    env: environment exposing ``reset()``, ``step(action)`` and ``render()``.
      Both the 4-tuple and the 5-tuple ``step`` result shapes are accepted, as are
      ``reset()`` results that are either the observation or an ``(obs, info)`` tuple.
    model: object exposing ``predict(x)`` and ``fit(x, y, epochs=...)``.
    n_games: number of episodes to play.
    epochs: number of epochs passed to ``model.fit`` after each game.
    epsilon_decrease: amount subtracted from epsilon after each game (floored at 0).
    backsight: number of most recent observations that make up one state.

  Returns:
    None. ``model`` is trained in place.

  Relies on the module-level ``discrete_space`` (action values) and ``action_space``
  (length of each value vector).
  """
  # random.random() is always < 1, so with epsilon starting at 3 every action is random
  # until epsilon has decayed below 1; after that the model's choice is used increasingly.
  epsilon = 3
  state_history = []
  value_history = []
  n_actions = len(discrete_space)

  for game_number in range(n_games):
    # Start every game from the environment's real initial observation (not a random
    # sample of the observation space, and not the last observation of the previous game).
    reset_result = env.reset()
    obs = reset_result[0] if isinstance(reset_result, tuple) else reset_result

    states = []
    actions = []
    values = []
    memory = [obs] * backsight # sliding window of the last `backsight` observations
    frames = 0

    while True:
      if random.random() > epsilon:
        # Exploit: take the action with the highest predicted value for the current window.
        action_index = np.argmax(model.predict(np.array([memory], dtype='float16'))[0])
      else:
        # Explore: randint's upper bound is exclusive, so pass the full length to make
        # the last action reachable as well.
        action_index = np.random.randint(0, n_actions)
      action = discrete_space[action_index]

      step_result = env.step([action])
      if len(step_result) == 5:
        # Newer API shape: (obs, reward, terminated, truncated, info).
        obs, reward, terminated, truncated, _info = step_result
        done = terminated or truncated
      else:
        obs, reward, done, _info = step_result
      env.render()
      time.sleep(0.05)

      # Every frame is recorded.
      print('Game', game_number, 'frame', frames)
      actions.append(action_index)
      # Store a snapshot: `memory` is mutated below, so appending the list itself would
      # make every recorded state alias the same (final) window.
      states.append(list(memory))
      values.append([0] * action_space)
      memory.pop(0)
      memory.append(obs)

      # Credit this frame's reward to the action taken on every frame of the game so far.
      for i, scores in enumerate(values):
        scores[actions[i]] += reward

      frames += 1

      if done:
        print('Game', game_number, 'lasted', frames, 'frames')
        break

    state_history.extend(states)
    value_history.extend(values)
    # Fit on everything collected so far; `epochs` is now honoured instead of being ignored.
    model.fit(np.array(state_history, dtype='float16'), np.array(value_history), epochs=epochs)
    epsilon = max(0, epsilon - epsilon_decrease)

In [41]:
# Create the continuous Mountain Car environment and the network that will choose its actions.
env = gym.make("MountainCarContinuous-v0")
model = cur_model(
    num_Conv2D=4,
    num_filters=8,
    num_kernel=4,
    num_lstm_units=8,
    num_action=action_space,
    num_frames=8,
)

In [42]:
# Run the training loop for 4 games using the environment and model created in the previous cell.
reinforcement(env, model, n_games=4, epochs=1)

If you want to render in human mode, initialize the environment in this way: gym.make('EnvName', render_mode='human') and don't call the render method.
See here for more information: https://www.gymlibrary.ml/content/api/
  deprecation(


Game 0 frame 0
Game 0 frame 1
Game 0 frame 2
Game 0 frame 3
Game 0 frame 4
Game 0 frame 5
Game 0 frame 6
Game 0 frame 7
Game 0 frame 8
Game 0 frame 9
Game 0 frame 10
Game 0 frame 11
Game 0 frame 12
Game 0 frame 13
Game 0 frame 14
Game 0 frame 15
Game 0 frame 16
Game 0 frame 17
Game 0 frame 18
Game 0 frame 19
Game 0 frame 20
Game 0 frame 21
Game 0 frame 22
Game 0 frame 23
Game 0 frame 24
Game 0 frame 25
Game 0 frame 26
Game 0 frame 27
Game 0 frame 28
Game 0 frame 29
Game 0 frame 30
Game 0 frame 31
Game 0 frame 32
Game 0 frame 33
Game 0 frame 34
Game 0 frame 35
Game 0 frame 36
Game 0 frame 37
Game 0 frame 38
Game 0 frame 39
Game 0 frame 40
Game 0 frame 41
Game 0 frame 42
Game 0 frame 43
Game 0 frame 44
Game 0 frame 45
Game 0 frame 46
Game 0 frame 47
Game 0 frame 48
Game 0 frame 49
Game 0 frame 50
Game 0 frame 51
Game 0 frame 52
Game 0 frame 53
Game 0 frame 54
Game 0 frame 55
Game 0 frame 56
Game 0 frame 57
Game 0 frame 58
Game 0 frame 59
Game 0 frame 60
Game 0 frame 61
Game 0 frame 62
Ga

Game 0 frame 492
Game 0 frame 493
Game 0 frame 494
Game 0 frame 495
Game 0 frame 496
Game 0 frame 497
Game 0 frame 498
Game 0 frame 499
Game 0 frame 500
Game 0 frame 501
Game 0 frame 502
Game 0 frame 503
Game 0 frame 504
Game 0 frame 505
Game 0 frame 506
Game 0 frame 507
Game 0 frame 508
Game 0 frame 509
Game 0 frame 510
Game 0 frame 511
Game 0 frame 512
Game 0 frame 513
Game 0 frame 514
Game 0 frame 515
Game 0 frame 516
Game 0 frame 517
Game 0 frame 518
Game 0 frame 519
Game 0 frame 520
Game 0 frame 521
Game 0 frame 522
Game 0 frame 523
Game 0 frame 524
Game 0 frame 525
Game 0 frame 526
Game 0 frame 527
Game 0 frame 528
Game 0 frame 529
Game 0 frame 530
Game 0 frame 531
Game 0 frame 532
Game 0 frame 533
Game 0 frame 534
Game 0 frame 535
Game 0 frame 536
Game 0 frame 537
Game 0 frame 538
Game 0 frame 539
Game 0 frame 540
Game 0 frame 541
Game 0 frame 542
Game 0 frame 543
Game 0 frame 544
Game 0 frame 545
Game 0 frame 546
Game 0 frame 547
Game 0 frame 548
Game 0 frame 549
Game 0 frame 5

Game 0 frame 976
Game 0 frame 977
Game 0 frame 978
Game 0 frame 979
Game 0 frame 980
Game 0 frame 981
Game 0 frame 982
Game 0 frame 983
Game 0 frame 984
Game 0 frame 985
Game 0 frame 986
Game 0 frame 987
Game 0 frame 988
Game 0 frame 989
Game 0 frame 990
Game 0 frame 991
Game 0 frame 992
Game 0 frame 993
Game 0 frame 994
Game 0 frame 995
Game 0 frame 996
Game 0 frame 997
Game 0 frame 998
Game 0 lasted 999 frames
Game 1 frame 0
Game 1 frame 1
Game 1 frame 2
Game 1 frame 3
Game 1 frame 4
Game 1 frame 5
Game 1 frame 6
Game 1 frame 7
Game 1 frame 8
Game 1 frame 9
Game 1 frame 10
Game 1 frame 11
Game 1 frame 12
Game 1 frame 13
Game 1 frame 14
Game 1 frame 15
Game 1 frame 16
Game 1 frame 17
Game 1 frame 18
Game 1 frame 19
Game 1 frame 20
Game 1 frame 21
Game 1 frame 22
Game 1 frame 23
Game 1 frame 24
Game 1 frame 25
Game 1 frame 26
Game 1 frame 27
Game 1 frame 28
Game 1 frame 29
Game 1 frame 30
Game 1 frame 31
Game 1 frame 32
Game 1 frame 33
Game 1 frame 34
Game 1 frame 35
Game 1 frame 36
Ga

Game 1 frame 460
Game 1 frame 461
Game 1 frame 462
Game 1 frame 463
Game 1 frame 464
Game 1 frame 465
Game 1 frame 466
Game 1 frame 467
Game 1 frame 468
Game 1 frame 469
Game 1 frame 470
Game 1 frame 471
Game 1 frame 472
Game 1 frame 473
Game 1 frame 474
Game 1 frame 475
Game 1 frame 476
Game 1 frame 477
Game 1 frame 478
Game 1 frame 479
Game 1 frame 480
Game 1 frame 481
Game 1 frame 482
Game 1 frame 483
Game 1 frame 484
Game 1 frame 485
Game 1 frame 486
Game 1 frame 487
Game 1 frame 488
Game 1 frame 489
Game 1 frame 490
Game 1 frame 491
Game 1 frame 492
Game 1 frame 493
Game 1 frame 494
Game 1 frame 495
Game 1 frame 496
Game 1 frame 497
Game 1 frame 498
Game 1 frame 499
Game 1 frame 500
Game 1 frame 501
Game 1 frame 502
Game 1 frame 503
Game 1 frame 504
Game 1 frame 505
Game 1 frame 506
Game 1 frame 507
Game 1 frame 508
Game 1 frame 509
Game 1 frame 510
Game 1 frame 511
Game 1 frame 512
Game 1 frame 513
Game 1 frame 514
Game 1 frame 515
Game 1 frame 516
Game 1 frame 517
Game 1 frame 5

Game 1 frame 942
Game 1 frame 943
Game 1 frame 944
Game 1 frame 945
Game 1 frame 946
Game 1 frame 947
Game 1 frame 948
Game 1 frame 949
Game 1 frame 950
Game 1 frame 951
Game 1 frame 952
Game 1 frame 953
Game 1 frame 954
Game 1 frame 955
Game 1 frame 956
Game 1 frame 957
Game 1 frame 958
Game 1 frame 959
Game 1 frame 960
Game 1 frame 961
Game 1 frame 962
Game 1 frame 963
Game 1 frame 964
Game 1 frame 965
Game 1 frame 966
Game 1 frame 967
Game 1 frame 968
Game 1 frame 969
Game 1 frame 970
Game 1 frame 971
Game 1 frame 972
Game 1 frame 973
Game 1 frame 974
Game 1 frame 975
Game 1 frame 976
Game 1 frame 977
Game 1 frame 978
Game 1 frame 979
Game 1 frame 980
Game 1 frame 981
Game 1 frame 982
Game 1 frame 983
Game 1 frame 984
Game 1 frame 985
Game 1 frame 986
Game 1 frame 987
Game 1 frame 988
Game 1 frame 989
Game 1 frame 990
Game 1 frame 991
Game 1 frame 992
Game 1 frame 993
Game 1 frame 994
Game 1 frame 995
Game 1 frame 996
Game 1 frame 997
Game 1 frame 998
Game 1 lasted 999 frames
Game 2

Game 2 frame 426
Game 2 frame 427
Game 2 frame 428
Game 2 frame 429
Game 2 frame 430
Game 2 frame 431
Game 2 frame 432
Game 2 frame 433
Game 2 frame 434
Game 2 frame 435
Game 2 frame 436
Game 2 frame 437
Game 2 frame 438
Game 2 frame 439
Game 2 frame 440
Game 2 frame 441
Game 2 frame 442
Game 2 frame 443
Game 2 frame 444
Game 2 frame 445
Game 2 frame 446
Game 2 frame 447
Game 2 frame 448
Game 2 frame 449
Game 2 frame 450
Game 2 frame 451
Game 2 frame 452
Game 2 frame 453
Game 2 frame 454
Game 2 frame 455
Game 2 frame 456
Game 2 frame 457
Game 2 frame 458
Game 2 frame 459
Game 2 frame 460
Game 2 frame 461
Game 2 frame 462
Game 2 frame 463
Game 2 frame 464
Game 2 frame 465
Game 2 frame 466
Game 2 frame 467
Game 2 frame 468
Game 2 frame 469
Game 2 frame 470
Game 2 frame 471
Game 2 frame 472
Game 2 frame 473
Game 2 frame 474
Game 2 frame 475
Game 2 frame 476
Game 2 frame 477
Game 2 frame 478
Game 2 frame 479
Game 2 frame 480
Game 2 frame 481
Game 2 frame 482
Game 2 frame 483
Game 2 frame 4

Game 2 frame 908
Game 2 frame 909
Game 2 frame 910
Game 2 frame 911
Game 2 frame 912
Game 2 frame 913
Game 2 frame 914
Game 2 frame 915
Game 2 frame 916
Game 2 frame 917
Game 2 frame 918
Game 2 frame 919
Game 2 frame 920
Game 2 frame 921
Game 2 frame 922
Game 2 frame 923
Game 2 frame 924
Game 2 frame 925
Game 2 frame 926
Game 2 frame 927
Game 2 frame 928
Game 2 frame 929
Game 2 frame 930
Game 2 frame 931
Game 2 frame 932
Game 2 frame 933
Game 2 frame 934
Game 2 frame 935
Game 2 frame 936
Game 2 frame 937
Game 2 frame 938
Game 2 frame 939
Game 2 frame 940
Game 2 frame 941
Game 2 frame 942
Game 2 frame 943
Game 2 frame 944
Game 2 frame 945
Game 2 frame 946
Game 2 frame 947
Game 2 frame 948
Game 2 frame 949
Game 2 frame 950
Game 2 frame 951
Game 2 frame 952
Game 2 frame 953
Game 2 frame 954
Game 2 frame 955
Game 2 frame 956
Game 2 frame 957
Game 2 frame 958
Game 2 frame 959
Game 2 frame 960
Game 2 frame 961
Game 2 frame 962
Game 2 frame 963
Game 2 frame 964
Game 2 frame 965
Game 2 frame 9

Game 3 frame 392
Game 3 frame 393
Game 3 frame 394
Game 3 frame 395
Game 3 frame 396
Game 3 frame 397
Game 3 frame 398
Game 3 frame 399
Game 3 frame 400
Game 3 frame 401
Game 3 frame 402
Game 3 frame 403
Game 3 frame 404
Game 3 frame 405
Game 3 frame 406
Game 3 frame 407
Game 3 frame 408
Game 3 frame 409
Game 3 frame 410
Game 3 frame 411
Game 3 frame 412
Game 3 frame 413
Game 3 frame 414
Game 3 frame 415
Game 3 frame 416
Game 3 frame 417
Game 3 frame 418
Game 3 frame 419
Game 3 frame 420
Game 3 frame 421
Game 3 frame 422
Game 3 frame 423
Game 3 frame 424
Game 3 frame 425
Game 3 frame 426
Game 3 frame 427
Game 3 frame 428
Game 3 frame 429
Game 3 frame 430
Game 3 frame 431
Game 3 frame 432
Game 3 frame 433
Game 3 frame 434
Game 3 frame 435
Game 3 frame 436
Game 3 frame 437
Game 3 frame 438
Game 3 frame 439
Game 3 frame 440
Game 3 frame 441
Game 3 frame 442
Game 3 frame 443
Game 3 frame 444
Game 3 frame 445
Game 3 frame 446
Game 3 frame 447
Game 3 frame 448
Game 3 frame 449
Game 3 frame 4

Game 3 frame 874
Game 3 frame 875
Game 3 frame 876
Game 3 frame 877
Game 3 frame 878
Game 3 frame 879
Game 3 frame 880
Game 3 frame 881
Game 3 frame 882
Game 3 frame 883
Game 3 frame 884
Game 3 frame 885
Game 3 frame 886
Game 3 frame 887
Game 3 frame 888
Game 3 frame 889
Game 3 frame 890
Game 3 frame 891
Game 3 frame 892
Game 3 frame 893
Game 3 frame 894
Game 3 frame 895
Game 3 frame 896
Game 3 frame 897
Game 3 frame 898
Game 3 frame 899
Game 3 frame 900
Game 3 frame 901
Game 3 frame 902
Game 3 frame 903
Game 3 frame 904
Game 3 frame 905
Game 3 frame 906
Game 3 frame 907
Game 3 frame 908
Game 3 frame 909
Game 3 frame 910
Game 3 frame 911
Game 3 frame 912
Game 3 frame 913
Game 3 frame 914
Game 3 frame 915
Game 3 frame 916
Game 3 frame 917
Game 3 frame 918
Game 3 frame 919
Game 3 frame 920
Game 3 frame 921
Game 3 frame 922
Game 3 frame 923
Game 3 frame 924
Game 3 frame 925
Game 3 frame 926
Game 3 frame 927
Game 3 frame 928
Game 3 frame 929
Game 3 frame 930
Game 3 frame 931
Game 3 frame 9