# Validation code
This code validates the best weights obtained from training the model for two different goals (keys C and D) and outputs a GIF showing the movement of the fingers. The available goals are keys C, D, E and F, in increasing order of difficulty. Best weights were obtained only for keys C and D.

In [None]:
from ipynb.fs.full.PianoHandv1 import *

# Goal (key) to validate. Set this to 'C' or 'D'.
test_key = "D"

In [None]:
# Create the environment for the selected goal key.
env = PianoHandEnv(test_key)

# Rebuild the network before loading weights into it: one shared hidden
# layer that feeds both a softmax action head and a single-unit critic head.
num_inputs = env.observation_space.shape[0]
num_actions = env.action_space.n
num_hidden = 2

inputs = layers.Input(shape=(num_inputs,))
common = layers.Dense(units=num_hidden, activation="relu")(inputs)
action = layers.Dense(units=num_actions, activation="softmax")(common)
critic = layers.Dense(units=1)(common)

testmodel = keras.Model(inputs=inputs, outputs=[action, critic])
testmodel.summary()

# Load the weights saved for this key into the rebuilt model.
weight_path = f"./Weights/key{test_key}.h5"
testmodel.load_weights(weight_path)

The maximum reward, 200, is obtained when the agent plays the specified key.

In [None]:
# Initialise variables
episode_reward = 0
episodes = 1
max_steps = 100
images = []  # one rendered frame per step, across all episodes


def _to_model_input(obs):
    """Flatten an observation into a batch of one state for the network.

    The values are read as obs[i][j] for i in 0..3 and j in 0..1, in that
    order, converted to a tensor, and given a leading batch axis.
    Assumes each obs[i][j] is a scalar -- TODO confirm against PianoHandEnv.
    """
    flat = tuple(obs[i][j] for i in range(4) for j in range(2))
    return tf.expand_dims(tf.convert_to_tensor(flat), 0)


# Validate model with saved weights
for i in range(episodes):
    state = _to_model_input(env.reset())

    done = False
    for j in range(max_steps):
        # The first model output is the action head; pick its arg-max
        # (greedy action) rather than sampling.
        prediction = np.array(testmodel.predict(state)[0])
        action = np.argmax(prediction)

        nst, reward, done, _, _ = env.step(action)
        state = _to_model_input(nst)

        episode_reward += reward
        images.append(env.render(j, done))

        # Stop the episode as soon as the environment reports it finished.
        if done:
            break

    # i counts episodes, not time steps, so label the progress accordingly.
    print("Episode", i + 1, end='\r')

# Display average rewards earned
print('Average Reward:', episode_reward / episodes)

In [None]:
# Write the collected frames out as a GIF named after the tested key.
gif_path = f"./GIFS/ValidationCode{test_key}.gif"
imageio.mimwrite(gif_path, images)
# The GIF is saved in the GIFS folder.
# View it there, or view the preloaded GIFs below.


Preloaded Gifs:
C:
<img src="GIFS/PreloadedC.gif" width="750" align="center">


D:
<img src="GIFS/PreloadedD.gif" width="750" align="center">