In [None]:
import zipfile

# Unpack the deployment bundle into the current working directory.
# The archive is opened in a `with` block so its file handle is closed as soon
# as extraction finishes, and the handle is deliberately NOT named `zip`:
# a notebook-global `zip` would shadow the builtin for every later cell.
# NOTE(review): extractall() trusts the member paths stored in the archive;
# only run this on archives from a trusted source.
with zipfile.ZipFile('deploy_5_26_v2.zip') as deploy_archive:
    deploy_archive.extractall()

In [None]:
import tensorflow as tf

# Configure GPU visibility and memory behaviour before TensorFlow initializes
# any device. Nothing is changed when no physical GPU is reported.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Expose only the first detected GPU (index 0) to TensorFlow.
        # NOTE(review): the previous comment here said "fourth GPU", which does
        # not match index 0 -- confirm which device is actually intended.
        first_gpu = gpus[0]
        tf.config.experimental.set_visible_devices(first_gpu, 'GPU')

        # The memory-growth flag is applied to every physical GPU, not just
        # the visible one, so the setting is the same across all of them.
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)

        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        physical_count = len(gpus)
        logical_count = len(logical_gpus)
        print(physical_count, "Physical GPUs,", logical_count, "Logical GPUs")
    except RuntimeError as err:
        # Raised when these settings are applied after the GPUs have already
        # been initialized; report it and carry on with the default config.
        print(err)

In [None]:
import collections

import gym
from myutils.trainingClasses.MountainCarConvolutionalTraining import MountainCarConvolutionalTraining
from myutils.gym_custom.gym_custom import MountainCarEnvWrapper

import numpy as np
from myutils.xvfbwrapper import Xvfb

# Run one training session inside a virtual X display.
# (presumably the display is needed so the environment can render without a
# physical screen -- confirm against the trainer's rendering requirements)
vdisplay = Xvfb()
vdisplay.start()

try:
    # Wrap the stock MountainCar environment in the project's custom wrapper.
    base_env = gym.make('MountainCar-v0')
    env = MountainCarEnvWrapper(base_env)
    print("Env created")

    env.reset()

    # Build the trainer in 'offline' mode and hand control over to it.
    agent = MountainCarConvolutionalTraining(env, 'offline')
    print("Started training \n ---------\n")
    agent.start()
finally:
    # Always tear the virtual display down, even when training raised.
    vdisplay.stop()

print("Finished training")

In [None]:

from collections import deque
import gym
from keras import models
from myutils.performanceAnalyzation.Analytics import Analytics
import datetime
from myutils.trainingClasses.MountainCarConvolutionalTraining import MountainCarConvolutionalTraining as conv_trainor
import numpy as np
import myutils.constants.Constants as cts
from myutils.gym_custom.gym_custom import MountainCarEnvWrapper
import csv

import os

from myutils.xvfbwrapper import Xvfb

# Validate every model saved during training: each one plays `episodes`
# episodes and one CSV row is appended per episode with
# [episode index, summed custom reward, 1 if the episode ended with done else 0].
vdisplay = Xvfb()
vdisplay.start()

try:

    # `.env` takes the inner environment of what gym.make() returns.
    # NOTE(review): presumably this drops gym's own step limit so that
    # `time_steps_in_episode` below is the only cap -- confirm.
    env = MountainCarEnvWrapper(gym.make('MountainCar-v0').env)

    time_steps_in_episode = 300
    episodes = 100

    # Frame-stacking settings are the same for every model, so set them once.
    stack_depth = 4   # number of consecutive frames that form one state
    frame_skip = 4    # a new action is chosen only every `frame_skip` steps

    models_dir = cts.Constants.PATH_TO_MODELS_TRACKING_PROGRESS_TRESHOLD_SAVES

    # os.listdir() returns names in arbitrary order. Rows are appended to the
    # CSV model after model, so the evaluation order has to be deterministic;
    # sort the file names.
    # NOTE(review): this is a plain lexicographic sort -- confirm it matches
    # the order in which the models were saved (e.g. "10" sorts before "2").
    models_from_training = sorted(os.listdir(models_dir))

    number_of_models_saved = len(models_from_training)

    for current_model_tested, model_file_name in enumerate(models_from_training):

        # os.path.join works whether or not the constant ends with a separator.
        model = models.load_model(os.path.join(models_dir, model_file_name))

        # FIFO buffer holding the last `stack_depth` processed frames.
        frames_memory = deque(maxlen=stack_depth)

        for ep in range(episodes):

            env.reset()

            reward_sum = 0

            # Seed the state with `stack_depth` copies of the first frame.
            # process_image is called on the class itself (no instance), so
            # the class is passed explicitly in place of `self`.
            current_image = env.render(mode='rgb_array')
            current_frame = conv_trainor.process_image(conv_trainor, current_image)
            current_frame = current_frame.reshape(1, current_frame.shape[0], current_frame.shape[1])
            current_state = np.repeat(current_frame, stack_depth, axis=0)
            # extend() pushes `stack_depth` frames, which replaces anything
            # left in the buffer from the previous episode.
            frames_memory.extend(current_state)

            for t in range(time_steps_in_episode):

                # Choose a new action every `frame_skip` steps (t == 0 always
                # qualifies, so `best_action` is defined before its first use)
                # and repeat it on the steps in between.
                if (t % frame_skip) == 0:
                    # Add a leading batch axis of size 1 for predict().
                    current_state = current_state.reshape(1, current_state.shape[0], current_state.shape[1], current_state.shape[2])
                    # NOTE(review): the second input is an all-ones (1, 3)
                    # array -- presumably an action mask for the 3 actions;
                    # confirm against the model definition.
                    best_action = np.argmax(model.predict([current_state, np.ones((1, 3))]))

                # The numerical observation is not used; states are built
                # from rendered frames instead.
                _, reward, done, _ = env.step_with_custom_reward(best_action)

                new_image = env.render(mode='rgb_array')
                next_frame = conv_trainor.process_image(conv_trainor, new_image)
                next_frame = next_frame.reshape(next_frame.shape[0], next_frame.shape[1])

                # frames_memory is a bounded FIFO: appending the newest frame
                # drops the oldest, so the stacked state keeps a constant depth.
                frames_memory.append(next_frame)

                new_state = np.asarray(frames_memory)

                reward_sum += reward
                current_state = new_state

                if done:
                    break

            # Append this episode's result right away so partial progress is
            # kept if the run is interrupted.
            with open('./test_during_training_analysis.csv', mode='a+', newline='') as numerical_data:
                numerical_data_writer = csv.writer(numerical_data, delimiter=',', quotechar='"',
                                                quoting=csv.QUOTE_MINIMAL)
                numerical_data_writer.writerow([ep, reward_sum, 1 if done else 0])
finally:
    # Always stop the virtual display, even when validation raised.
    vdisplay.stop()

print("Finished validation")

In [None]:
import numpy as np
import csv

# Read the per-episode training log. The first three fields of every row are
# parsed as floats; the columns are named below as episode index, reward and
# episode-won flag.
with open('./rewards_in_episodes.csv', mode='r', newline='') as log_file:
    a = np.asarray([
        [float(record[0]), float(record[1]), float(record[2])]
        for record in csv.reader(log_file, delimiter=',')
    ])

episodes = a[:,0]
rewards = a[:,1]
episodes_won = a[:,2]

import matplotlib.pyplot as plt

# Both series share one wide axes; the legend is lifted above the plot area
# by the bbox anchor so it does not cover the curves.
fig, ax = plt.subplots(figsize=(17,4))
ax.plot(episodes, rewards, label="rewards/episode")
ax.plot(episodes, episodes_won, label="episode won")
ax.set_xlabel('episodes')
ax.set_ylabel('total reward/episode')
ax.legend(loc="upper left", bbox_to_anchor=(0,1.18))
# 21 evenly spaced ticks from 0 to 100, i.e. one tick every 5 episodes.
ax.set_xticks(np.linspace(0,100,21))
fig.savefig("training_process_analysis_on_100_episodes.png")
print("Done")

In [None]:

import numpy as np
import csv

# Plot the validation log in which every saved model contributed a run of
# `number_of_validation_eps_for_model` consecutive rows.
number_of_validation_eps_for_model = 100

with open('./test_during_training_analysis.csv', mode='r', newline='') as numerical_data:
    reader = csv.reader(numerical_data,delimiter=',')
    parsed_rows = [[float(row[0]), float(row[1]), float(row[2])] for row in reader]

# reshape(-1, 3) keeps the array two-dimensional even when the CSV is empty,
# so the column slicing below cannot fail with an IndexError.
a = np.asarray(parsed_rows, dtype=float).reshape(-1, 3)

# The number of models is derived from the amount of data instead of being
# hard-coded: with a fixed count, a CSV holding fewer rows made the offset
# assignment fail on a shape mismatch, and extra rows were left un-offset.
# A trailing partial chunk counts as one more model.
# NOTE(review): the validation step appends to this CSV, so re-running it
# without deleting the file adds further chunks that show up here as models.
number_of_models = int(np.ceil(len(a) / number_of_validation_eps_for_model))

# Episode numbers restart from 0 for each model. To show every model on one
# axis, the rows of model i are shifted by i * number_of_validation_eps_for_model
# (row index // chunk size gives the model index of each row).
model_index_per_row = np.arange(len(a)) // number_of_validation_eps_for_model
a[:,0] += model_index_per_row * number_of_validation_eps_for_model

# Taken after the shift, so it does not depend on aliasing with `a`.
episodes = a[:,0]
rewards = a[:,1]
episodes_won = a[:,2]

import matplotlib.pyplot as plt
plt.figure(figsize=(17,4))
plt.plot(episodes,rewards, label="rewards/episode")
plt.plot(episodes,episodes_won,label="episode won")
plt.xlabel('episodes')
plt.ylabel('total reward/episode')
plt.legend(loc="upper left",bbox_to_anchor=(0,1.18))

# One tick at the start of every model's chunk plus the closing boundary:
# number_of_models intervals need number_of_models + 1 points
# (e.g. 9 models x 100 episodes -> ticks 0, 100, ..., 900).
plt.xticks(np.linspace(0, number_of_models * number_of_validation_eps_for_model, number_of_models + 1))
plt.savefig("validation_100_ep_every_10.png")
print("Done")

