In [13]:
from antAgent import Ant, DataObject

In [14]:
import roboschool
import gym
import numpy as np
import tensorflow as tf
import csv
import time
import os
import matplotlib.pyplot as plt
%matplotlib inline

# Create environments

In [15]:
# Create training environments
# The creature will have to do multiple environments to obtain it's score
envs = []
number_of_envs = 3
for i in range(number_of_envs):
    env = gym.make('RoboschoolAnt-v1')
    envs.append(env)

# Training parameters

In [16]:
# Variables
max_number_of_steps = 500
number_of_agents = 300
number_of_previous_agents = 300

hidden_layer_widths = [64,32]

children_ratio = 0.2
mutation_ratio = 0.6

mutation_distance = 0.05
mutation_chance = 0.05

falling_reward = -500
additional_moving_forward_reward_multiplier = 10

# Save parameters

In [17]:
# Save settings
log_file_path = './log_file.csv'
savepoint_dir = './agents_weights/auto_savepoints'
saving_interval_time = 10 * 60

# Create the creatures

In [18]:
# Get information about environment
env = envs[0]
env.reset()
action = env.action_space.sample()
obv, reward, is_done, info = env.step(action)

input_shape = np.array(obv).shape
output_shape = np.array(action).shape

# Headers of save file
headers = ['episode', 'agent_number', 'id', 
           'parent_0', 'parent_1', 
           'fitness_value', 
           'mutated', 'cloned', 
           'mutation_distance', 'mutation_chance', 'max_number_of_steps'
]
    
# Create agents
print('Create new agents')
# Reset graphs if any
tf.compat.v1.reset_default_graph()
agents = []
for i in range(number_of_agents):
    print(i)
    agent = Ant(input_shape, output_shape, 
                hidden_layer_widths = hidden_layer_widths)
    agent.build()
    agents.append(agent)
reward_history = []
logs = []  
episode_number = 0


Create new agents
0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272


# Load weights of the creatures

(Optional)

In [8]:
# Auto save model
load_episode = 1
clean_start = False
loadpoint_dir = './agents_weights/auto_savepoints/{}'.format(load_episode)

# Pretrained models 128 32 layers model
# load_episode = '128_32'
# clean_start = True
# loadpoint_dir = './agents_weights/{}'.format(load_episode)

# Pretrained models 64 32 layers model
# load_episode = '64_32'
# clean_start = True
# loadpoint_dir = './agents_weights/{}'.format(load_episode)

In [10]:
# Load previous saves
with open(log_file_path, mode='r') as log_file:
    csv_reader = csv.DictReader(log_file)
    data = [row for row in csv_reader]
    number_of_rows = len(data)
    for agent_index, agent in enumerate(agents):

        print('{}/{}.h5'.format(loadpoint_dir, agent_index))

        agent_index = agent_index % number_of_previous_agents

        agent.model.load_weights('{}/{}.h5'.format(loadpoint_dir, agent_index))

        # Don't insert the previous values
        if clean_start == True:
            continue

        row = None
        for iRow in reversed(data):
            if (int(iRow['episode']) == load_episode) and (int(iRow['agent_number']) == agent_index):
                row = iRow
                break
        if row == None:
            raise ValueError('Data of agent not found')

        agent.data.id = row['id']
        agent.data.parents = [row['parent_0'],row['parent_1']]
        agent.data.fitness_value = row['fitness_value']
        agent.data.mutated=row['mutated']
        agent.data.cloned=row['cloned']

    if clean_start == False:
        episode_number = int(row['episode'])    
    if clean_start == True:    
        episode_number=0

./agents_weights/128_32/0.h5
./agents_weights/128_32/1.h5
./agents_weights/128_32/2.h5
./agents_weights/128_32/3.h5
./agents_weights/128_32/4.h5
./agents_weights/128_32/5.h5
./agents_weights/128_32/6.h5
./agents_weights/128_32/7.h5
./agents_weights/128_32/8.h5
./agents_weights/128_32/9.h5
./agents_weights/128_32/10.h5
./agents_weights/128_32/11.h5
./agents_weights/128_32/12.h5
./agents_weights/128_32/13.h5
./agents_weights/128_32/14.h5
./agents_weights/128_32/15.h5
./agents_weights/128_32/16.h5
./agents_weights/128_32/17.h5
./agents_weights/128_32/18.h5
./agents_weights/128_32/19.h5
./agents_weights/128_32/20.h5
./agents_weights/128_32/21.h5
./agents_weights/128_32/22.h5
./agents_weights/128_32/23.h5
./agents_weights/128_32/24.h5
./agents_weights/128_32/25.h5
./agents_weights/128_32/26.h5
./agents_weights/128_32/27.h5
./agents_weights/128_32/28.h5
./agents_weights/128_32/29.h5


# Train the creatures

In [19]:
# Create logfile
file_exists = os.path.isfile(log_file_path)
with open(log_file_path, mode='a') as log_file:
    # log writer
    log_writer = csv.writer(log_file, delimiter=',')
    # Create header if they do not exist
    if not file_exists:
        log_writer.writerow(headers)
        
# Adjust mutation rate and muation distance
for agent in agents:
    agent.mutation_distance = mutation_distance
    agent.mutation_chance = mutation_chance

# Initiate save timer
# 0 if you want to safe the first generation, time.time() if you want to save at the end of the period
last_save_time = 0 # int(time.time()) 

# Keep on running
while True:
    episode_number += 1
    print('Round {}'.format(episode_number))
    # Valuate the agents
    print('Start test...')
    total_rewards = []
    for agent_index, agent in enumerate(agents):
        total_reward = 0 
        obvs = []
        is_dones = []
        for env in envs:
            obv = env.reset()
            obvs.append(obv)
            is_dones.append(False)
        obvs = np.array(obvs)
        for step_number in range(max_number_of_steps):
            actions = agent.predict(obvs)

            for index, (env, action) in enumerate(zip(envs, actions)):
                if is_dones[index]:
                    continue
                obv, reward, is_done, info = env.step(action)
                reward += additional_moving_forward_reward_multiplier * env.rewards[1]
                obvs[index] = obv
                is_dones[index] = is_done
                if is_dones[index]:
                    print('Number {} is done at: {}'.format(index, step_number))
                    reward += falling_reward
                total_reward += reward

        print('Agent {} done with a total reward of {}'.format(agent_index, total_reward))
        total_rewards.append(total_reward)
        agent.data.fitness_value = total_reward

    # Save the rewards of this round
    reward_history.append(total_rewards)

    with open(log_file_path, mode='a') as log_file:
        # log writer
        log_writer = csv.writer(log_file, delimiter=',')
        for agent_number, agent in enumerate(agents):
            data = agent.data
            if len(data.parents) > 0:
                parent_id_0 = data.parents[0]
            else: 
                parent_id_0 = ''
            if len(data.parents) > 1:
                parent_id_1 = data.parents[1]
            else: 
                parent_id_1 = ''
            log_writer.writerow([
                episode_number,
                agent_number,
                data.id,
                parent_id_0,
                parent_id_1,
                data.fitness_value,
                data.mutated,
                data.cloned,
                agent.mutation_distance,
                agent.mutation_chance,
                max_number_of_steps,
                ])

    # Save data
    log = []
    for agent in agents:
        log.append(agent.data)
    logs.append(log)

    # Save models
    current_time = int(time.time())
    if current_time - last_save_time > saving_interval_time:
        last_save_time = current_time
        for agent_index, agent in enumerate(agents):
            total_save_dir = '{}/{}'.format(savepoint_dir, episode_number)
            try:
                # Create target Directory
                os.mkdir(total_save_dir)
            except FileExistsError:
                pass
                
            print('saving:{}'.format(agent_index))
            agent.model.save_weights('{}/{}.h5'.format(total_save_dir, agent_index))

    # Sexy time
    print('Initiate sex...')
    new_weights = []
    datas = []

    # Get the sexy time probabilities
    print('Get probabilities')
    total_rewards = np.array(total_rewards)
    normalized_rewards = (total_rewards - min(total_rewards)) / (max(total_rewards) - min(total_rewards))
    normalized_rewards /= sum(normalized_rewards)
    # Let's squire it
    normalized_rewards = normalized_rewards**2/np.sum(normalized_rewards**2)
#     normalized_rewards = normalized_rewards**4/np.sum(normalized_rewards**4)

    # Part of the new generation is new children

    # Partner set 1
    print('Create partner set 1')
    partner_set_1 = []
    for i in range(int(children_ratio * number_of_agents)):
        agent = np.random.choice(agents, p=normalized_rewards)
        partner_set_1.append(agent)

    # Partner set 2
    print('Create partner set 2')
    partner_set_2 = []
    for i in range(int(children_ratio * number_of_agents)):
        agent = np.random.choice(agents, p=normalized_rewards)
        partner_set_2.append(agent)

    # Determine the new weights
    print('Determine new weights')
    for partner_1, partner_2 in zip(partner_set_1, partner_set_2):
        child_weights = partner_1.get_child_weights_with_partner_and_mutate(partner_2)
        new_weights.append(child_weights)
        # Create new data structures
        data =  DataObject({
            'parents':[partner_1.data.id, partner_2.data.id],
            'fitness_value': 0,
            'mutated': True,
            'cloned': False
        })
        datas.append(data)

    # Part of the new generation is the a mutatant clone

    # Determine mutant clones
    print('Determine mutant clones')
    while len(new_weights) < mutation_ratio * number_of_agents:
        clone_parent_index = np.random.choice(range(0,number_of_agents), p=normalized_rewards)
        agent = agents[clone_parent_index]
        new_weights.append(agent.get_mutated_weights())
        # Create new data structures
        data =  DataObject({
            'parents':[agent.data.id],
            'fitness_value': 0,
            'mutated': True,
            'cloned': True
        })
        datas.append(data)

    # Part of the new generation is the previous once

    # Determine survivors
    print('Determine survivors')
    survivor_indices = []
    while len(new_weights) < number_of_agents:
        survivor_index = np.random.choice(range(0,number_of_agents), p=normalized_rewards)
        if survivor_index not in survivor_indices:
            survivor_indices.append(survivor_index)
            agent = agents[survivor_index]
            new_weights.append(agent.model.get_weights())
                # Create new data structures
            data =  DataObject({
                'parents':[agent.data.id],
                'fitness_value': 0,
                'mutated': False,
                'cloned': False
            })
            datas.append(data)

    # Execute new era

    # Create the next generation
    print('Create new generation')

    for agent, weights, data in zip(agents, new_weights, datas):
        agent.set_weights(weights)
        agent.data = data

# env.close()

Round 1
Start test...
Agent 0 done with a total reward of 3658.684831079843
Agent 1 done with a total reward of 3929.650385108821
Agent 2 done with a total reward of 4658.686957961907
Agent 3 done with a total reward of 5175.784237813082


KeyboardInterrupt: 

# Monitoring progress

In [12]:
# Np reward history object
history = np.array(reward_history)
# Sort the reward history
sorted_history = np.sort(history, axis=1)
# Get the number of spiders
amount = sorted_history.shape[1]
# Plot values
plt.plot(sorted_history[:,0])
plt.plot(sorted_history[:,int(0.1 * amount-1)])
plt.plot(sorted_history[:,int(0.25 * amount-1)])
plt.plot(sorted_history[:,int(0.5 * amount-1)])
plt.plot(sorted_history[:,int(0.75 * amount-1)])
plt.plot(sorted_history[:,int(0.9 * amount-1)])
plt.plot(sorted_history[:,int(amount-1)])

AxisError: axis 1 is out of bounds for array of dimension 1