Commit

Merge pull request #10 from jerryz123/imitation-learning
Imitation learning
jerryz123 committed Nov 16, 2017
2 parents b4b8411 + a1f4e60 commit ddb3946
Showing 16 changed files with 154 additions and 265 deletions.
45 changes: 30 additions & 15 deletions examples/collect_data.py
@@ -10,10 +10,14 @@
import pygame
from copy import deepcopy
from random import random

from gym_urbandriving.agents import AccelAgent, KeyboardAgent, NullAgent
from gym_urbandriving.agents import AccelAgent, KeyboardAgent, NullAgent, TreeSearchAgent
from gym_urbandriving import Car

def vectorize_state(state):
res = []
for obj in state.dynamic_objects:
res.extend([obj.x, obj.y, obj.vel, obj.angle])
return res

def early_stop_actions(actions):
"""
@@ -30,7 +34,7 @@ def early_stop_actions(actions):
True if approximately all the cars have gone through the intersection and are back up to speed.
"""
return actions == [(0, 1), (0, 1), (0, 1), (0, 1)]
return actions[0] == None

def run_and_collect():
"""
@@ -47,24 +51,25 @@ def run_and_collect():
saved_actions = []

vis = uds.PyGameVisualizer((800, 800))
init_state = uds.state.SimpleIntersectionState(ncars=4, nped=0)
init_state = uds.state.SimpleIntersectionState(ncars=2, nped=0)

env = uds.UrbanDrivingEnv(init_state=init_state,
visualizer=vis,
agent_mappings={Car:NullAgent},
max_time=100,
agent_mappings={Car:AccelAgent},
max_time=200,

randomize=True,
nthreads=4)
use_ray=True)

env._render()
state = init_state
agent = AccelAgent()
state = env.current_state
agent = TreeSearchAgent()
reset_counter = 0
action = None

while(True):
action = agent.eval_policy(state)
saved_states.append(state.vectorize_state())
action = agent.eval_policy(deepcopy(state))
saved_states.append(vectorize_state(state))
start_time = time.time()
state, reward, done, info_dict = env._step(action)
saved_actions.append(info_dict["saved_actions"])
@@ -74,14 +79,24 @@ def run_and_collect():
reset_counter+=1
else:
reset_counter = 0
if done or reset_counter >50:

env._render(waypoints = agent.waypoints)
if done or reset_counter >5:
# Time to save our current run and reset our env and our saved data
reset_counter = 0
print("done")
time.sleep(1)
env._reset()
state = env.current_state


# reset agent state
agent.waypoints = None
agent.actions = None

pickle.dump((saved_states, saved_actions),open("data/"+str(np.random.random())+"dump.data", "wb+"))

saved_states = []
saved_actions = []

if __name__ == "__main__":
run_and_collect()
cProfile.run('run_and_collect()', 'temp/stats')
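
Each run of run_and_collect() pickles a (saved_states, saved_actions) tuple to data/<random>dump.data: states are flattened by vectorize_state into [x, y, vel, angle] entries per dynamic object, and actions come from info_dict["saved_actions"]. A minimal sketch for inspecting one collected dump, assuming run_and_collect() has already written files under data/:

import glob
import pickle

# Load the first demonstration file and report how many timesteps it holds.
for path in glob.glob("data/*dump.data"):
    with open(path, "rb") as f:
        saved_states, saved_actions = pickle.load(f)
    print(path, len(saved_states), "timesteps")
    break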

56 changes: 19 additions & 37 deletions examples/learn_model.py
@@ -1,18 +1,9 @@
import glob
import pickle
import numpy as np
from sklearn import svm
from sklearn.model_selection import GridSearchCV, cross_val_score
from sklearn import linear_model
from sklearn.neural_network import MLPRegressor
from copy import deepcopy

# Parameter Grids for sci-kit learn
#param_grid = [{'C': [.01, 1, 100], 'gamma': [1, 10, 100], 'kernel': ['rbf', 'linear']}]
#param_grid = [{'C': [10], 'gamma': [1], 'kernel': ['rbf']}]
#param_grid = [{'alpha': [0, .01, .1, 1, 10, 100]}, {'learning_rate_init':[.01, .1, 1, 10, 100]}]
param_grid = [{'alpha': [0]}, {'learning_rate_init':[1]}]

from sklearn.ensemble import RandomForestClassifier
from sklearn import tree

def process_files(list_of_paths):
"""
@@ -39,21 +30,20 @@ def process_files(list_of_paths):
states = data[0]
actions = data[1]
assert(len(states) == len(actions))
if(len(states)<20): # bad demos that ended early
continue
for t in range(len(states)): # iterate through time
for o in range(len(actions[t])): # iterate through objects
if not actions[t][o] is None: # TODO: fix this, accel agents shouldn't return None?
print(states[t])
if not actions[t][0] is None: # TODO: fix this, accel agents shouldn't return None?
X.append(np.array(states[t])) # TODO: fix this after state vectorization works
y.append(actions[t][o][1])
y.append(actions[t][0])

X = np.matrix(X)
y = np.array(y)
# TODO: add feature normalization
print(X.shape)
print(y.shape)
print(X)
print(y)
return X,y
print(y.shape)

return X,y


def learn():
@@ -69,32 +59,24 @@ def learn():

all_data = glob.glob("data/*dump.data")
train_data = all_data[0:len(all_data)*8//10]
validation_data = all_data[len(all_data)*8//10: len(all_data)] # TODO: fix when there are <5 files
validation_data = all_data[len(all_data)*8//10: len(all_data)]
train_X, train_y = process_files(train_data)
valid_X, valid_y = process_files(validation_data)
#all_X, all_y = process_files(all_data)
svc = svm.SVC(kernel='rbf')
lr = linear_model.Lasso()
mlp = MLPRegressor(hidden_layer_sizes=(100, 100, 100), learning_rate='adaptive', max_iter = 10000, tol=1e-5)
model = GridSearchCV(estimator=mlp, param_grid=param_grid)
print(train_X.shape)

model = RandomForestClassifier(n_estimators=10, criterion='gini', max_features=None, max_depth=15)

model.fit(train_X, train_y)
train_yp = model.predict(train_X)
valid_yp = model.predict(valid_X)
"""
train_error = np.mean( train_y != train_yp)
valid_error = np.mean( valid_y != valid_yp)
sanity_error = np.mean( train_yp != 1)
sanity_error2 = np.mean( train_y != 1)
"""
train_error = np.mean(np.square(train_y - train_yp))
valid_error = np.mean(np.square(valid_y - valid_yp))
sanity_error = np.mean(train_yp < .5)
sanity_error2 = np.mean(train_y < .5)

train_error = np.mean(train_y != train_yp)
valid_error = np.mean(valid_y != valid_yp)


# TODO: more informative printout
print(len(train_data), len(validation_data))
print(sanity_error, sanity_error2, train_error, valid_error)
print(train_error, valid_error)

pickle.dump(model, open("model.model", "wb"))

if __name__ == "__main__":
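
Since the model is now a RandomForestClassifier over discrete actions rather than an MLPRegressor, learn() reports a misclassification rate instead of a squared error. A small illustration of the new metric, using hypothetical label arrays:

import numpy as np

# Hypothetical true and predicted action labels for four samples.
valid_y = np.array([1, 0, 1, 1])
valid_yp = np.array([1, 1, 1, 0])

# Fraction of samples where the prediction disagrees with the label.
valid_error = np.mean(valid_y != valid_yp)
print(valid_error)  # 0.5
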
3 changes: 2 additions & 1 deletion examples/test.py
@@ -2,11 +2,11 @@
import gym_urbandriving as uds
import cProfile
import time
import numpy as np

from gym_urbandriving.agents import KeyboardAgent, AccelAgent, NullAgent, TrafficLightAgent
from gym_urbandriving.assets import Car, TrafficLight

import numpy as np

"""
Test File, to demonstrate general functionality of environment
@@ -51,6 +51,7 @@ def f():
# Simulate the state
state, reward, done, info_dict = env._step(action)
env._render()
# keep simulator running in spite of collisions or timing out
done = False
# If we crash, sleep for a moment, then reset
if done:
16 changes: 8 additions & 8 deletions examples/test_model.py
@@ -5,7 +5,7 @@
import numpy as np
import pickle

from gym_urbandriving.agents import ModelAgent
from gym_urbandriving.agents import ModelAgent, AccelAgent
from gym_urbandriving.assets import Car

def test_model():
@@ -24,14 +24,15 @@ def test_model():
start_time = time.time()

vis = uds.PyGameVisualizer((800, 800))
init_state = uds.state.SimpleIntersectionState()
init_state = uds.state.SimpleIntersectionState(ncars=2, nped=0)

env = uds.UrbanDrivingEnv(init_state=init_state,
visualizer=vis,
agent_mappings={Car:ModelAgent},
max_time=250,
agent_mappings={Car:AccelAgent},
max_time=200,
randomize=True,
nthreads=4)
use_ray=True)

env._render()
state = init_state
agent = ModelAgent()
@@ -57,10 +58,9 @@ def test_model():

print("done")
print((time.time()-start_time)/totalticks, totalticks)
print(info_dict["dynamic_collisions"])

accs += info_dict["predict_accuracy"]
print(accs/totalticks)
#accs += info_dict["predict_accuracy"]
#print(accs/totalticks)

env._reset()
state = env.current_state
24 changes: 14 additions & 10 deletions examples/tree_search_train.py
@@ -3,7 +3,7 @@
import cProfile
import time

from gym_urbandriving.agents import NullAgent, TreeSearchAgent, SimplePathAgent, AccelAgent
from gym_urbandriving.agents import NullAgent, TreeSearchAgent, AccelAgent

import numpy as np
import pygame
@@ -19,24 +19,28 @@ def run():
Examples
--------
python3 examples/test_path.py
python3 examples/tree_search_train.py
"""

vis = uds.PyGameVisualizer((800, 800))
init_state = uds.state.SimpleIntersectionState(ncars=2, nped=0)
init_state = uds.state.SimpleIntersectionState(ncars=3, nped=0)


env = uds.UrbanDrivingEnv(init_state=None,
visualizer=vis,
agent_mappings={Car:NullAgent},
agent_mappings={Car:AccelAgent},
max_time=-1,
randomize=False,
)
randomize=True,
use_ray=True)


env._reset()
state = env.current_state
agent = TreeSearchAgent()

# To see the training in action
agent = TreeSearchAgent(vis = vis)

action = None

while(True):
@@ -48,12 +52,12 @@ def run():
if done:
print("done")
time.sleep(1)
print(info_dict["dynamic_collisions"])
env._reset()
state = env.current_state

# reset agent state
agent.waypoints = None
agent.actions = None

cProfile.run('run()', 'temp/stats')

if __name__ == "__main__":
run()
7 changes: 4 additions & 3 deletions gym_urbandriving/agents/accel_agent.py
@@ -16,12 +16,12 @@ class AccelAgent:
planning_env : UrbanDrivingEnv
World simulator used internally to plan
"""
actions = [(0, 1), (2, 1), (-2, 1), (0, 0), (1, -1), (-1, -1)]
#actions = [(0,1),(0,0),(0,-1)]

def __init__(self, agent_num=0):
self.agent_num = agent_num
from gym_urbandriving import UrbanDrivingEnv
self.planning_env = UrbanDrivingEnv(init_state=None)
self.valid_actions = [(0, 1), (2, 1), (-2, 1), (0, 0), (1, -1), (-1, -1)]
return

def eval_policy(self, state, nsteps=8):
Expand All @@ -42,12 +42,13 @@ def eval_policy(self, state, nsteps=8):
action
Best action
"""

self.planning_env._reset(state)
start_pos = state.dynamic_objects[self.agent_num].get_pos()
best_action = None
best_time = 0
best_distance = 0
for action in self.actions:
for action in self.valid_actions:
self.planning_env._reset()
pos = state.dynamic_objects[self.agent_num].get_pos()
dist_to_coll = state.min_dist_to_coll(self.agent_num)
1 change: 1 addition & 0 deletions gym_urbandriving/agents/keyboard_agent.py
@@ -13,6 +13,7 @@ class KeyboardAgent:
def __init__(self, agent_num=0):
self.agent_num = agent_num
return

def eval_policy(self, state):
"""
Returns action based on keyboard input
28 changes: 9 additions & 19 deletions gym_urbandriving/agents/model_agent.py
@@ -10,9 +10,15 @@ def __init__(self, agent_num=0):
self.model = pickle.load(open("model.model", "rb"))
self.score = 0
return

def vectorize_state(self, state):
res = []
for obj in state.dynamic_objects:
res.extend([obj.x, obj.y, obj.vel, obj.angle])
return res



def eval_policy(self, state, nsteps=8):
def eval_policy(self, state):
"""
If we can accelerate, see if we crash in nsteps.
If we crash, decelerate, else accelerate
@@ -45,21 +51,5 @@ def eval_policy(self, state, nsteps=8):
best_action = action
best_time = time
"""

# Our prediction
pred_class = self.model.predict(np.array([state.vectorize_state()]))
our_action = (0,pred_class[0])

# TODO: fix arbitrary quantization
our_action = (0,1)
if pred_class<0:
our_action = (0,-1)
elif pred_class<.5:
our_action = (0,0)
else:
our_action = (0,1)


#self.score += (best_action[1]-pred_class[0])**2

return our_action
return self.model.predict(np.array([self.vectorize_state(state)]))[0]
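
A hedged usage sketch of the simplified ModelAgent, assuming a model.model file trained by examples/learn_model.py is present in the working directory. It mirrors the rollout setup in examples/test_model.py; eval_policy now returns the classifier's predicted action directly instead of quantizing a regression output:

import gym_urbandriving as uds
from gym_urbandriving.agents import ModelAgent, AccelAgent
from gym_urbandriving.assets import Car

# Environment configuration matching examples/test_model.py.
init_state = uds.state.SimpleIntersectionState(ncars=2, nped=0)
env = uds.UrbanDrivingEnv(init_state=init_state,
                          agent_mappings={Car: AccelAgent},
                          max_time=200,
                          randomize=True,
                          use_ray=True)

agent = ModelAgent()
state = env.current_state
action = agent.eval_policy(state)                  # classifier's predicted action
state, reward, done, info_dict = env._step(action)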
