Algorithm now seems to converge to the correct values

Swizec · Mar 19, 2012 · 59cf2b7
1 parent d7ee0c6
commit 59cf2b7
Showing 1 changed file with 11 additions and 14 deletions.
diff --git a/stochastic_dp.py b/stochastic_dp.py
@@ -81,8 +81,7 @@
 def stochastic_value():
     value = [[1000 for row in range(len(grid[0]))] for col in range(len(grid))]
     policy = [[' ' for row in range(len(grid[0]))] for col in range(len(grid))]
-    lock = [[0 for row in range(len(grid[0]))] for col in range(len(grid))]
-
+    closed = []
     next = [(goal[1], goal[0])]
 
     value[goal[0]][goal[1]] = 0
@@ -121,29 +120,27 @@ def step(pos):
 
             vals.append((val, delta_name[i]))
 
-            if not wall([x+dx, y+dy]) and lock[y+dy][x+dx] < 1000:
+            if not wall([x+dx, y+dy]) and [x+dx, y+dy] not in closed:
                 next.append((x+dx, y+dy))
 
         val = min(vals, key=lambda v: v[0])
+        closed.append([x,y])
 
         if [y,x] == goal:
             return (0, '*')
         else:
             return val
 
-    while len(next) > 0:
-        pos = next.pop(0)
-        val, pol = step(pos)
-
-        if value[pos[1]][pos[0]] == val:
-            lock[pos[1]][pos[0]] += 1
-
-        value[pos[1]][pos[0]] = val
-        policy[pos[1]][pos[0]] = pol
+    for i in xrange(1000):
+        closed = []
+        next = [(goal[1], goal[0])]
 
-    print value
-    print policy
+        while next:
+            pos = next.pop(0)
+            val, pol = step(pos)
 
+            value[pos[1]][pos[0]] = val
+            policy[pos[1]][pos[0]] = pol
 
     return value, policy