<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array">
    <added>
      <filename>reinforce/sarsaview.cpp</filename>
    </added>
    <added>
      <filename>reinforce/sarsaview.h</filename>
    </added>
  </added>
  <modified type="array">
    <modified>
      <diff>@@ -86,6 +86,85 @@ void Agent::initQ()
     }
 }
 
+// Resets m_e (the e(s,a) table used by doEpisode): clears it, then inserts a 0
+// entry for each (Status, Action) pair enumerated below on the 20x10 grid.
+void Agent::initE()
+{
+    m_e.clear();
+    // Corner cells (0,0), (19,0), (0,9), (19,9): the three unit moves listed for each.
+    m_e.insert(MyQ(Status(0,0), Action(1,0)), 0);
+    m_e.insert(MyQ(Status(0,0), Action(1,1)), 0);
+    m_e.insert(MyQ(Status(0,0), Action(0,1)), 0);
+    m_e.insert(MyQ(Status(19,0), Action(0,1)), 0);
+    m_e.insert(MyQ(Status(19,0), Action(-1,1)), 0);
+    m_e.insert(MyQ(Status(19,0), Action(-1,0)), 0);
+    m_e.insert(MyQ(Status(0,9), Action(1,0)), 0);
+    m_e.insert(MyQ(Status(0,9), Action(1,-1)), 0);
+    m_e.insert(MyQ(Status(0,9), Action(0,-1)), 0);
+    m_e.insert(MyQ(Status(19,9), Action(-1,0)), 0);
+    m_e.insert(MyQ(Status(19,9), Action(-1,-1)), 0);
+    m_e.insert(MyQ(Status(19,9), Action(0,-1)), 0);
+    // Edge rows (i,0) and (i,9) for i in 1..18: five unit moves each.
+    for(int i=1;i&lt;19;i++){
+        m_e.insert(MyQ(Status(i,0), Action(1,0)), 0);
+        m_e.insert(MyQ(Status(i,0), Action(-1,0)), 0);
+        m_e.insert(MyQ(Status(i,0), Action(1,1)), 0);
+        m_e.insert(MyQ(Status(i,0), Action(-1,1)), 0);
+        m_e.insert(MyQ(Status(i,0), Action(0,1)), 0);
+        m_e.insert(MyQ(Status(i,9), Action(1,0)), 0);
+        m_e.insert(MyQ(Status(i,9), Action(-1,0)), 0);
+        m_e.insert(MyQ(Status(i,9), Action(1,-1)), 0);
+        m_e.insert(MyQ(Status(i,9), Action(-1,-1)), 0);
+        m_e.insert(MyQ(Status(i,9), Action(0,-1)), 0);
+    }
+    // Edge columns (0,j) and (19,j) for j in 1..8; must index with j, not a stale i.
+    for(int j=1;j&lt;9;j++){
+        m_e.insert(MyQ(Status(0, j), Action(0,1)), 0);
+        m_e.insert(MyQ(Status(0, j), Action(0,-1)), 0);
+        m_e.insert(MyQ(Status(0, j), Action(1,1)), 0);
+        m_e.insert(MyQ(Status(0, j), Action(1,-1)), 0);
+        m_e.insert(MyQ(Status(0, j), Action(1,0)), 0);
+        m_e.insert(MyQ(Status(19,j), Action(0,1)), 0);
+        m_e.insert(MyQ(Status(19,j), Action(0,-1)), 0);
+        m_e.insert(MyQ(Status(19,j), Action(-1,1)), 0);
+        m_e.insert(MyQ(Status(19,j), Action(-1,-1)), 0);
+        m_e.insert(MyQ(Status(19,j), Action(-1,0)), 0);
+    }
+    // Every cell of the grid: the null move Action(0,0).
+    for(int i=0;i&lt;20;i++){
+        for(int j=0;j&lt;10;j++){
+            m_e.insert(MyQ(Status(i,j), Action(0,0)), 0);
+        }
+    }
+    // Cells with 1 &lt;= i &lt; 19 and 1 &lt;= j &lt; 9: all eight unit moves.
+    for(int i=1;i&lt;19;i++){
+        for(int j=1;j&lt;9;j++){
+            m_e.insert(MyQ(Status(i,j), Action(1,0)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(-1,0)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(0,1)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(0,-1)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(1,1)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(1,-1)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(-1,1)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(-1,-1)), 0);
+        }
+    }
+    // Cells with 2 &lt;= i &lt; 18 and 2 &lt;= j &lt; 8: the eight moves of length two per axis.
+    for(int i=2;i&lt;18;i++){
+        for(int j=2;j&lt;8;j++){
+            m_e.insert(MyQ(Status(i,j), Action(2,0)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(-2,0)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(0,2)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(0,-2)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(2,2)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(-2,2)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(2,-2)), 0);
+            m_e.insert(MyQ(Status(i,j), Action(-2,-2)), 0);
+        }
+    }
+}
+
+
 void Agent::changeAlpha(int i)
 {
     setAlpha(i/10);
@@ -262,7 +341,30 @@ int Agent::doEpisode()
         Reward old_Q_sa = m_Q.find(MyQ(s, a)).value();
         Reward old_Q_sa_dash = m_Q.find(MyQ(s_dash, a_dash)).value();
 
-        m_Q[MyQ(s, a)] = old_Q_sa + m_alpha*(r + m_gamma*old_Q_sa_dash - old_Q_sa);
+        Reward delta = r + m_gamma*old_Q_sa_dash - old_Q_sa;
+        m_e[MyQ(s, a)] = m_e[MyQ(s, a)] + 1;
+
+
+        //Normal Sarsa
+//        Reward newval = old_Q_sa + m_alpha*(delta);
+//        m_Q[MyQ(s, a)] = newval;
+//        Reward debugval = m_Q[MyQ(s, a)];
+
+        //Sarsa(lambda)
+        //For all s,a:
+        //Q(s,a) := Q(s,a) + alpha*delta*e(s,a)
+        //e(s,a) := gamma*lambda*e(s,a)
+        // list (iterate over all entries)
+        QMap&lt; MyQ, Reward &gt;::iterator p;
+        for(p=m_Q.begin(); p!=m_Q.end(); p++)
+        {
+            MyQ q(p.key());
+            m_Q[p.key()] = m_Q[p.key()] + m_alpha * delta * m_e[p.key()];
+            m_e[p.key()] = m_gamma * 0.9 * m_e[p.key()];
+        }
+
+
+        emit updateQ();
         s = s_dash;
         a = a_dash;
         ret++;</diff>
      <filename>reinforce/agent.cpp</filename>
    </modified>
    <modified>
      <diff>@@ -17,6 +17,7 @@ public:
     void setEps(double eps){ m_eps = eps; }
     int doEpisode();
     void initQ();
+    void initE();
     bool isFin(Status st);
     void stop();
     Action selectAction(Status s);
@@ -24,13 +25,17 @@ public slots:
     void changeAlpha(int i);
     void changeGamma(int i);
     void changeEps(int i);
+signals:
+    void updateQ();
 private:
     Environment *m_env;
     double m_alpha;
     double m_gamma;
     double m_eps;
     bool m_stop;
+public:
     QMap&lt; MyQ, Reward &gt; m_Q;
+    QMap&lt; MyQ, double &gt; m_e;
 };
 
 #endif // AGENT_H</diff>
      <filename>reinforce/agent.h</filename>
    </modified>
    <modified>
      <diff>@@ -9,9 +9,12 @@ TEMPLATE = app
 SOURCES += main.cpp \
     mainwindow.cpp \
     environment.cpp \
-    agent.cpp
+    agent.cpp \
+    sarsaview.cpp \
+    ../Acrobot/utils.cpp
 HEADERS += mainwindow.h \
     environment.h \
     agent.h \
-    defs.h
+    defs.h \
+    sarsaview.h
 FORMS += mainwindow.ui</diff>
      <filename>reinforce/demo1.pro</filename>
    </modified>
    <modified>
      <diff>@@ -26,6 +26,10 @@ MainWindow::MainWindow(QWidget *parent)
             agent, SLOT(changeGamma(int)));
     connect(ui-&gt;epsilonCombo, SIGNAL(currentIndexChanged(int)),
             agent, SLOT(changeEps(int)));
+
+    ui-&gt;sView-&gt;setQ(&amp;(agent-&gt;m_Q));
+    connect(agent, SIGNAL(updateQ()),
+            ui-&gt;sView, SLOT(update()));
 }
 
 MainWindow::~MainWindow()
@@ -68,6 +72,7 @@ void MainWindow::on_startButton_clicked()
     }
     env-&gt;initStatus();
     agent-&gt;initQ();
+    agent-&gt;initE();
     for(int i=0;i&lt;1000;i++){
         if(stop) break;
         ui-&gt;textEdit-&gt;append(&quot;try&quot;);</diff>
      <filename>reinforce/mainwindow.cpp</filename>
    </modified>
    <modified>
      <diff>@@ -1414,6 +1414,9 @@
          &lt;/item&gt;
         &lt;/widget&gt;
        &lt;/item&gt;
+       &lt;item&gt;
+        &lt;widget class=&quot;SarsaView&quot; name=&quot;sView&quot; native=&quot;true&quot;/&gt;
+       &lt;/item&gt;
       &lt;/layout&gt;
      &lt;/widget&gt;
     &lt;/item&gt;
@@ -1462,6 +1465,14 @@
   &lt;/action&gt;
  &lt;/widget&gt;
  &lt;layoutdefault spacing=&quot;6&quot; margin=&quot;11&quot;/&gt;
+ &lt;customwidgets&gt;
+  &lt;customwidget&gt;
+   &lt;class&gt;SarsaView&lt;/class&gt;
+   &lt;extends&gt;QWidget&lt;/extends&gt;
+   &lt;header&gt;sarsaview.h&lt;/header&gt;
+   &lt;container&gt;1&lt;/container&gt;
+  &lt;/customwidget&gt;
+ &lt;/customwidgets&gt;
  &lt;resources/&gt;
  &lt;connections&gt;
   &lt;connection&gt;</diff>
      <filename>reinforce/mainwindow.ui</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>b52adbaf108272cd5b1467351ac78a97542c8a49</id>
    </parent>
  </parents>
  <author>
    <name>NOSE Takafumi</name>
    <email>ahya365@gmail.com</email>
  </author>
  <url>http://github.com/plus7/plus7pub/commit/907e79d2ab92715084e154e80523b25730416d95</url>
  <id>907e79d2ab92715084e154e80523b25730416d95</id>
  <committed-date>2009-05-14T22:22:40-07:00</committed-date>
  <authored-date>2009-05-14T22:22:40-07:00</authored-date>
  <message>Sarsa(lambda)</message>
  <tree>0df7dc2a48d7b255ba6c5991987535f575213af6</tree>
  <committer>
    <name>NOSE Takafumi</name>
    <email>ahya365@gmail.com</email>
  </committer>
</commit>
