<?xml version="1.0" encoding="UTF-8"?>
<commit>
  <added type="array"/>
  <modified type="array">
    <modified>
      <diff>@@ -3,7 +3,7 @@
  */
 package glueAgent;
 
-import learners.EpisodicSarsaLearner;
+import learners.SarsaLearner;
 
 import org.rlcommunity.rlglue.codec.AgentInterface;
 import org.rlcommunity.rlglue.codec.taskspec.TaskSpec;
@@ -26,7 +26,6 @@ public class GlueAgent implements AgentInterface {
 //	private Action action;
 	TaskSpec TS = null;
 
-
 	int getStateSpaceSize() {
 		int size = 1;
 
@@ -72,25 +71,23 @@ public class GlueAgent implements AgentInterface {
 		// FIXME: rrl.TaskDescription.environment = new GlueEnvironment(TSO);
 
 		System.out.println(&quot;Size of qtable &quot; + getStateSpaceSize() + &quot;x&quot; + getActionSpaceSize());
-		learner = new EpisodicSarsaLearner(new DeterministicQTable(getStateSpaceSize(), getActionSpaceSize()));
+		//learner = new EpisodicSarsaLearner(new DeterministicQTable(getStateSpaceSize(), getActionSpaceSize()));
 		//learner = new RandomLearner(new DeterministicQTable(getStateSpaceSize(), getActionSpaceSize()));
 
-		// learner = new SarsaLearner(new DeterministicQTable(getStateSpaceSize(), getActionSpaceSize()));
+		learner = new SarsaLearner(new DeterministicQTable(getStateSpaceSize(), getActionSpaceSize()));
 		//learner = new SarsaLearner(new TabularModel());
 		// learner = new SarsaLearner(new TabularModel());
 
-		learner.agent_init();
+		learner.agentInit();
 
-		//        action = new Action(TSO.num_discrete_action_dims, 0);
+		//        action = new Action(TSO.num_discrete_action_dims, 0); done
 	}
 
 	public Action agent_start(Observation o) {
-		System.out.println(&quot;START&quot;);
-		
 		//Choose and return the agent's first action
 		State state = new GlueState(o);
 
-		rrl.Action action = learner.agent_start(state);
+		rrl.Action action = learner.agentStart(state);
 		GlueAction ca = (GlueAction) action;
 
 		return ca.a;
@@ -103,28 +100,59 @@ public class GlueAgent implements AgentInterface {
 //		if (r &gt; 0)
 //		System.out.println(&quot;Reward &quot; + r);
 
-		rrl.Action action = learner.agent_step(r, state);
+		rrl.Action action = learner.agentStep(r, state);
 		GlueAction ca = (GlueAction) action;
 
 		return ca.a;
 	}
 
 	public void agent_end(double r) {
-		System.out.println(&quot;END&quot;);
-		
-		learner.agent_end(r);
+		learner.agentEnd(r);
 	}
+	
+	  public String agent_message(String message) {
+
+	        if (message.equals(&quot;freeze learning&quot;)) {
+	        	learner.agentFreeze();
+	            return &quot;message understood, policy frozen&quot;;
+	        }
+	        if (message.equals(&quot;unfreeze learning&quot;)) {
+	        	learner.agentUnfreeze();
+	            return &quot;message understood, policy unfrozen&quot;;
+	        }
+	        if (message.equals(&quot;freeze exploring&quot;)) {
+	            learner.agentFreezeExploring();
+	            return &quot;message understood, exploring frozen&quot;;
+	        }
+	        if (message.equals(&quot;unfreeze exploring&quot;)) {
+	            learner.agentUnfreezeExploring();
+	            return &quot;message understood, exploring unfrozen&quot;;
+	        }
+//	        if (message.startsWith(&quot;save_policy&quot;)) {
+//	            String[] parts = message.split(&quot; &quot;);
+//	            saveValueFunction(parts[1]);
+//	            System.out.println(&quot;Saved.&quot;);
+//	            return &quot;message understood, saving policy&quot;;
+//	        }
+//	        if (message.startsWith(&quot;load_policy&quot;)) {
+//	            String[] parts = message.split(&quot; &quot;);
+//	            loadValueFunction(parts[1]);
+//	            System.out.println(&quot;Loaded.&quot;);
+//	            return &quot;message understood, loading policy&quot;;
+//	        }
+
+	        return &quot;SampleSarsaAgent(Java) does not understand your message.&quot;;
+
+	    }
+
 
 	public void agent_cleanup() {
 		System.out.println(&quot;cleanup&quot;);
 	}
 
-	public void agent_freeze() {
+	public void agentFreeze() {
 		System.out.println(&quot;freeze&quot;);
-		learner.agent_freeze();
-	}
-
-	public String agent_message(String theMessage) {
-		return null;
+		learner.agentFreeze();
 	}
+	
 }</diff>
      <filename>src/glueAgent/GlueAgent.java</filename>
    </modified>
    <modified>
      <diff>@@ -30,42 +30,13 @@ import java.io.IOException;
 import java.util.Vector;
 import org.rlcommunity.rlglue.codec.RLGlue;
 
-/**
- * Experiment program that does some of the things that might be important when
- * running an experiment.  It runs an agent on the environment and periodically
- * asks the agent to &quot;freeze learning&quot;: to stop updating its policy for a number
- * of episodes in order to get an estimate of the quality of what has been learned
- * so far.
- *
- * The experiment estimates statistics such as the mean and standard deviation of
- * the return gathered by the policy and writes those to a comma-separated value file
- * called results.csv.
- *
- * This experiment also shows off some other features that can be achieved easily
- * through the RL-Glue env/agent messaging system by freezing learning (described
- * above), having the environment start in specific starting states, and saving
- * and loading the agent's value function to/from a binary data file.
- * @author Brian Tanner
- */
 public class GlueExperiment {
 
-    private void saveResultsToCSVFile(Vector&lt;evaluationPoint&gt; results, String fileName) {
-        try {
-            FileWriter FW = new FileWriter(new File(fileName));
-            FW.write(&quot;#Results from SampleExperiment.java.  First line is means, second line is standard deviations.\n&quot;);
-            for (evaluationPoint point : results) {
-                FW.write(&quot;&quot; + point.mean + &quot;,&quot;);
-            }
-            FW.write(&quot;\n&quot;);
-            for (evaluationPoint point : results) {
-                FW.write(&quot;&quot; + point.standardDeviation + &quot;,&quot;);
-            }
-            FW.write(&quot;\n&quot;);
-            FW.close();
-        } catch (IOException ex) {
-            System.out.println(&quot;Problem writing results out to file: &quot; + fileName + &quot; :: &quot; + ex);
-        }
-    }
+	static final int numRuns = 20;
+	static final int numTrainingEpisodesWithSaving = 25;
+	static final int numTrainingEpisodesWithoutSaving = 1000;
+	static final int numTestingEpisodes = 25;
+	static String outputDirectory = &quot;/home/valibook/eclipse/learny/&quot;;
 
     class evaluationPoint {
 
@@ -78,116 +49,93 @@ public class GlueExperiment {
         }
     }
 
-    /**
-     * Tell the agent to stop learning, then execute n episodes with his current
-     * policy.  Estimate the mean and variance of the return over these episodes.
-     * @return
-     */
-    evaluationPoint evaluateAgent() {
-        int i = 0;
-        double sum = 0;
-        double sum_of_squares = 0;
-        double this_return = 0;
-        double mean;
-        double variance;
-        int n = 10;
-
-        RLGlue.RL_agent_message(&quot;freeze learning&quot;);
-        for (i = 0; i &lt; n; i++) {
-            /* We use a cutoff here in case the policy is bad
-            and will never end an episode */
-            System.out.println(&quot;episode i = &quot; + i);
-        	RLGlue.RL_episode(5000);        	
-            this_return = RLGlue.RL_return();
-            System.out.println(&quot;episode done, return &quot; + this_return);
-            sum += this_return;
-            sum_of_squares += this_return * this_return;
-        }
-        RLGlue.RL_agent_message(&quot;unfreeze learning&quot;);
-
-        mean = sum / (double)n;
-        variance = (sum_of_squares - (double)n * mean * mean) / ((double)n - 1.0f);
-        return new evaluationPoint(mean, Math.sqrt(variance));
-    }
-
-    /*
-        This function will freeze the agent's policy and test it after every 25 episodes.
-     */
-    void printScore(int afterEpisodes, evaluationPoint theScore) {
-        System.out.printf(&quot;%d\t\t%.2f\t\t%.2f\n&quot;, afterEpisodes, theScore.mean, theScore.standardDeviation);
-    }
-
-    void offlineDemo() {
-        Vector&lt;evaluationPoint&gt; results = new Vector&lt;evaluationPoint&gt;();
-        evaluationPoint initialScore = evaluateAgent();
-        printScore(0, initialScore);
-        for (int i = 0; i &lt; 20; i++) {
-            for (int j = 0; j &lt; 25; j++) {
-                RLGlue.RL_episode(0);
-            }
-            evaluationPoint currentScore = evaluateAgent();
-            printScore((i + 1) * 25, currentScore);
-            results.add(currentScore);
-        }
-
-        System.out.println(&quot;The results of this experiment have been saved to a&quot; +
-                &quot; comma-separated value file called results.csv that you may open with Matlab, Octave, Excel, etc.&quot;);
-
-        saveResultsToCSVFile(results, &quot;results.csv&quot;);
-
-    }
-
     public void runExperiment() {
-        RLGlue.RL_init();
-        System.out.println(&quot;Starting offline demo\n----------------------------\nWill alternate learning for 25 episodes, then freeze policy and evaluate for 10 episodes.\n&quot;);
-        System.out.println(&quot;After Episode\tMean Return\tStandard Deviation\n-------------------------------------------------------------------------&quot;);
-        offlineDemo();
-
-//        System.out.println(&quot;\nNow we will save the agent's learned value function to a file....&quot;);
-//
-//        RLGlue.RL_agent_message(&quot;save_policy valuefunction.dat&quot;);
-//
-//        System.out.println(&quot;\nCalling RL_cleanup and RL_init to clear the agent's memory...&quot;);
-//
-//        RLGlue.RL_cleanup();
-//        RLGlue.RL_init();
-//
-//
-//        System.out.println(&quot;Evaluating the agent's default policy:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------&quot;);
-//        evaluationPoint thisScore=evaluateAgent();
-//        printScore(0, thisScore);
-//
-//        System.out.println(&quot;\nLoading up the value function we saved earlier.&quot;);
-//        RLGlue.RL_agent_message(&quot;load_policy valuefunction.dat&quot;);
-//
-//        System.out.println(&quot;Evaluating the agent after loading the value function:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------&quot;);
-//        thisScore=evaluateAgent();
-//        printScore(0, thisScore);
-//
-//        System.out.println(&quot;Telling the environment to use fixed start state of 2,3.&quot;);
-//        RLGlue.RL_env_message(&quot;set-start-state 2 3&quot;);
-//
-//        RLGlue.RL_start();
-//
-//        System.out.println(&quot;Telling the environment to print the current state to the screen.&quot;);
-//        RLGlue.RL_env_message(&quot;print-state&quot;);
-//
-//        System.out.println(&quot;Evaluating the agent a few times from a fixed start state of 2,3:\n\t\tMean Return\tStandardDeviation\n-------------------------------------------&quot;);
-//        thisScore=evaluateAgent();
-//        printScore(0, thisScore);
-//
-//        System.out.println(&quot;Evaluating the agent again with the random start state:\n\t\tMean Return\tStandardDeviation\n------------------------------------------------------&quot;);
-//        RLGlue.RL_env_message(&quot;set-random-start-state&quot;);
-//        thisScore=evaluateAgent();
-//        printScore(0, thisScore);
-//
-//        System.out.println(&quot;\nProgram Complete.&quot;);
-//        RLGlue.RL_cleanup();
-//
-
+    	long[] trainingSteps = new long[numTrainingEpisodesWithSaving];         
+    	double[] avgTrainingSteps = new double[numTrainingEpisodesWithSaving];        
+    	double[] trainingRewards = new double[numTrainingEpisodesWithSaving];
+    	long[] testingSteps = new long[numTestingEpisodes];
+    	double[] avgTestingSteps = new double[numTestingEpisodes];
+    	double[] testingRewards = new double[numTestingEpisodes];
+
+    	for (int i = 0; i &lt; numRuns; i++) {
+    		RLGlue.RL_init();
+
+    		for (int j = 0; j &lt; numTrainingEpisodesWithSaving; j++) {
+    			trainingSteps[j] = 0;
+    			trainingRewards[j] = 0;
+    		}
+    		for (int j = 0; j &lt; numTestingEpisodes; j++) {
+    			testingSteps[j] = 0;
+    			testingRewards[j] = 0;
+    		}
+
+    		// train &amp;&amp; save statistics
+    		for (int j = 0; j &lt; numTrainingEpisodesWithSaving; j++) {
+    			RLGlue.RL_episode(5000);
+    			trainingSteps[j] += RLGlue.RL_num_steps();
+    			trainingRewards[j] += RLGlue.RL_return();
+    		}
+
+    		// just train
+    		for (int j = 0; j &lt; numTrainingEpisodesWithoutSaving; j++)
+    			RLGlue.RL_episode(5000);
+    			
+
+    		RLGlue.RL_agent_message(&quot;freeze_learning&quot;);
+    		// test = evaluate agent
+    		for (int j = 0; j &lt; numTestingEpisodes; j++) {
+    			RLGlue.RL_episode(5000);
+    			testingSteps[j] += RLGlue.RL_num_steps();
+    			testingRewards[j] += RLGlue.RL_return();
+    		}
+
+    		RLGlue.RL_cleanup();
+    	}
+
+    	for (int j = 0; j &lt; numTrainingEpisodesWithSaving; j++)
+    		avgTrainingSteps[j] = (double)trainingSteps[j] / (double)numRuns;
+    	for (int j = 0; j &lt; numTestingEpisodes; j++)
+    		avgTestingSteps[j] = (double)testingSteps[j] / (double)numRuns;
+
+    	try {
+    		FileWriter FW = new FileWriter(new File(outputDirectory + &quot;/avgTrainingSteps.txt&quot;));
+    		for (int j = 0; j &lt; numTrainingEpisodesWithSaving; j++) {
+    			FW.write(&quot;&quot; + avgTrainingSteps[j] + &quot;\n&quot;);
+    		}
+    		FW.close();
+    		System.out.println(&quot;Created file &quot; + outputDirectory + &quot;/avgTrainingSteps.txt&quot;);
+
+    		FW = new FileWriter(new File(outputDirectory + &quot;/avgTrainingRewards.txt&quot;));
+    		for (int j = 0; j &lt; numTrainingEpisodesWithSaving; j++) {
+    			FW.write(&quot;&quot; + (double)trainingRewards[j] / (double)numRuns + &quot;\n&quot;);
+    		}
+    		FW.close();
+    		System.out.println(&quot;Created file &quot; + outputDirectory + &quot;/avgTrainingRewards.txt&quot;);
+
+    		
+    		FW = new FileWriter(new File(outputDirectory + &quot;/avgTestingRewards.txt&quot;));
+    		for (int j = 0; j &lt; numTestingEpisodes; j++) {
+    			FW.write(&quot;&quot; + (double)testingSteps[j] / (double)numRuns + &quot;\n&quot;);
+    		}
+    		FW.close();
+    		System.out.println(&quot;Created file &quot; + outputDirectory + &quot;/avgTestingRewards.txt&quot;);
+    		
+    		FW = new FileWriter(new File(outputDirectory + &quot;/avgTestingSteps.txt&quot;));
+    		for (int j = 0; j &lt; numTestingEpisodes; j++) {
+    			FW.write(&quot;&quot; + avgTestingSteps[j] + &quot;\n&quot;);
+    		}
+    		FW.close();
+    		System.out.println(&quot;Created file &quot; + outputDirectory + &quot;/avgTestingSteps.txt&quot;);
+
+    	} catch (IOException ex) {
+    		System.out.println(&quot;Problem writing results out to file: &quot; + ex);
+    	}
     }
 
     public static void main(String[] args) {
+    	if (args.length &gt; 1 &amp;&amp; args[1] != null)
+    		outputDirectory = args[1];
+    	
         GlueExperiment theExperiment = new GlueExperiment();
         theExperiment.runExperiment();
     }</diff>
      <filename>src/glueAgent/GlueExperiment.java</filename>
    </modified>
    <modified>
      <diff>@@ -36,7 +36,7 @@ public class EpisodicQLearner implements Learner, Serializable  {
 		this.model = model;
 	}
 
-	public void agent_init() {
+	public void agentInit() {
 		// model = new Tree();
 		egreedy = new EGreedyPolicy();
 		episode = 0;
@@ -44,12 +44,12 @@ public class EpisodicQLearner implements Learner, Serializable  {
 		EpisodeMemoryRewards = new ArrayList&lt;Double&gt;();
 	}
 
-	public void agent_cleanup() {
+	public void agentCleanup() {
 		EpisodeMemoryQValues = null;
 		EpisodeMemoryRewards = null;
 	}
 	
-	public void agent_end(double reward) {
+	public void agentEnd(double reward) {
 		EpisodeMemoryRewards.add(reward);
 		
 		if (EpisodeMemoryRewards.size() != EpisodeMemoryQValues.size()) {
@@ -80,16 +80,16 @@ public class EpisodicQLearner implements Learner, Serializable  {
 		EpisodeMemoryQValues.clear();
 	}
 
-	public void agent_freeze() {
+	public void agentFreeze() {
 		alpha = 0f;
 		epsilon = 0f;
 	}
 
-	public String agent_message(String arg0) {
+	public String agentMessage(String arg0) {
 		return &quot;SarsaAgent does not implement agent_message currently&quot;;
 	}
 
-	public rrl.Action agent_start(State state) {		
+	public rrl.Action agentStart(State state) {		
 		rrl.Action action = egreedy.chooseAction(model, epsilon, state);  
 		
 		QValue qValue = model.findQValue(state, action);
@@ -103,7 +103,7 @@ public class EpisodicQLearner implements Learner, Serializable  {
 		return action;
 	}
 	
-	public rrl.Action agent_step(double reward, State state) {
+	public rrl.Action agentStep(double reward, State state) {
 		
 		rrl.Action newAction = egreedy.chooseAction(model, epsilon, state);
 		
@@ -179,4 +179,31 @@ public class EpisodicQLearner implements Learner, Serializable  {
 			ex.printStackTrace();
 		}
 	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentFreezeExploring()
+	 */
+	@Override
+	public void agentFreezeExploring() {
+		// TODO Auto-generated method stub
+		
+	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreeze()
+	 */
+	@Override
+	public void agentUnfreeze() {
+		// TODO Auto-generated method stub
+		
+	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreezeExploring()
+	 */
+	@Override
+	public void agentUnfreezeExploring() {
+		// TODO Auto-generated method stub
+		
+	}
 }</diff>
      <filename>src/learners/EpisodicQLearner.java</filename>
    </modified>
    <modified>
      <diff>@@ -8,6 +8,8 @@ import java.io.ObjectOutputStream;
 import java.io.Serializable;
 import java.util.ArrayList;
 
+import org.rlcommunity.rlglue.codec.RLGlue;
+
 import rrl.DeterministicModel;
 import rrl.EGreedyPolicy;
 import rrl.Learner;
@@ -16,7 +18,7 @@ import rrl.State;
 
 /**
  * @author slusny
- *
+  *
  * SARSA learner with episodic memory.
  */
 public class EpisodicSarsaLearner implements Learner, Serializable  {
@@ -36,7 +38,7 @@ public class EpisodicSarsaLearner implements Learner, Serializable  {
 		this.model = model;
 	}
 
-	public void agent_init() {
+	public void agentInit() {
 		// model = new Tree();
 		egreedy = new EGreedyPolicy();
 		episode = 0;
@@ -44,23 +46,20 @@ public class EpisodicSarsaLearner implements Learner, Serializable  {
 		EpisodeMemoryRewards = new ArrayList&lt;Double&gt;();
 	}
 
-	public void agent_freeze() {
+	public void agentFreeze() {
 		alpha = 0f;
 		epsilon = 0f;
 	}
 
-	public String agent_message(String arg0) {
+	public String agentMessage(String arg0) {
 		return &quot;SarsaAgent does not implement agent_message currently&quot;;
 	}
 
-	public rrl.Action agent_start(State state) {		
-		if (EpisodeMemoryRewards.size() &gt; 0 || EpisodeMemoryQValues.size() &gt;  0) {
-			System.out.println(&quot;Internal error 3&quot;);
-			System.out.println(EpisodeMemoryRewards.size());
-			System.out.println(EpisodeMemoryQValues.size());
-			throw new ArithmeticException();
-		}
-
+	public rrl.Action agentStart(State state) {		
+		step_num = 0;
+		EpisodeMemoryRewards.clear();
+		EpisodeMemoryQValues.clear();
+			
 		rrl.Action action = egreedy.chooseAction(model, epsilon, state);  
 		
 		QValue qValue = model.findQValue(state, action);
@@ -70,15 +69,11 @@ public class EpisodicSarsaLearner implements Learner, Serializable  {
 		}
 
 		EpisodeMemoryQValues.add(qValue);
-
-		step_num = 0;
-		
-//		System.out.println(&quot;AGENT START &quot; + EpisodeMemoryRewards.size() + &quot; &quot; + EpisodeMemoryQValues.size());
 		
 		return action;
 	}
 	
-	public rrl.Action agent_step(double reward, State state) {
+	public rrl.Action agentStep(double reward, State state) {
 		
 		rrl.Action newAction = egreedy.chooseAction(model, epsilon, state);
 		
@@ -93,33 +88,18 @@ public class EpisodicSarsaLearner implements Learner, Serializable  {
 		
 		step_num += 1;
 		
-		return newAction;
-	}
-	
-	public void writeInfo(String fileName) {
-		model.writeInfo(fileName);
-	}
-	
-	public void loadMe(ObjectInputStream in) {
-		try	{
-			egreedy = (EGreedyPolicy) in.readObject();
-			model = (DeterministicModel) in.readObject();
-			alpha = in.readDouble();
-			epsilon = in.readDouble(); 
-			sarsa_gamma = in.readDouble();
-			step_num = in.readInt();
-		} catch(IOException ex)	{
-			ex.printStackTrace();
-		} catch(ClassNotFoundException ex){
-			ex.printStackTrace();
+		if (step_num % 100 == 0) {
+			System.out.println(&quot;STEP &quot; + step_num);
+//			System.out.println(&quot;Telling the environment to print the current state to the screen.&quot;);
+//	        RLGlue.RL_env_message(&quot;print-state&quot;);
 		}
+		
+		return newAction;
 	}
 
-	public void agent_end(double reward) {		
+	public void agentEnd(double reward) {		
 		EpisodeMemoryRewards.add(reward);
 		
-		System.out.println(&quot;AGENT END &quot; + EpisodeMemoryRewards.size() + &quot; &quot; + EpisodeMemoryQValues.size());
-		
 		if (EpisodeMemoryRewards.size() != EpisodeMemoryQValues.size()) {
 			System.out.println(&quot;Internal error 2&quot;);
 			System.out.println(EpisodeMemoryRewards.size());
@@ -139,19 +119,33 @@ public class EpisodicSarsaLearner implements Learner, Serializable  {
 			// oldQ = oldaQ + alpha * delta
 			oldQValue.qvalue += alpha * delta;
 			model.updateQValue(oldQValue);
-//
 			newQValue = oldQValue;
 		}
 		
 		epsilon *= .99;
 		episode++;
-		
-		EpisodeMemoryRewards.clear();
-		EpisodeMemoryQValues.clear();
-		
+	}
+
+	public void writeInfo(String fileName) {
+		model.writeInfo(fileName);
 	}
 	
-	public void agent_cleanup() {
+	public void loadMe(ObjectInputStream in) {
+		try	{
+			egreedy = (EGreedyPolicy) in.readObject();
+			model = (DeterministicModel) in.readObject();
+			alpha = in.readDouble();
+			epsilon = in.readDouble(); 
+			sarsa_gamma = in.readDouble();
+			step_num = in.readInt();
+		} catch(IOException ex)	{
+			ex.printStackTrace();
+		} catch(ClassNotFoundException ex){
+			ex.printStackTrace();
+		}
+	}
+	
+	public void agentCleanup() {
 		EpisodeMemoryQValues = null;
 		EpisodeMemoryRewards = null;
 	}
@@ -196,4 +190,31 @@ public class EpisodicSarsaLearner implements Learner, Serializable  {
 			ex.printStackTrace();
 		}
 	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentFreezeExploring()
+	 */
+	@Override
+	public void agentFreezeExploring() {
+		// TODO Auto-generated method stub
+		
+	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreeze()
+	 */
+	@Override
+	public void agentUnfreeze() {
+		// TODO Auto-generated method stub
+		
+	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreezeExploring()
+	 */
+	@Override
+	public void agentUnfreezeExploring() {
+		// TODO Auto-generated method stub
+		
+	}
 }</diff>
      <filename>src/learners/EpisodicSarsaLearner.java</filename>
    </modified>
    <modified>
      <diff>@@ -35,7 +35,7 @@ public class QLearner implements Learner, Serializable  {
 		this.model = model;
 	}
 
-	public void agent_init() {
+	public void agentInit() {
 		// model = new Tree();
 		egreedy = new EGreedyPolicy();
 		run++;
@@ -44,7 +44,7 @@ public class QLearner implements Learner, Serializable  {
 
 	// At the end of a trial agent_cleanup will be called to allow for the deallocation of any resources created in agent_init.
 	// Calls to agent_init and agent_cleanup should always be in a one to one ratio. 
-	public void agent_cleanup() {
+	public void agentCleanup() {
 		// TODO: delete qtree;
 	}
 	
@@ -79,7 +79,7 @@ public class QLearner implements Learner, Serializable  {
 	
 	// If the environment is episodic, the agent_end method will be called to allow for the experimenter 
 	// to complete the last step of his/her learning algorithm.
-	public void agent_end(double reward) {
+	public void agentEnd(double reward) {
 		//Update last step of current episode
 		double error = reward - sarsa_gamma * oldQValue.qvalue;
 		oldQValue.qvalue += alpha * error; 
@@ -92,16 +92,16 @@ public class QLearner implements Learner, Serializable  {
 		episode++;
 	}
 
-	public void agent_freeze() {
+	public void agentFreeze() {
 		alpha = 0f;
 		epsilon = 0f;
 	}
 
-	public String agent_message(String arg0) {
+	public String agentMessage(String arg0) {
 		return &quot;SarsaAgent does not implement agent_message currently&quot;;
 	}
 
-	public rrl.Action agent_start(State state) {		
+	public rrl.Action agentStart(State state) {		
 		rrl.Action action = egreedy.chooseAction(model, epsilon, state);  
 		
 		oldQValue = model.findQValue(state, action);
@@ -113,7 +113,7 @@ public class QLearner implements Learner, Serializable  {
 		return action;
 	}
 	
-	public rrl.Action agent_step(double reward, State state) {
+	public rrl.Action agentStep(double reward, State state) {
 		rrl.Action newAction = egreedy.chooseAction(model, epsilon, state);
 		
 		QValue newQValue = model.findQValue(state, newAction);
@@ -167,4 +167,31 @@ public class QLearner implements Learner, Serializable  {
 			ex.printStackTrace();
 		}
 	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentFreezeExploring()
+	 */
+	@Override
+	public void agentFreezeExploring() {
+		// TODO Auto-generated method stub
+		
+	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreeze()
+	 */
+	@Override
+	public void agentUnfreeze() {
+		// TODO Auto-generated method stub
+		
+	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreezeExploring()
+	 */
+	@Override
+	public void agentUnfreezeExploring() {
+		// TODO Auto-generated method stub
+		
+	}
 }</diff>
      <filename>src/learners/QLearner.java</filename>
    </modified>
    <modified>
      <diff>@@ -36,21 +36,21 @@ public class RandomLearner implements Learner, Serializable  {
 		this.model = model;
 	}
 
-	public void agent_init() {
+	public void agentInit() {
 		// model = new Tree();
 		egreedy = new EGreedyPolicy();
 	}
 
-	public void agent_freeze() {
+	public void agentFreeze() {
 		alpha = 0f;
 		epsilon = 0f;
 	}
 
-	public String agent_message(String arg0) {
+	public String agentMessage(String arg0) {
 		return &quot;SarsaAgent does not implement agent_message currently&quot;;
 	}
 
-	public rrl.Action agent_start(State state) {		
+	public rrl.Action agentStart(State state) {		
 		rrl.Action action = egreedy.chooseAction(model, epsilon, state);  
 
 		QValue qValue = model.findQValue(state, action);
@@ -65,7 +65,7 @@ public class RandomLearner implements Learner, Serializable  {
 		return action;
 	}
 
-	public rrl.Action agent_step(double reward, State state) {
+	public rrl.Action agentStep(double reward, State state) {
 		rrl.Action newAction = egreedy.chooseAction(model, epsilon, state);
 
 		QValue qValue = model.findQValue(state, newAction);
@@ -81,7 +81,7 @@ public class RandomLearner implements Learner, Serializable  {
 		return newAction;
 	}
 
-	public void agent_end(double reward) {
+	public void agentEnd(double reward) {
 		numRewards += 1;
 		
 		if (numRewards != numQValues) {
@@ -92,6 +92,33 @@ public class RandomLearner implements Learner, Serializable  {
 		System.out.println(&quot;AGENT END &quot; + numRewards + &quot; qvalues &quot; + numQValues);
 	}
 
-	public void agent_cleanup() {
+	public void agentCleanup() {
+	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentFreezeExploring()
+	 */
+	@Override
+	public void agentFreezeExploring() {
+		// TODO Auto-generated method stub
+		
+	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreeze()
+	 */
+	@Override
+	public void agentUnfreeze() {
+		// TODO Auto-generated method stub
+		
+	}
+
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreezeExploring()
+	 */
+	@Override
+	public void agentUnfreezeExploring() {
+		// TODO Auto-generated method stub
+		
 	}
 }</diff>
      <filename>src/learners/RandomLearner.java</filename>
    </modified>
    <modified>
      <diff>@@ -38,7 +38,7 @@ public class SarsaLearner implements Learner, Serializable  {
 	// required by the agent as well as initialize any values which need to be reset at the beginning of every trial/run.
 	// ... value function
 	// ... values across episodes
-	public void agent_init() {
+	public void agentInit() {
 		// model = new Tree();
 		egreedy = new EGreedyPolicy();
 		run++;
@@ -47,7 +47,7 @@ public class SarsaLearner implements Learner, Serializable  {
 
 	// At the end of a trial agent_cleanup will be called to allow for the deallocation of any resources created in agent_init.
 	// Calls to agent_init and agent_cleanup should always be in a one to one ratio. 
-	public void agent_cleanup() {
+	public void agentCleanup() {
 		// TODO: delete qtree;
 	}
 	
@@ -64,7 +64,6 @@ public class SarsaLearner implements Learner, Serializable  {
 			ex.printStackTrace();
 		}
 	}
-
 	
 	public void saveAgent(String filename) {
 		FileOutputStream fos = null;
@@ -82,7 +81,7 @@ public class SarsaLearner implements Learner, Serializable  {
 	
 	// If the environment is episodic, the agent_end method will be called to allow for the experimenter 
 	// to complete the last step of his/her learning algorithm.
-	public void agent_end(double reward) {
+	public void agentEnd(double reward) {
 		//Update last step of current episode
 		double error = reward - sarsa_gamma * oldQValue.qvalue;
 		oldQValue.qvalue += alpha * error; 
@@ -102,20 +101,20 @@ public class SarsaLearner implements Learner, Serializable  {
 	// and is behaving consistently. One easy mistake is to forget any randomness in a policy. 
 	// If an agent is implementing an epsilon greedy policy as above, the agent will have to remove the epsilon 
 	// randomness after agent_freeze is called.
-	public void agent_freeze() {
+	public void agentFreeze() {
 		alpha = 0f;
 		epsilon = 0f;
 	}
 
 
 	// Almost anything that isn't covered by these functions can be attempted using the agent_message function.
-	public String agent_message(String arg0) {
+	public String agentMessage(String arg0) {
 		return &quot;SarsaAgent does not implement agent_message currently&quot;;
 	}
 
 	// chooses first action
 	// state -&gt; action
-	public rrl.Action agent_start(State state) {		
+	public rrl.Action agentStart(State state) {		
 		//Choose and return the agent's first action
 		rrl.Action action = egreedy.chooseAction(model, epsilon, state);  
 		
@@ -133,7 +132,7 @@ public class SarsaLearner implements Learner, Serializable  {
 	
 	// chooses next action
 	// state -&gt; new action
-	public rrl.Action agent_step(double reward, State state) {
+	public rrl.Action agentStep(double reward, State state) {
 		// Update one step, return agent's action
 		rrl.Action newAction = egreedy.chooseAction(model, epsilon, state);
 		
@@ -192,4 +191,28 @@ public class SarsaLearner implements Learner, Serializable  {
 			ex.printStackTrace();
 		}
 	}
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentFreezeExploring()
+	 */
+	@Override
+	public void agentFreezeExploring() {
+		// TODO Auto-generated method stub
+		
+	}
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreeze()
+	 */
+	@Override
+	public void agentUnfreeze() {
+		// TODO Auto-generated method stub
+		
+	}
+	/* (non-Javadoc)
+	 * @see rrl.Learner#agentUnfreezeExploring()
+	 */
+	@Override
+	public void agentUnfreezeExploring() {
+		// TODO Auto-generated method stub
+		
+	}
 }</diff>
      <filename>src/learners/SarsaLearner.java</filename>
    </modified>
    <modified>
      <diff>@@ -9,11 +9,14 @@ package rrl;
  *
  */
 public interface Learner {
-		public void agent_init();
-		public void agent_cleanup();
-		public void agent_end(double reward);
-		public void agent_freeze();
-		public String agent_message(String arg0);
-		public rrl.Action agent_start(State state);
-		public rrl.Action agent_step(double reward, State state);
+		public void agentInit();
+		public void agentCleanup();
+		public void agentEnd(double reward);
+		public void agentFreeze();
+		public void agentUnfreeze();
+		public void agentFreezeExploring();
+		public void agentUnfreezeExploring();
+		public String agentMessage(String arg0);
+		public rrl.Action agentStart(State state);
+		public rrl.Action agentStep(double reward, State state);
 }</diff>
      <filename>src/rrl/Learner.java</filename>
    </modified>
  </modified>
  <removed type="array"/>
  <parents type="array">
    <parent>
      <id>eb3df449e53c6de8e872c2b56a51c1efdcc01791</id>
    </parent>
  </parents>
  <author>
    <name>Stanislav Slusny</name>
    <email>slusnys@gmail.com</email>
  </author>
  <url>http://github.com/slusnys/learny/commit/eac432d46f5623898537535b532bbc0badf1226d</url>
  <id>eac432d46f5623898537535b532bbc0badf1226d</id>
  <committed-date>2009-02-18T01:39:00-08:00</committed-date>
  <authored-date>2009-02-18T01:39:00-08:00</authored-date>
  <message>Finished adaptation to RL-glue 3.0</message>
  <tree>28278af16724089002083aed911c4936bdf80989</tree>
  <committer>
    <name>Stanislav Slusny</name>
    <email>slusnys@gmail.com</email>
  </committer>
</commit>
