From 90749c05ac51962cfee3ddce2fefd0f4ec24eed9 Mon Sep 17 00:00:00 2001
From: Erwin Lejeune <erwin.lejeune15@gmail.com>
Date: Thu, 24 Dec 2020 16:07:06 +0100
Subject: [PATCH 1/5] improve plotting

---
 jupyddl/data_analyst.py | 79 ++++++++++++++++++++++++++++-------------
 1 file changed, 54 insertions(+), 25 deletions(-)

diff --git a/jupyddl/data_analyst.py b/jupyddl/data_analyst.py
index 4f0e1da..c9ddeb9 100644
--- a/jupyddl/data_analyst.py
+++ b/jupyddl/data_analyst.py
@@ -37,12 +37,18 @@ def __get_all_pddl_from_data(self):
         return [("data/problem.pddl", "data/domain.pddl")]
 
     def __plot_data(self, times, total_nodes, plot_title):
-        plt.plot(total_nodes, times, "b:o")
+        data = dict()
+        for i in range(len(total_nodes)):
+            data[total_nodes[i]] = times[i]
+        nodes_sorted = sorted(list(data.keys()))
+        times_y = []
+        for node_opened in nodes_sorted:
+            times_y.append(data[node_opened])
+        plt.plot(nodes_sorted, times_y, "r:o")
         plt.xlabel("Number of opened nodes")
         plt.ylabel("Planning computation time")
+        plt.xscale('symlog')
         plt.title(plot_title)
-        plt.xscale("symlog")
-        plt.yscale("log")
         plt.grid(True)
         plt.show(block=False)
 
@@ -50,9 +56,8 @@ def __scatter_data(self, times, total_nodes, plot_title):
         plt.scatter(total_nodes, times)
         plt.xlabel("Number of opened nodes")
         plt.ylabel("Planning computation time")
+        plt.xscale('symlog')
         plt.title(plot_title)
-        plt.xscale("symlog")
-        plt.yscale("log")
         plt.grid(True)
         plt.show(block=False)
 
@@ -61,7 +66,6 @@ def __gather_data_astar(
     ):
         has_multiple_files_tested = True
         if not domain_path or not problem_path:
-            has_multiple_files_tested = False
             metrics = dict()
             for problem, domain in self.__get_all_pddl_from_data():
                 logging.debug("Loading new PDDL instance planned with A*...")
@@ -69,7 +73,7 @@ def __gather_data_astar(
                 logging.debug("Problem: " + problem)
                 apla = AutomatedPlanner(domain, problem)
                 if heuristic_key in apla.available_heuristics:
-                    _, total_time, opened_nodes = apla.astar_best_first_search(
+                    path, total_time, opened_nodes = apla.astar_best_first_search(
                         heuristic=apla.available_heuristics[heuristic_key]
                     )
                 else:
@@ -77,17 +81,21 @@ def __gather_data_astar(
                         "Heuristic is not implemented! (Key not found in registered heuristics dict)"
                     )
                     return [0], [0], has_multiple_files_tested
-                metrics[total_time] = opened_nodes
+                if path:
+                    metrics[total_time] = opened_nodes
+                else:
+                    metrics[0] = 0
 
             total_nodes = list(metrics.values())
             times = list(metrics.keys())
             return times, total_nodes, has_multiple_files_tested
+        has_multiple_files_tested = False
         logging.debug("Loading new PDDL instance...")
         logging.debug("Domain: " + domain_path)
         logging.debug("Problem: " + problem_path)
         apla = AutomatedPlanner(domain_path, problem_path)
         if heuristic_key in apla.available_heuristics:
-            _, total_time, opened_nodes = apla.astar_best_first_search(
+            path, total_time, opened_nodes = apla.astar_best_first_search(
                 heuristic=apla.available_heuristics[heuristic_key]
             )
         else:
@@ -95,7 +103,10 @@ def __gather_data_astar(
                 "Heuristic is not implemented! (Key not found in registered heuristics dict)"
             )
             return [0], [0], has_multiple_files_tested
-        return [total_time], [opened_nodes], has_multiple_files_tested
+        if path:
+            return [total_time], [opened_nodes], has_multiple_files_tested
+        else:
+            return [0], [0], has_multiple_files_tested
 
     def plot_astar_data(self, heuristic_key="goal_count", domain="", problem=""):
         if bool(not problem) != bool(not domain):
@@ -115,25 +126,31 @@ def plot_astar_data(self, heuristic_key="goal_count", domain="", problem=""):
     def __gather_data_bfs(self, domain_path="", problem_path=""):
         has_multiple_files_tested = True
         if not domain_path or not problem_path:
-            has_multiple_files_tested = False
             metrics = dict()
             for problem, domain in self.__get_all_pddl_from_data():
                 logging.debug("Loading new PDDL instance planned with BFS...")
                 logging.debug("Domain: " + domain)
                 logging.debug("Problem: " + problem)
                 apla = AutomatedPlanner(domain, problem)
-                _, total_time, opened_nodes = apla.breadth_first_search()
-                metrics[total_time] = opened_nodes
+                path, total_time, opened_nodes = apla.breadth_first_search()
+                if path:
+                    metrics[total_time] = opened_nodes
+                else:
+                    metrics[0] = 0
 
             total_nodes = list(metrics.values())
             times = list(metrics.keys())
             return times, total_nodes, has_multiple_files_tested
+        has_multiple_files_tested = False
         logging.debug("Loading new PDDL instance...")
         logging.debug("Domain: " + domain_path)
         logging.debug("Problem: " + problem_path)
         apla = AutomatedPlanner(domain_path, problem_path)
-        _, total_time, opened_nodes = apla.breadth_first_search()
-        return [total_time], [opened_nodes], has_multiple_files_tested
+        path, total_time, opened_nodes = apla.breadth_first_search()
+        if path:
+            return [total_time], [opened_nodes], has_multiple_files_tested
+        else:
+            return [0], [0], has_multiple_files_tested
 
     def plot_bfs(self, domain="", problem=""):
         title = "BFS Statistics"
@@ -153,25 +170,31 @@ def plot_bfs(self, domain="", problem=""):
     def __gather_data_dfs(self, domain_path="", problem_path=""):
         has_multiple_files_tested = True
         if not domain_path or not problem_path:
-            has_multiple_files_tested = False
             metrics = dict()
             for problem, domain in self.__get_all_pddl_from_data():
                 logging.debug("Loading new PDDL instance planned with DFS...")
                 logging.debug("Domain: " + domain)
                 logging.debug("Problem: " + problem)
                 apla = AutomatedPlanner(domain, problem)
-                _, total_time, opened_nodes = apla.depth_first_search()
-                metrics[total_time] = opened_nodes
+                path, total_time, opened_nodes = apla.depth_first_search()
+                if path:
+                    metrics[total_time] = opened_nodes
+                else:
+                    metrics[0] = 0
 
             total_nodes = list(metrics.values())
             times = list(metrics.keys())
             return times, total_nodes, has_multiple_files_tested
+        has_multiple_files_tested = False
         logging.debug("Loading new PDDL instance...")
         logging.debug("Domain: " + domain_path)
         logging.debug("Problem: " + problem_path)
         apla = AutomatedPlanner(domain_path, problem_path)
-        _, total_time, opened_nodes = apla.depth_first_search()
-        return [total_time], [opened_nodes], has_multiple_files_tested
+        path, total_time, opened_nodes = apla.depth_first_search()
+        if path:
+            return [total_time], [opened_nodes], has_multiple_files_tested
+        else:
+            return [0], [0], has_multiple_files_tested
 
     def plot_dfs(self, problem="", domain=""):
         title = "DFS Statistics"
@@ -191,25 +214,31 @@ def plot_dfs(self, problem="", domain=""):
     def __gather_data_dijkstra(self, domain_path="", problem_path=""):
         has_multiple_files_tested = True
         if not domain_path or not problem_path:
-            has_multiple_files_tested = False
             metrics = dict()
             for problem, domain in self.__get_all_pddl_from_data():
                 logging.debug("Loading new PDDL instance planned with Dijkstra...")
                 logging.debug("Domain: " + domain)
                 logging.debug("Problem: " + problem)
                 apla = AutomatedPlanner(domain, problem)
-                _, total_time, opened_nodes = apla.dijktra_best_first_search()
-                metrics[total_time] = opened_nodes
+                path, total_time, opened_nodes = apla.dijktra_best_first_search()
+                if path:
+                    metrics[total_time] = opened_nodes
+                else:
+                    metrics[0] = 0
 
             total_nodes = list(metrics.values())
             times = list(metrics.keys())
             return times, total_nodes, has_multiple_files_tested
+        has_multiple_files_tested = False
         logging.debug("Loading new PDDL instance...")
         logging.debug("Domain: " + domain_path)
         logging.debug("Problem: " + problem_path)
         apla = AutomatedPlanner(domain_path, problem_path)
-        _, total_time, opened_nodes = apla.dijktra_best_first_search()
-        return [total_time], [opened_nodes], has_multiple_files_tested
+        path, total_time, opened_nodes = apla.dijktra_best_first_search()
+        if path:
+            return [total_time], [opened_nodes], has_multiple_files_tested
+        else:
+            return [0], [0], has_multiple_files_tested
 
     def plot_dijkstra(self, problem="", domain=""):
         title = "Dijkstra Statistics"

From a462908000ad7181c0752792c431b5ce928f405f Mon Sep 17 00:00:00 2001
From: Erwin Lejeune <erwin.lejeune15@gmail.com>
Date: Thu, 24 Dec 2020 16:55:40 +0100
Subject: [PATCH 2/5] improve perf on first run

---
 jupyddl/automated_planner.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/jupyddl/automated_planner.py b/jupyddl/automated_planner.py
index 7a14da9..0798b51 100644
--- a/jupyddl/automated_planner.py
+++ b/jupyddl/automated_planner.py
@@ -31,6 +31,15 @@ def __init__(self, domain_path, problem_path, log_level="DEBUG"):
         self.logger = logging.getLogger("automated_planning")
         coloredlogs.install(level=log_level)
 
+        # Call each external Julia function once up front so their first-call set-up cost is not paid during the first real planning run
+        self.__run_julia_once()
+
+    def __run_julia_once(self):
+        self.satisfies(self.problem.goal, self.initial_state)
+        self.state_has_term(self.initial_state, self.goals[0])
+        actions = self.available_actions(self.initial_state)
+        self.transition(self.initial_state, actions[0])
+
     def __init_logger(self, log_level):
         import os
 

From cc561fd1f77ed8c037fffefed0f3e3df83a49651 Mon Sep 17 00:00:00 2001
From: Erwin Lejeune <erwin.lejeune15@gmail.com>
Date: Thu, 24 Dec 2020 17:04:03 +0100
Subject: [PATCH 3/5] add docs readme

---
 README.md | 30 +++++++++++++++++++++++++++++-
 1 file changed, 29 insertions(+), 1 deletion(-)

diff --git a/README.md b/README.md
index 97e4164..75a646b 100644
--- a/README.md
+++ b/README.md
@@ -51,7 +51,9 @@ $ python3 -m pip install jupyddl
 # REFL Mode
 
 - Run `python3` in the terminal.
-- Use the AutomatedPlanner class to do what you want:
+
+## [AutomatedPlanner]
+
 ```python
 from jupyddl import AutomatedPlanner # takes some time because it has to instantiate the Julia interface
 apl = AutomatedPlanner("data/domain.pddl", "data/problem.pddl)
@@ -77,6 +79,32 @@ print(apl.get_actions_from_path(path))
 [<PyCall.jlwrap flip_row(r1)>, <PyCall.jlwrap flip_row(r3)>, <PyCall.jlwrap flip_column(c2)>]
 ```
 
+## [Data Analyst]
+
+Make sure the directory you run your environment from contains a `data` folder whose sub-folders each hold a "domain.pddl" and a "problem.pddl" file, using exactly those standard names.
+
+```python
+from jupyddl import DataAnalyst
+
+da = DataAnalyst()
+da.plot_astar_data() # plots complexity statistics for all the problem.pddl/domain.pddl pairs in the data/ folder
+
+da.plot_astar_data(problem="data/flip/problem.pddl", domain="data/flip/domain.pddl") # scatter complexity statistics for the provided pddl
+
+da.plot_astar_data(heuristic_key="zero") # use h=0 instead of goal_count for your computation
+
+da.plot_dfs() # same as astar
+
+da.comparative_data_plot() # runs all planners on the data folder and plots them on the same figure; the data is stored in a data.json file
+
+da.comparative_data_plot(astar=False) # Exclude astar from the comparative plot
+
+da.comparative_data_plot(heuristic_key="zero") # use zero heuristic for h based planners
+
+da.comparative_data_plot(collect_new_data=False) # uses data.json to plot the data
+```
+
+
 # Contribute
 
 Open an issue to state clearly the contribution you want to make. Upon aproval send in a PR with the Issue referenced. (Implement Issue #No / Fix Issue #No).

From 5deaf7158819f9457b3fff798c7159dac6cfa622 Mon Sep 17 00:00:00 2001
From: Erwin Lejeune <erwin.lejeune15@gmail.com>
Date: Thu, 24 Dec 2020 17:05:41 +0100
Subject: [PATCH 4/5] remove format on PR

---
 .github/workflows/format.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml
index 08d8a90..f5be8c4 100644
--- a/.github/workflows/format.yml
+++ b/.github/workflows/format.yml
@@ -1,6 +1,6 @@
 name: format
 on:
-  pull_request:
+  push:
     branches: [main]
 jobs:
   format:
@@ -25,4 +25,4 @@ jobs:
         uses: stefanzweifel/git-auto-commit-action@v4.8.0
         with:
           commit_message: Apply formatting changes
-          branch: ${{ github.head_ref }}
+          branch: main

From 98c77e46d3fbc784d6b05b4f82c2b082e019e500 Mon Sep 17 00:00:00 2001
From: Erwin Lejeune <erwin.lejeune15@gmail.com>
Date: Thu, 24 Dec 2020 17:09:23 +0100
Subject: [PATCH 5/5] apply codefactor requests

---
 jupyddl/data_analyst.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/jupyddl/data_analyst.py b/jupyddl/data_analyst.py
index c9ddeb9..2eede89 100644
--- a/jupyddl/data_analyst.py
+++ b/jupyddl/data_analyst.py
@@ -38,8 +38,8 @@ def __get_all_pddl_from_data(self):
 
     def __plot_data(self, times, total_nodes, plot_title):
         data = dict()
-        for i in range(len(total_nodes)):
-            data[total_nodes[i]] = times[i]
+        for i, val in enumerate(total_nodes):
+            data[val] = times[i]
         nodes_sorted = sorted(list(data.keys()))
         times_y = []
         for node_opened in nodes_sorted:
@@ -105,8 +105,7 @@ def __gather_data_astar(
             return [0], [0], has_multiple_files_tested
         if path:
             return [total_time], [opened_nodes], has_multiple_files_tested
-        else:
-            return [0], [0], has_multiple_files_tested
+        return [0], [0], has_multiple_files_tested
 
     def plot_astar_data(self, heuristic_key="goal_count", domain="", problem=""):
         if bool(not problem) != bool(not domain):
@@ -149,8 +148,7 @@ def __gather_data_bfs(self, domain_path="", problem_path=""):
         path, total_time, opened_nodes = apla.breadth_first_search()
         if path:
             return [total_time], [opened_nodes], has_multiple_files_tested
-        else:
-            return [0], [0], has_multiple_files_tested
+        return [0], [0], has_multiple_files_tested
 
     def plot_bfs(self, domain="", problem=""):
         title = "BFS Statistics"
@@ -193,8 +191,7 @@ def __gather_data_dfs(self, domain_path="", problem_path=""):
         path, total_time, opened_nodes = apla.depth_first_search()
         if path:
             return [total_time], [opened_nodes], has_multiple_files_tested
-        else:
-            return [0], [0], has_multiple_files_tested
+        return [0], [0], has_multiple_files_tested
 
     def plot_dfs(self, problem="", domain=""):
         title = "DFS Statistics"
@@ -237,8 +234,7 @@ def __gather_data_dijkstra(self, domain_path="", problem_path=""):
         path, total_time, opened_nodes = apla.dijktra_best_first_search()
         if path:
             return [total_time], [opened_nodes], has_multiple_files_tested
-        else:
-            return [0], [0], has_multiple_files_tested
+        return [0], [0], has_multiple_files_tested
 
     def plot_dijkstra(self, problem="", domain=""):
         title = "Dijkstra Statistics"