Feature/1 - Running The Scripts (#14)
PR Release Notes:
* README.md now includes a step-by-step description of local script execution
* controller.py updated so that it runs all the scripts from one place
* clean_env_before_mining.py fully migrated to environment-variable configuration
MobiTikula committed May 27, 2024
1 parent c8121fb commit ee17607
Showing 9 changed files with 144 additions and 68 deletions.
13 changes: 13 additions & 0 deletions .gitignore
@@ -159,3 +159,16 @@ cython_debug/
/.idea/
node_modules
/package-lock.json

# Generated outputs
dist/data/
dist/output/
src/data/
src/output/

# Debugging files
debugme_script.sh
debugme.sh



67 changes: 66 additions & 1 deletion README.md
@@ -22,7 +22,7 @@
- [License Information](#license-information)
- [Contact or Support Information](#contact-or-support-information)

A tool designed to data-mine GitHub repositories for issues containing project documentation (e.g. tagged with feature-related labels). This tool automatically generates comprehensive living documentation in Markdown format, providing detailed feature overview pages and in-depth feature descriptions.

## Motivation
Addresses the need for continuously updated documentation accessible to all team members and stakeholders. Achieves this by extracting information directly from GitHub issues and integrating this functionality to deliver real-time, markdown-formatted output. Ensures everyone has the most current project details, fostering better communication and efficiency throughout development.
@@ -211,6 +211,71 @@ source venv/bin/activate
pip install -r requirements.txt
```

## Run scripts locally
If you need to run the scripts locally, follow these steps:

### Create the shell script
Create a shell script in the repository root; this guide uses `run_script.sh`.
```shell
touch run_script.sh
```
Add the shebang line at the top of the script file.
```shell
#!/bin/sh
```

### Set the environment variables
Set the configuration environment variables in the shell script following the structure below.
Also make sure that `GITHUB_TOKEN` is configured in your environment variables.
```shell
export GITHUB_TOKEN=$(printenv GITHUB_TOKEN)
export PROJECT_STATE_MINING="true"
export PROJECTS_TITLE_FILTER="[]"
export MILESTONES_AS_CHAPTERS="true"
export REPOSITORIES='[
  {
    "orgName": "OrgName",
    "repoName": "example-project",
    "queryLabels": ["feature", "bug"]
  }
]'
```
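Before running, it is worth checking that `GITHUB_TOKEN` is actually present. The guard below is a hypothetical addition (the `check_token` helper is not part of the project scripts), sketching a fail-fast check:

```shell
# Hypothetical guard, not part of the project script: detect a missing
# token up front instead of letting the Python scripts crash mid-run.
check_token() {
  # Succeeds only when GITHUB_TOKEN is set and non-empty.
  [ -n "${GITHUB_TOKEN}" ]
}

if ! check_token; then
  echo "GITHUB_TOKEN is not set" >&2
fi
```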

### Running the GitHub Action locally
To run the whole GitHub Action locally, add the following commands to the shell script:
```shell
cd src || exit 1
python3 controller.py --github-token "$GITHUB_TOKEN" \
  --project-state-mining "$PROJECT_STATE_MINING" \
  --projects-title-filter "$PROJECTS_TITLE_FILTER" \
  --milestones-as-chapters "$MILESTONES_AS_CHAPTERS" \
  --repositories "$REPOSITORIES"
cd .. || exit 1
```

### Running a single script locally
To run just one script at a time, add the following commands to the shell script:
```shell
cd src || exit 1
python3 github_query_project_state.py
cd .. || exit 1
```

### Make the Script Executable
From a terminal in the root of this project, make the script executable:
```shell
chmod +x run_script.sh
```

### Run the Script
```shell
./run_script.sh
```
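For reference, the assembled `run_script.sh` combining the steps above might look like this (using the illustrative configuration values from this section):

```shell
#!/bin/sh

# Configuration - illustrative values from the section above.
export GITHUB_TOKEN=$(printenv GITHUB_TOKEN)
export PROJECT_STATE_MINING="true"
export PROJECTS_TITLE_FILTER="[]"
export MILESTONES_AS_CHAPTERS="true"
export REPOSITORIES='[
  {
    "orgName": "OrgName",
    "repoName": "example-project",
    "queryLabels": ["feature", "bug"]
  }
]'

# Run the whole generator from the src directory.
cd src || exit 1
python3 controller.py --github-token "$GITHUB_TOKEN" \
  --project-state-mining "$PROJECT_STATE_MINING" \
  --projects-title-filter "$PROJECTS_TITLE_FILTER" \
  --milestones-as-chapters "$MILESTONES_AS_CHAPTERS" \
  --repositories "$REPOSITORIES"
cd .. || exit 1
```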

## Run unit tests
TODO - check this chapter and update to the latest state
### Launch unit tests
Empty file added debugme.sh
Empty file.
27 changes: 15 additions & 12 deletions dist/clean_env_before_mining.py
@@ -11,12 +11,6 @@
import shutil


# Directories used in the project
FETCH_DIRECTORY = "data/fetched_data"
CONSOLIDATION_DIRECTORY = "data/consolidation_data"
MARKDOWN_PAGE_DIRECTORY = "output/markdown_pages"


def clean_directory_content(script_dir: str, directory: str) -> None:
"""
Deletes all content from the specified directory.
@@ -37,17 +31,26 @@ def clean_directory_content(script_dir: str, directory: str) -> None:
shutil.rmtree(directory_path)


if __name__ == "__main__":
print("Data mining for Living Documentation started")
def clean_environment():
print("Cleaning environment for the Living Doc Generator")

# Get the directory of the current script
script_dir = os.path.dirname(os.path.abspath(__file__))

# Get the directory variables from the environment variables
fetch_directory = os.environ['FETCH_DIRECTORY']
consolidation_directory = os.environ['CONSOLIDATION_DIRECTORY']
markdown_page_directory = os.environ['MARKDOWN_PAGE_DIRECTORY']

# Clean the fetched data directories
clean_directory_content(script_dir, FETCH_DIRECTORY)
clean_directory_content(script_dir, CONSOLIDATION_DIRECTORY)
clean_directory_content(script_dir, fetch_directory)
clean_directory_content(script_dir, consolidation_directory)

# Clean the output directory
clean_directory_content(script_dir, MARKDOWN_PAGE_DIRECTORY)
clean_directory_content(script_dir, markdown_page_directory)

print("Data mining for Living Documentation ended")
print("Cleaning of env for Living Documentation ended")


if __name__ == "__main__":
clean_environment()
39 changes: 20 additions & 19 deletions dist/controller.py
@@ -2,7 +2,6 @@
import subprocess
import argparse
import sys
import re


def extract_args():
@@ -22,20 +21,19 @@ def extract_args():
'PROJECT_STATE_MINING': args.project_state_mining,
'PROJECTS_TITLE_FILTER': args.projects_title_filter,
'MILESTONES_AS_CHAPTERS': args.milestones_as_chapters,
'REPOSITORIES': args.repositories
'REPOSITORIES': args.repositories,
'FETCH_DIRECTORY': "src/data/fetched_data",
'CONSOLIDATION_DIRECTORY': "src/data/consolidation_data",
'MARKDOWN_PAGE_DIRECTORY': "src/output/markdown_pages"
}

return env_vars


def run_script(script_name, env_vars):
def run_script(script_name, env):
""" Helper function to run a Python script with environment variables using subprocess """
try:
# Setting up the environment for subprocess
env = os.environ.copy()
env.update(env_vars)

# Running the script with updated environment
# Running the python script with given environment variables
result = subprocess.run(['python3', script_name], env=env, text=True, capture_output=True, check=True)
print(f"Output from {script_name}: {result.stdout}")

@@ -48,25 +46,28 @@ def main():
print("Extracting arguments from command line.")
env_vars = extract_args()

# Clean environment before mining
print("Data mining for Living Documentation")
run_script('clean_env_before_mining.py', env_vars)
# Create a local copy of the current environment variables
local_env = os.environ.copy()

# Add the script-specific environment variables to the local copy
local_env.update(env_vars)

print("Starting the Living Documentation Generator - mining phase")

# Clean the environment before mining
run_script('clean_env_before_mining.py', local_env)

# Data mine GitHub features from repository
print("Downloading issues from GitHub")
run_script('github_query_issues.py', env_vars)
run_script('github_query_issues.py', local_env)

# Data mine GitHub project's state
print("Downloading project's state")
run_script('github_query_project_state.py', env_vars)
run_script('github_query_project_state.py', local_env)

# Consolidate all feature data together
print("Consolidating mined feature data")
run_script('consolidate_feature_data.py', env_vars)
run_script('consolidate_feature_data.py', local_env)

# Generate markdown pages
print("Converting features to markdown pages")
run_script('convert_features_to_pages.py', env_vars)
run_script('convert_features_to_pages.py', local_env)


if __name__ == '__main__':
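The env-merging pattern the updated controller uses — copy the parent environment with `os.environ.copy()`, overlay the script-specific variables, then pass the merged mapping to `subprocess.run` — can be sketched standalone. In this sketch a tiny inline `-c` program stands in for one of the project scripts so the example runs on its own:

```python
import os
import subprocess
import sys

def run_script(command, extra_env):
    """Run a command with the parent environment plus script-specific variables."""
    env = os.environ.copy()   # inherit PATH, HOME, ... so the interpreter still resolves
    env.update(extra_env)     # layer the controller-supplied settings on top
    result = subprocess.run(command, env=env, text=True,
                            capture_output=True, check=True)
    return result.stdout

# A tiny inline program stands in for one of the project scripts so the
# sketch is runnable on its own.
out = run_script(
    [sys.executable, "-c", "import os; print(os.environ['FETCH_DIRECTORY'])"],
    {"FETCH_DIRECTORY": "src/data/fetched_data"},
)
print(out.strip())  # src/data/fetched_data
```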
6 changes: 0 additions & 6 deletions notes.txt

This file was deleted.

27 changes: 11 additions & 16 deletions src/clean_env_before_mining.py
@@ -11,12 +11,6 @@
import shutil


# Directories used in the project
FETCH_DIRECTORY = "data/fetched_data"
CONSOLIDATION_DIRECTORY = "data/consolidation_data"
MARKDOWN_PAGE_DIRECTORY = "output/markdown_pages"


def clean_directory_content(script_dir: str, directory: str) -> None:
"""
Deletes all content from the specified directory.
@@ -37,25 +31,26 @@ def clean_directory_content(script_dir: str, directory: str) -> None:
shutil.rmtree(directory_path)


def clean_environment(env_vars):
print("Cleaning of env for Living Documentation started")
def clean_environment():
print("Cleaning environment for the Living Doc Generator")

# Get the directory of the current script
script_dir = os.path.dirname(os.path.abspath(__file__))

# Get the directory variables from the environment variables
fetch_directory = os.environ['FETCH_DIRECTORY']
consolidation_directory = os.environ['CONSOLIDATION_DIRECTORY']
markdown_page_directory = os.environ['MARKDOWN_PAGE_DIRECTORY']

# Clean the fetched data directories
clean_directory_content(script_dir, FETCH_DIRECTORY)
clean_directory_content(script_dir, CONSOLIDATION_DIRECTORY)
clean_directory_content(script_dir, fetch_directory)
clean_directory_content(script_dir, consolidation_directory)

# Clean the output directory
clean_directory_content(script_dir, MARKDOWN_PAGE_DIRECTORY)
clean_directory_content(script_dir, markdown_page_directory)

print("Cleaning of env for Living Documentation ended")


if __name__ == "__main__":
import sys
env_vars = sys.argv[1:]
clean_environment(env_vars)
# TODO - move dir variables definition to the controlling script - add them to the env_vars
# - to able to provide them to all rest scripts #Issue: https://github.com/AbsaOSS/living-doc-generator/issues/4
clean_environment()
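The migrated script reads its directories with `os.environ['…']`, which raises `KeyError` when the controller has not exported a variable. A defensive variant (an assumption for illustration, not what this commit does) would fall back to the pre-migration hard-coded defaults:

```python
import os

# os.environ["FETCH_DIRECTORY"] raises KeyError when the variable is missing;
# .get() falls back to the old hard-coded defaults instead.
fetch_directory = os.environ.get("FETCH_DIRECTORY", "data/fetched_data")
consolidation_directory = os.environ.get("CONSOLIDATION_DIRECTORY", "data/consolidation_data")
markdown_page_directory = os.environ.get("MARKDOWN_PAGE_DIRECTORY", "output/markdown_pages")
```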
Empty file added src/containers/__init__.py
Empty file.
33 changes: 19 additions & 14 deletions src/controller.py
@@ -2,7 +2,6 @@
import subprocess
import argparse
import sys
from clean_env_before_mining import clean_environment


def extract_args():
@@ -22,20 +21,19 @@ def extract_args():
'PROJECT_STATE_MINING': args.project_state_mining,
'PROJECTS_TITLE_FILTER': args.projects_title_filter,
'MILESTONES_AS_CHAPTERS': args.milestones_as_chapters,
'REPOSITORIES': args.repositories
'REPOSITORIES': args.repositories,
'FETCH_DIRECTORY': "src/data/fetched_data",
'CONSOLIDATION_DIRECTORY': "src/data/consolidation_data",
'MARKDOWN_PAGE_DIRECTORY': "src/output/markdown_pages"
}

return env_vars


def run_script(script_name, env_vars):
def run_script(script_name, env):
""" Helper function to run a Python script with environment variables using subprocess """
try:
# Setting up the environment for subprocess
env = os.environ.copy()
env.update(env_vars)

# Running the script with updated environment
# Running the python script with given environment variables
result = subprocess.run(['python3', script_name], env=env, text=True, capture_output=True, check=True)
print(f"Output from {script_name}: {result.stdout}")

@@ -48,21 +46,28 @@ def main():
print("Extracting arguments from command line.")
env_vars = extract_args()

print("Data mining for Living Documentation")
# Create a local copy of the current environment variables
local_env = os.environ.copy()

# Add the script-specific environment variables to the local copy
local_env.update(env_vars)

print("Starting the Living Documentation Generator - mining phase")

clean_environment(env_vars)
# Clean the environment before mining
run_script('clean_env_before_mining.py', local_env)

# Data mine GitHub features from repository
run_script('github_query_issues.py', env_vars)
run_script('github_query_issues.py', local_env)

# Data mine GitHub project's state
run_script('github_query_project_state.py', env_vars)
run_script('github_query_project_state.py', local_env)

# Consolidate all feature data together
run_script('consolidate_feature_data.py', env_vars)
run_script('consolidate_feature_data.py', local_env)

# Generate markdown pages
run_script('convert_features_to_pages.py', env_vars)
run_script('convert_features_to_pages.py', local_env)


if __name__ == '__main__':
