IntelLabs · michaelbeale-IL · Apr 7, 2025
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -0,0 +1,63 @@
+# This workflow installs Python dependencies with Poetry and runs the unit tests (with coverage) on a single version of Python
+# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+
+name: Unit Tests
+
+on:
+  push:
+    branches: ["main"]
+  pull_request:
+    branches: ["main"]
+  workflow_dispatch:
+
+permissions:
+  contents: read
+
+jobs:
+  unittest:
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        id: setup-python  # exposes outputs.python-version for the cache key below
+        uses: actions/setup-python@v5.2.0
+        with:
+          python-version: "3.13"
+
+      - name: Cache Python virtual environment
+        uses: actions/cache@v4
+        with:
+          path: ${{ github.workspace }}/.venv
+          key: ${{ runner.os }}-python-${{ steps.setup-python.outputs.python-version }}-${{ hashFiles('**/poetry.lock') }}
+          restore-keys: |
+            ${{ runner.os }}-python-${{ steps.setup-python.outputs.python-version }}-
+
+      - name: Install Poetry
+        run: |
+          python3 -m pip install --upgrade pip
+          python3 -m pip install poetry
+
+      - name: Configure Poetry to create virtual environment in project directory
+        run: |
+          poetry config virtualenvs.in-project true
+
+      - name: Install dependencies
+        run: |
+          poetry install
+          poetry env info
+
+      - name: Run tests
+        run: |
+          source .venv/bin/activate
+          poetry run pytest
+
+      - name: Upload coverage report
+        if: ${{ !cancelled() && !env.ACT }}  # still publish the report when tests or the coverage gate fail
+        uses: actions/upload-artifact@v4.4.0
+        with:
+          name: coverage-report
+          path: |
+            coverage.xml
diff --git a/.gitignore b/.gitignore
@@ -7,3 +7,8 @@ tests/fuzzer
 tmp
 *.pyc
 .coverage
+coverage.xml
+*.xlsx
+*.xls
+*.csv
+*.json
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -6,6 +6,9 @@
         "-p",
         "test_*.py"
     ],
-    "python.testing.pytestEnabled": false,
-    "python.testing.unittestEnabled": true,
-}
+    "python.testing.pytestEnabled": true,
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestArgs": [
+        "tests"
+    ],
+}
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "superBOM"
-version = "0.1.0"
+version = "0.2.0"
 description = ""
 authors = ["Michael Beale <michael.beale@intel.com>"]
 license = "Apache-2.0"
@@ -22,6 +22,9 @@ foss-flame = "^0.20.7"
 colorlog = "^6.9.0"
 poetry-core = "^1.9.1"
 packaging = "^24.2"
+pytest = "^8.3.4"
+pytest-cov = "^6.0.0"
+tomli = "^2.2.1"
 
 [tool.poetry.scripts]
 superbom = "superbom.main:main"

diff --git a/pytest.ini b/pytest.ini
@@ -0,0 +1,2 @@
+[pytest]
+addopts = --cov=superbom --cov-report=xml --cov-fail-under=85
diff --git a/src/superbom/main.py b/src/superbom/main.py
@@ -2,16 +2,20 @@
 # SPDX-License-Identifier: Apache 2.0
 
 import argparse
+import logging
 import sys
+from importlib.metadata import PackageNotFoundError, version
 from pathlib import Path
 from typing import Dict, List, Union
 
 import pandas as pd
+import tqdm
 
+from superbom.utils.logger import AppLogger
 from superbom.utils.packageindexes.conda.condadependencies import CondaPackageUtil
 from superbom.utils.packageindexes.pypi.pipdependencies import PyPIPackageUtil
-from superbom.utils.logger import AppLogger
 from superbom.utils.parsers import (
+    extract_toml_dependencies,
     parse_conda_env,
     parse_poetry_toml,
     parse_requirements,
@@ -48,6 +52,11 @@ def save_results(results: Dict[str, pd.DataFrame], output_path: str, format: str
         try:
             with pd.ExcelWriter(output_path, engine="openpyxl", mode="w") as writer:
                 for sheet_name, df in results.items():
+                    if df.empty:
+                        logger.warning(f"DataFrame for {sheet_name} is empty. Skipping.")
+                        continue
+                    # Ensure the sheet name is valid
+                    sheet_name = sheet_name[:31]  # Excel sheet name limit
                     df.to_excel(writer, sheet_name=sheet_name, index=False)
         except Exception as e:
             logger.error(f"Error writing to Excel file: {e}")
@@ -62,6 +71,31 @@ def save_results(results: Dict[str, pd.DataFrame], output_path: str, format: str
                 logger.info(f"License Info: {result}\n{df}")
 
 
+def process_items(items, process_method, *args, **kwargs) -> List:
+    """
+    Apply ``process_method`` to each item and collect the truthy results.
+
+    Args:
+        items (list): Items to process.
+        process_method (callable): Called as ``process_method(item, *args, **kwargs)``.
+        *args, **kwargs: Extra arguments forwarded unchanged to ``process_method``.
+
+    Returns:
+        list: Truthy results in input order; items that raise are logged and skipped.
+    """
+    quiet = logger.getEffectiveLevel() > logging.INFO  # also honours inherited levels
+    results = []
+    for item in tqdm.tqdm(items, desc="Processing items", unit="item", disable=quiet):
+        try:
+            result = process_method(item, *args, **kwargs)
+            if result:
+                results.append(result)
+        except Exception as e:  # best effort: one failing item must not abort the run
+            logger.error(f"Error processing item {item}: {e}")
+
+    return results
+
+
 def generatebom(args: argparse.ArgumentParser):
     """
     Generates a Bill of Materials (BOM) from environment files.
@@ -73,6 +107,7 @@ def generatebom(args: argparse.ArgumentParser):
             - platform (str, optional): Platform for which to retrieve package information.
             - output (str, optional): Path to save the output file.
             - format (str, optional): Format of the output file (e.g., 'table', 'json').
+            - version: Display the version of the package.
 
     Returns:
         None: The function saves the BOM to the specified output path in the specified format.
@@ -102,46 +137,64 @@ def generatebom(args: argparse.ArgumentParser):
     if args.verbose:
         logger.setLevel("DEBUG")
 
-    env_files = filter_by_extensions(args.path, ["yml", "txt", "toml"])
+    env_files = filter_by_extensions(args.path, ["yml", "yaml", "txt", "toml"])
 
     packageutil = CondaPackageUtil()
     pipdependencies = PyPIPackageUtil()
 
-    for env_file in env_files:
+    for index, env_file in enumerate(env_files):
         output_data = []
 
-        if env_file.suffix.lower() == ".yml":
+        if env_file.suffix.lower() in [".yml", ".yaml"] and env_file.stem == "environment":
             logger.info(f"Processing conda env file: {env_file}")
             channels, conda_packages, pip_packages = parse_conda_env(env_file)
+            # The pip and channel sections of a conda environment file are
+            # optional, so only skip when there is nothing to look up at all.
+            # Missing channels are handled by the default-channel fallback below.
+            conda_packages = conda_packages or []
+            pip_packages = pip_packages or []
+            if not conda_packages and not pip_packages:
+                logger.warning(f"No packages found in {env_file}. Skipping.")
+                continue
+
             if args.platform:
-                packageutil._cache.add_platform(args.platform)
+                packageutil._cache.platforms.append(args.platform)
 
             if channels:
-                packageutil._cache.add_channels(channels)
+                for channel in channels:
+                    packageutil._cache.add_channel(channel)
+
             else:
                 logger.warning("No channels specified in environment file. Using defaults.")
-                packageutil._cache.add_channels(packageutil._cache.DEFAULT_CHANNELS)
+                for channel in packageutil._cache.DEFAULT_CHANNELS:
+                    packageutil._cache.add_channel(channel)
 
-            conda_data = packageutil.retrieve_conda_package_info(conda_packages)
-            conda_pip_data = pipdependencies.get_pip_packages_data(pip_packages)
-            output_data.extend(conda_data + conda_pip_data)
-            output_data = conda_data + conda_pip_data
+            conda_data = process_items(conda_packages, packageutil.retrieve_conda_package_info)
+            output_data.extend(conda_data)
+            conda_pip_data = process_items(pip_packages, pipdependencies.get_pip_package_data)
+            output_data.extend(conda_pip_data)
 
-        elif env_file.suffix.lower() == ".txt":
+        elif env_file.suffix.lower() == ".txt" and env_file.stem == "requirements":
             logger.info(f"Processing pip requirements file: {env_file}")
             pip_packages = parse_requirements(env_file)
-            pip_data = pipdependencies.get_pip_packages_data(pip_packages)
+            pip_data = process_items(pip_packages, pipdependencies.get_pip_package_data)
             output_data.extend(pip_data)
 
-        elif env_file.suffix.lower() == ".toml":
-            logger.info(f"Processing poetry file: {env_file}")
+        elif env_file.suffix.lower() == ".toml" and env_file.stem == "pyproject":
+            logger.info(f"Processing pyproject file: {env_file}")
 
             pip_packages = parse_poetry_toml(env_file)
-            pip_data = pipdependencies.get_pip_packages_data(pip_packages)
+            if not pip_packages:
+                pip_packages = extract_toml_dependencies(env_file)
+            pip_data = process_items(pip_packages, pipdependencies.get_pip_package_data)
             output_data.extend(pip_data)
 
-        df = pd.DataFrame(output_data)
-        results[env_file.name] = df
+        if output_data:
+            df = pd.DataFrame(output_data)
+            # use the parent directory name as the sheet name
+            sheet_name = env_file.parent.name if env_file.parent.name else "default"
+
+            results[sheet_name] = df
 
     # Save results
     # output_path = args.output if args.output else 'bom.xlsx'
@@ -195,9 +248,18 @@ def main(argv=None):
     # Verbosity command
     parser.add_argument("-v", "--verbose", action="store_true", help="Enable verbose logging")
 
+    # Version command
+    parser.add_argument(
+        "-V",
+        "--version",
+        action="version",
+        version=f"%(prog)s {version('superbom')}",
+        help="Show version and exit",
+    )
+
     args = parser.parse_args(argv)
     generatebom(args)
 
 
-if __name__ == "__main__": # pragma: no cover
+if __name__ == "__main__":  # pragma: no cover
     main()
diff --git a/src/superbom/utils/logger.py b/src/superbom/utils/logger.py
@@ -6,6 +6,7 @@
 
 import colorlog
 
+
 class AppLogger:
     _instance = None
     _initialized = False
@@ -48,3 +49,6 @@ def _setup_logger(self):
 
     def get_logger(self):
         return self.logger
+
+    def get_level(self):
+        return self.logger.level  # the wrapped logger's own level; NOTE(review): presumably not the effective (inherited) level - confirm
diff --git a/src/superbom/utils/packageindexes/conda/condacache.py b/src/superbom/utils/packageindexes/conda/condacache.py
@@ -26,8 +26,8 @@ def __init__(self):
         self._cache_dir = Path.joinpath(Path.home(), ".cbomcache")
 
         self.caches = {}
-        self._platforms:List[str] = self.DEFAULT_PLATFORMS
-        self._channels:List[str] = self.DEFAULT_CHANNELS
+        self._platforms: List[str] = self.DEFAULT_PLATFORMS
+        self._channels: List[str] = self.DEFAULT_CHANNELS
 
     def add_cache(self, channel, platform):
         data = self.get_cached_data(channel, platform)
@@ -61,7 +61,7 @@ def platforms(self):
         return self._platforms
 
     @platforms.setter
-    def platforms(self, value:str):
+    def platforms(self, value: str):
         if not isinstance(value, str):
             raise TypeError("Platform must be a string")
 
@@ -71,18 +71,16 @@ def platforms(self, value:str):
     @property
     def channels(self):
         return self._channels
-    
-    @channels.setter
-    def channels(self, value: str) -> List[str]:
+
+    def add_channel(self, value: str) -> List[str]:
+        """Add a channel unless it is banned or already present; return the channel list."""
         if not isinstance(value, str):
             raise TypeError("Channel must be a string")
-        
+
         if value in self.BANNED_CHANNELS:
-            logger.warning(
-                "Warning - Skipping Anaconda channels."
-            )
-        elif value not in self.channels:
-            self.channels.append(value)
+            logger.warning("Warning - Skipping Anaconda channels.")
+        elif value not in self._channels:
+            self._channels.append(value)
 
         return self._channels
 
@@ -168,6 +166,6 @@ def update_cache(self):
                     logger.debug(f"Data for {channel}/{platform} already cached")
 
 
-if __name__ == "__main__": # pragma: no cover
+if __name__ == "__main__":  # pragma: no cover
     cache = CondaCache()
     cache.update_cache()
-Original file line number
+Diff line change
@@ Expand Up / @@ -7,3 +7,8 @@ tests/fuzzer @@
     tmp
     *.pyc
     .coverage
+    coverage.xml
+    *.xlsx
+    *.xls
+    *.csv
+    *.json
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		[pytest]
		addopts = --cov=superbom --cov-report=xml --cov-fail-under=85