## モジュールのインポート

In [104]:
!pip install requests


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


## リポジトリをクローン

In [105]:
import os
import subprocess
import sys

# --- Settings ---

# 1. Text file listing the repository URLs to clone, one per line
url_file_path = 'url_list.txt'

# 2. Directory that receives the cloned repositories
clone_to_directory = 'cloned_repositories'


def repo_name_from_url(repo_url):
    """Return the directory name that ``git clone`` creates for ``repo_url``.

    The name is the last path segment of the URL with trailing slashes and a
    trailing ``.git`` suffix removed. Only a *suffix* is stripped, so names that
    merely contain ".git" (e.g. ``my.github-tools``) are preserved.

    Args:
        repo_url: Repository URL (https, ssh or scp-style ``git@host:owner/repo``).

    Returns:
        The repository name, or an empty string when none can be derived.
    """
    last_segment = repo_url.strip().rstrip('/').split('/')[-1]
    # scp-style URLs without any slash keep "user@host:" in the last segment.
    last_segment = last_segment.rsplit(':', 1)[-1]
    if last_segment.endswith('.git'):
        last_segment = last_segment[:-len('.git')]
    return last_segment


# --- Start processing ---
print(f"--- Starting: Cloning repositories into '{clone_to_directory}' ---")

# Create the destination directory if it does not exist yet
os.makedirs(clone_to_directory, exist_ok=True)
print("-" * 50)

# Load the URL list, ignoring blank lines
try:
    # utf-8-sig tolerates a BOM written by some Windows editors
    with open(url_file_path, 'r', encoding='utf-8-sig') as file:
        urls = [line.strip() for line in file if line.strip()]
except FileNotFoundError:
    print(f"❌ Error: '{url_file_path}' was not found.")
    print("Please make sure the file exists in the same directory as the notebook.")
    urls = []  # nothing to clone

for repo_url in urls:
    repo_name = repo_name_from_url(repo_url)
    if not repo_name:
        print(f"⚠️ Warning: Could not determine repository name from URL: {repo_url}")
        print("-" * 50)
        continue

    # Skip repositories that were already cloned in a previous run
    repo_path = os.path.join(clone_to_directory, repo_name)
    if os.path.isdir(repo_path):
        print(f"🟢 Skipping: {repo_name} (Directory already exists)")
        print("-" * 50)
        continue

    print(f"Cloning: {repo_url}")
    try:
        # --progress forces git to emit progress even though stderr is a pipe.
        # stdout is discarded: nothing reads it, and an unread PIPE could block git.
        with subprocess.Popen(
            ['git', 'clone', '--progress', repo_url],
            cwd=clone_to_directory,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.PIPE,  # git writes progress and errors here
            text=True,
            encoding='utf-8',
            errors='replace',  # never fail on undecodable bytes
        ) as process:
            stderr_lines = []
            # Text mode translates git's '\r' progress updates into line breaks,
            # so each update arrives as one line. Iterating until EOF guarantees
            # that nothing (including the final error message) is lost.
            for raw_line in process.stderr:
                progress_line = raw_line.strip()
                if not progress_line:
                    continue
                stderr_lines.append(progress_line)
                # '\r' returns the cursor so the next update overwrites this one
                print(f"   {progress_line}", end='\r')
            process.wait()

        # Clear the progress line once the process has finished
        print(" " * 80, end="\r")

        if process.returncode == 0:
            print("✅ Clone successful.")
        else:
            # The failure reason is at the end of what git wrote to stderr
            error_message = "\n".join(stderr_lines[-5:]) or (
                f"git exited with code {process.returncode}"
            )
            print(f"❌ Error cloning. Reason: {error_message}")

    except FileNotFoundError:
        print("❌ Error: 'git' command not found. Please install Git and ensure it is in your system's PATH.")
        break  # no point in continuing without git
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        print("-" * 50)

print("Clone process finished.")

--- Starting: Cloning repositories into 'cloned_repositories' ---
--------------------------------------------------
🟢 Skipping: sbom-python-test (Directory already exists)
--------------------------------------------------
🟢 Skipping: sbom-go-test (Directory already exists)
--------------------------------------------------
🟢 Skipping: Python (Directory already exists)
--------------------------------------------------
🟢 Skipping: python-mini-projects (Directory already exists)
--------------------------------------------------
Clone process finished.


## sbom-toolのsbomを生成

In [106]:
import os
import subprocess
import re
import shutil

# --- Settings ---
clone_to_directory = 'cloned_repositories'
sbom_output_directory = 'generated_sboms'


def github_owner_from_git_config(config_text):
    """Extract the GitHub owner from the remote URL in a ``.git/config`` file.

    Recognizes ``https://github.com/<owner>/...``, ``ssh://git@github.com/<owner>/...``
    and scp-style ``git@github.com:<owner>/...`` remotes.

    Args:
        config_text: Full text of a ``.git/config`` file.

    Returns:
        The owner name of the first matching remote, or ``"Unknown"``.
    """
    match = re.search(
        r'url\s*=\s*(?:https?://|ssh://git@|git@)github\.com[:/]([^/\s]+)/',
        config_text,
    )
    return match.group(1) if match else "Unknown"


# --- Start processing ---
print(f"--- Starting: Generating and moving SBOMs ---")
print("-" * 50)

original_path = os.getcwd()
os.makedirs(sbom_output_directory, exist_ok=True)

try:
    repo_dirs = [d for d in os.listdir(clone_to_directory) if os.path.isdir(os.path.join(clone_to_directory, d))]

    if not repo_dirs:
        print("No repositories found to run commands on.")

    for repo_name in repo_dirs:
        # Final location: generated_sboms/<repo>/source/_manifest
        final_repo_dir = os.path.join(original_path, sbom_output_directory, repo_name)
        destination_dir = os.path.join(final_repo_dir, 'source')
        final_manifest_path = os.path.join(destination_dir, '_manifest')

        if os.path.isdir(final_manifest_path):
            print(f"🟢 Skipping: {repo_name} (SBOM manifest already exists)")
            print("-" * 50)
            continue

        repo_path = os.path.join(clone_to_directory, repo_name)
        print(f"▶️  Entering: {repo_name}")

        try:
            # Commands run with cwd=repo_path instead of os.chdir(), so the
            # notebook's working directory is never changed.

            # --- Collect parameters ---
            package_name = repo_name
            git_version_result = subprocess.run(
                ['git', 'rev-parse', 'HEAD'],
                cwd=repo_path, capture_output=True, text=True, check=True,
            )
            package_version = git_version_result.stdout.strip()

            # A missing/unreadable .git/config only means the supplier is unknown;
            # it must not be mistaken for a missing executable.
            package_supplier = "Unknown"
            try:
                with open(os.path.join(repo_path, '.git', 'config'), 'r') as config_file:
                    package_supplier = github_owner_from_git_config(config_file.read())
            except OSError as e:
                print(f"⚠️ Warning: Could not read '.git/config' ({e}). Using supplier 'Unknown'.")

            print(f"   Package Name: {package_name}")
            print(f"   Package Version: {package_version[:12]}...")
            print(f"   Package Supplier: {package_supplier}")

            # A leftover _manifest from an earlier run is removed before generating
            source_manifest_path = os.path.join(repo_path, '_manifest')
            if os.path.isdir(source_manifest_path):
                print("   Found existing '_manifest' directory. Removing it.")
                shutil.rmtree(source_manifest_path)

            # --- Run sbom-tool ---
            command = [
                'sbom-tool', 'generate', '-bc', '.', '-b', '.',
                '-pn', package_name, '-pv', package_version, '-ps', package_supplier
            ]
            print(f"   Executing: {' '.join(command)}")
            subprocess.run(command, cwd=repo_path, check=True, capture_output=True, text=True)
            print("✅ SBOM generation successful.")

            # --- Move the generated manifest under <output>/<repo>/source/ ---
            if os.path.isdir(source_manifest_path):
                os.makedirs(destination_dir, exist_ok=True)
                print(f"   Moving '{source_manifest_path}' into: {destination_dir}")
                shutil.move(source_manifest_path, destination_dir)
                print("✅ Move successful.")
            else:
                print(f"⚠️ Warning: Could not find generated manifest directory at '{source_manifest_path}'")

        except FileNotFoundError as e:
            # subprocess raises FileNotFoundError when the executable is missing
            print(f"❌ Error: The command 'sbom-tool' or 'git' was not found.")
            print(f"   Details: {e}")
            break
        except subprocess.CalledProcessError as e:
            print(f"❌ Error executing command in {repo_name}.")
            if e.stdout:
                print(f"   [stdout]:\n{e.stdout.strip()}")
            if e.stderr:
                print(f"   [stderr]:\n{e.stderr.strip()}")
        finally:
            print("-" * 50)

except FileNotFoundError:
    print(f"❌ Error: The directory '{clone_to_directory}' was not found.")

print("All processes finished.")

--- Starting: Generating and moving SBOMs ---
--------------------------------------------------
🟢 Skipping: sbom-python-test (SBOM manifest already exists)
--------------------------------------------------
🟢 Skipping: Python (SBOM manifest already exists)
--------------------------------------------------
🟢 Skipping: python-mini-projects (SBOM manifest already exists)
--------------------------------------------------
🟢 Skipping: sbom-go-test (SBOM manifest already exists)
--------------------------------------------------
All processes finished.


## syftのsbom生成

In [107]:
import os
import subprocess
import re
import shutil

# --- Settings ---
clone_to_directory = 'cloned_repositories'
sbom_output_directory = 'generated_sboms'
# Fixed file name of the Syft SBOM inside <output>/<repo>/source/
final_sbom_filename = "syft-sbom.json"

# --- Start processing ---
print(f"--- Starting: Generating SBOMs with Syft ---")
print("-" * 50)

original_path = os.getcwd()
os.makedirs(sbom_output_directory, exist_ok=True)

try:
    repo_dirs = [d for d in os.listdir(clone_to_directory) if os.path.isdir(os.path.join(clone_to_directory, d))]

    if not repo_dirs:
        print("No repositories found to run commands on.")

    for repo_name in repo_dirs:
        repo_path = os.path.join(clone_to_directory, repo_name)
        destination_dir = os.path.join(original_path, sbom_output_directory, repo_name, 'source')
        final_sbom_path = os.path.join(destination_dir, final_sbom_filename)

        # The skip check lives outside the try/finally below; otherwise the
        # `finally` separator would be printed a second time on every skip.
        if os.path.exists(final_sbom_path):
            print(f"🟢 Skipping: {repo_name} (SBOM file already exists)")
            print("-" * 50)
            continue

        print(f"▶️  Entering: {repo_name}")
        try:
            # --- Run syft inside the repository (cwd=) without changing directory ---
            command = ['syft', 'dir:./', '-o', 'spdx-json']
            print(f"   Executing: {' '.join(command)} > {final_sbom_filename}")

            # Decode explicitly as UTF-8 so the JSON is not garbled on systems
            # whose locale encoding differs.
            result = subprocess.run(
                command, cwd=repo_path, check=True,
                capture_output=True, text=True, encoding='utf-8'
            )
            print("✅ Syft execution successful.")

            # --- Save the SBOM ---
            # Write to a temporary file next to the target and rename it, so a
            # partially written file can never be mistaken for a finished SBOM.
            os.makedirs(destination_dir, exist_ok=True)
            print(f"   Writing SBOM to: {final_sbom_path}")
            temp_sbom_path = final_sbom_path + '.tmp'
            try:
                with open(temp_sbom_path, 'w', encoding='utf-8') as f:
                    f.write(result.stdout)
                os.replace(temp_sbom_path, final_sbom_path)
            finally:
                if os.path.exists(temp_sbom_path):
                    os.remove(temp_sbom_path)
            print("✅ SBOM file saved.")

        except FileNotFoundError:
            # Raised by subprocess when the syft executable is missing
            print(f"❌ Error: The command 'syft' was not found.")
            break
        except subprocess.CalledProcessError as e:
            print(f"❌ Error executing command in {repo_name}.")
            if e.stdout:
                print(f"   [stdout]:\n{e.stdout.strip()}")
            if e.stderr:
                print(f"   [stderr]:\n{e.stderr.strip()}")
        finally:
            print("-" * 50)

except FileNotFoundError:
    print(f"❌ Error: The directory '{clone_to_directory}' was not found.")

print("All processes finished.")

--- Starting: Generating SBOMs with Syft ---
--------------------------------------------------
🟢 Skipping: sbom-python-test (SBOM file already exists)
--------------------------------------------------
--------------------------------------------------
🟢 Skipping: Python (SBOM file already exists)
--------------------------------------------------
--------------------------------------------------
🟢 Skipping: python-mini-projects (SBOM file already exists)
--------------------------------------------------
--------------------------------------------------
🟢 Skipping: sbom-go-test (SBOM file already exists)
--------------------------------------------------
--------------------------------------------------
All processes finished.


## dependency graphのsbom取得

In [108]:
import os
import requests
import re
import json

# --- Settings ---

# 1. Text file listing the GitHub repository URLs, one per line
url_file_path = 'url_list.txt'

# 2. Directory the fetched SBOMs (JSON files) are stored in
sbom_output_directory = 'generated_sboms'

# 3. Seconds to wait for the GitHub API before giving up on a request
request_timeout_seconds = 30


def parse_github_owner_repo(repo_url):
    """Split a GitHub repository URL into ``(owner, repo)``.

    A trailing ``.git`` suffix is removed from the repository name; dots inside
    the name (e.g. ``my.repo``) are preserved.

    Args:
        repo_url: URL such as ``https://github.com/owner/repo(.git)`` or
            ``git@github.com:owner/repo.git``.

    Returns:
        A ``(owner, repo)`` tuple, or ``None`` if the URL is not a GitHub
        repository URL.
    """
    match = re.search(r"github\.com[:/]([^/\s]+)/([^/\s?#]+)", repo_url)
    if not match:
        return None
    owner, repo = match.groups()
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    return (owner, repo) if repo else None


# --- Start processing ---
print("--- Starting: Fetching SBOMs from GitHub API (No Token) ---")
print("-" * 50)

original_path = os.getcwd()
os.makedirs(sbom_output_directory, exist_ok=True)

try:
    with open(url_file_path, 'r') as file:
        urls = [line.strip() for line in file.readlines() if line.strip()]
except FileNotFoundError:
    print(f"❌ Error: '{url_file_path}' was not found.")
    urls = []

for repo_url in urls:
    # Extract owner and repository name from the URL
    parsed = parse_github_owner_repo(repo_url)
    if not parsed:
        print(f"⚠️ Warning: Could not parse owner/repo from URL: {repo_url}")
        continue

    owner, repo_name = parsed

    # --- Destination path and skip check ---
    destination_dir = os.path.join(original_path, sbom_output_directory, repo_name, 'source')
    final_sbom_path = os.path.join(destination_dir, 'dependency-graph-sbom.json')

    if os.path.exists(final_sbom_path):
        print(f"🟢 Skipping: {repo_name} (SBOM file already exists)")
        print("-" * 50)
        continue

    print(f"▶️  Fetching SBOM for: {owner}/{repo_name}")

    # --- Request to the GitHub API (no authentication header) ---
    api_url = f"https://api.github.com/repos/{owner}/{repo_name}/dependency-graph/sbom"
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28"
    }

    try:
        # Without a timeout, a stalled connection would hang the cell forever
        response = requests.get(api_url, headers=headers, timeout=request_timeout_seconds)

        if response.status_code == 200:
            print("✅ API request successful.")
            sbom_data = response.json().get('sbom')
            if not sbom_data:
                print(f"❌ Error: 'sbom' key not found in the API response for {repo_name}.")
                continue

            # --- Save the SBOM to a file ---
            os.makedirs(destination_dir, exist_ok=True)
            print(f"   Writing SBOM to: {final_sbom_path}")

            with open(final_sbom_path, 'w', encoding='utf-8') as f:
                json.dump(sbom_data, f, ensure_ascii=False, indent=2)
            print("✅ SBOM file saved.")

        elif response.status_code == 404:
            print(f"⚠️ Warning: Could not fetch SBOM for {repo_name}. (Status: 404)")
            print("   The repository may not exist, or the Dependency Graph may not be enabled.")

        else:
            print(f"❌ Error: Failed to fetch SBOM for {repo_name}. Status code: {response.status_code}")
            print(f"   Response: {response.text}")

    except requests.exceptions.RequestException as e:
        print(f"❌ Error: A network error occurred while contacting the GitHub API.")
        print(f"   Details: {e}")

    finally:
        print("-" * 50)

print("All processes finished.")

--- Starting: Fetching SBOMs from GitHub API (No Token) ---
--------------------------------------------------
🟢 Skipping: sbom-python-test (SBOM file already exists)
--------------------------------------------------
🟢 Skipping: sbom-go-test (SBOM file already exists)
--------------------------------------------------
🟢 Skipping: Python (SBOM file already exists)
--------------------------------------------------
🟢 Skipping: python-mini-projects (SBOM file already exists)
--------------------------------------------------
All processes finished.


## sbom-toolのsbomをもとにして作成するためソートしてコピー

In [109]:
import os
import shutil
import json

# --- Settings ---

# 1. Parent directory to process
target_directory = 'generated_sboms'

# 2. Name of the file to generate
target_filename = 'combined_sbom.json'

# 3. Desired order of the top-level keys
key_order = [
    "SPDXID",
    "spdxVersion",
    "creationInfo",
    "name",
    "dataLicense",
    "documentNamespace",
    "documentDescribes",
    "externalDocumentRefs",
    "packages",
    "files",
    "relationships",
]


def reorder_keys(data, preferred_order):
    """Return a copy of ``data`` with its keys arranged in ``preferred_order``.

    Keys named in ``preferred_order`` come first (those absent from ``data`` are
    skipped); all remaining keys follow in their original order.

    Args:
        data: Dictionary to reorder (not modified).
        preferred_order: Iterable of key names defining the leading order.

    Returns:
        A new dictionary with the same items as ``data``.
    """
    ordered = {key: data[key] for key in preferred_order if key in data}
    for key, value in data.items():
        if key not in ordered:
            ordered[key] = value
    return ordered


# --- Start processing ---
print(f"--- Starting: Copying and reordering SBOMs ---")
print("-" * 50)

try:
    # Repository names are the sub-directories of 'generated_sboms'
    repo_dirs = [d for d in os.listdir(target_directory) if os.path.isdir(os.path.join(target_directory, d))]

    if not repo_dirs:
        print(f"No repository directories found in '{target_directory}'.")

    for repo_name in repo_dirs:
        print(f"▶️  Processing: {repo_name}")

        # --- Paths ---
        source_sbom_path = os.path.join(target_directory, repo_name, 'source', '_manifest', 'spdx_2.2', 'manifest.spdx.json')
        destination_sbom_path = os.path.join(target_directory, repo_name, target_filename)

        # --- Skip if the combined file already exists ---
        if os.path.exists(destination_sbom_path):
            print(f"🟢 Skipping: '{target_filename}' already exists.")
            print("-" * 50)
            continue

        # --- Make sure the sbom-tool output is there ---
        if not os.path.exists(source_sbom_path):
            print(f"⚠️ Warning: sbom-tool SBOM not found for {repo_name}. Skipping.")
            print("-" * 50)
            continue

        try:
            # --- Step 1: load the sbom-tool output ---
            # The source is parsed BEFORE anything is written, so a corrupted
            # source can never leave behind a destination file that later runs
            # would silently skip.
            print(f"   Copying sbom-tool's output to '{target_filename}'...")
            with open(source_sbom_path, 'r', encoding='utf-8') as f:
                original_data = json.load(f)

            # --- Step 2: reorder the top-level keys ---
            print(f"   Reordering keys...")
            ordered_data = reorder_keys(original_data, key_order)

            # --- Step 3: write via a temporary file and rename into place ---
            temp_sbom_path = destination_sbom_path + '.tmp'
            try:
                with open(temp_sbom_path, 'w', encoding='utf-8') as f:
                    json.dump(ordered_data, f, indent=2, ensure_ascii=False)
                os.replace(temp_sbom_path, destination_sbom_path)
            finally:
                if os.path.exists(temp_sbom_path):
                    os.remove(temp_sbom_path)
            print(f"   ✅ Keys reordered and saved.")

        except json.JSONDecodeError:
            print(f"❌ Error: Could not parse source JSON file. It may be corrupted.")
        except Exception as e:
            print(f"❌ An unexpected error occurred: {e}")

        finally:
            print("-" * 50)

except FileNotFoundError:
    print(f"❌ Error: The source directory '{target_directory}' was not found.")

print("All processes finished.")

--- Starting: Copying and reordering SBOMs ---
--------------------------------------------------
▶️  Processing: sbom-python-test
   Copying sbom-tool's output to 'combined_sbom.json'...
   ✅ Copy successful.
   Reordering keys...
   ✅ Keys reordered and saved.
--------------------------------------------------
▶️  Processing: Python
🟢 Skipping: 'combined_sbom.json' already exists.
--------------------------------------------------
▶️  Processing: python-mini-projects
   Copying sbom-tool's output to 'combined_sbom.json'...
   ✅ Copy successful.
   Reordering keys...
   ✅ Keys reordered and saved.
--------------------------------------------------
▶️  Processing: sbom-go-test
   Copying sbom-tool's output to 'combined_sbom.json'...
   ✅ Copy successful.
   Reordering keys...
   ✅ Keys reordered and saved.
--------------------------------------------------
All processes finished.


## dependency-graphから情報を補完

補完対象メモ:

- licenseDeclared / licenseConcluded
- copyrightText
- creators を追加


In [110]:
import os
import json

# --- Settings ---
target_directory = 'generated_sboms'
base_sbom_filename = 'combined_sbom.json'
source_sbom_filename = 'dependency-graph-sbom.json'


def supplement_from_dependency_graph(base_data, source_data):
    """Merge details from a dependency-graph SBOM into ``base_data`` in place.

    The following is applied, printing one line per change:

    * packages matched by purl: ``licenseConcluded`` / ``copyrightText`` are
      filled in when the base value is ``NOASSERTION`` and the source has a
      real value;
    * source packages whose name does not occur in the base are appended;
    * source ``creationInfo.creators`` entries missing from the base are
      appended (only when both documents have a creators list);
    * the source document ``comment`` is added, or appended after a blank line.

    Args:
        base_data: Parsed base SBOM (modified in place).
        source_data: Parsed dependency-graph SBOM (read only).

    Returns:
        int: number of individual updates/additions made.
    """

    def purl_of(pkg):
        # Locator of the first externalRef of type 'purl', or None
        for ref in pkg.get('externalRefs') or []:
            if ref.get('referenceType') == 'purl':
                return ref.get('referenceLocator')
        return None

    changes_made = 0

    # --- Step 2: supplement existing packages, matched by purl ---
    source_package_map = {}
    for pkg in source_data.get('packages', []):
        purl = purl_of(pkg)
        if purl:
            source_package_map[purl] = {
                'licenseConcluded': pkg.get('licenseConcluded', 'NOASSERTION'),
                'copyrightText': pkg.get('copyrightText', 'NOASSERTION')
            }

    for pkg in base_data.get('packages', []):
        purl = purl_of(pkg)
        if not purl or purl not in source_package_map:
            continue
        source_pkg_info = source_package_map[purl]

        if pkg.get('licenseConcluded') == 'NOASSERTION' and source_pkg_info['licenseConcluded'] != 'NOASSERTION':
            pkg['licenseConcluded'] = source_pkg_info['licenseConcluded']
            changes_made += 1
            print(f"   Updated license for: {pkg.get('name')}")

        if pkg.get('copyrightText') == 'NOASSERTION' and source_pkg_info['copyrightText'] != 'NOASSERTION':
            pkg['copyrightText'] = source_pkg_info['copyrightText']
            changes_made += 1
            print(f"   Updated copyright for: {pkg.get('name')}")

    # --- Step 3: append packages that are missing from the base (judged by name) ---
    if 'packages' in source_data:
        base_pkg_names = {pkg.get('name') for pkg in base_data.get('packages', []) if pkg.get('name')}
        new_pkgs_added = 0

        for source_pkg in source_data.get('packages', []):
            pkg_name = source_pkg.get('name')
            if pkg_name and pkg_name not in base_pkg_names:
                base_data.setdefault('packages', []).append(source_pkg)
                base_pkg_names.add(pkg_name)  # remember it so it is not added twice
                new_pkgs_added += 1
                print(f"   Added new package from dependency-graph: {pkg_name}")

        if new_pkgs_added > 0:
            changes_made += new_pkgs_added
            print(f"   A total of {new_pkgs_added} new packages were added.")

    # --- Step 4: append missing creationInfo.creators entries ---
    if 'creationInfo' in source_data and 'creators' in source_data['creationInfo'] and \
       'creationInfo' in base_data and 'creators' in base_data['creationInfo']:

        base_creators = base_data['creationInfo']['creators']

        for creator in source_data['creationInfo']['creators']:
            if creator not in base_creators:
                base_creators.append(creator)
                changes_made += 1
                print(f"   Added creator: {creator}")

    # --- Step 5: carry over the document-level comment ---
    source_doc_comment = source_data.get('comment')
    if source_doc_comment:
        comment_header = "Note from dependency-graph"
        full_comment_to_add = f"{comment_header}: {source_doc_comment}"
        if not base_data.get('comment'):
            base_data['comment'] = full_comment_to_add
            changes_made += 1
            print("   Added document-level comment.")
        elif full_comment_to_add not in base_data['comment']:
            # Real newlines: json.dump escapes them itself. The previous "\\n\\n"
            # stored literal backslash-n characters in the comment.
            base_data['comment'] += f"\n\n{full_comment_to_add}"
            changes_made += 1
            print("   Appended document-level comment.")

    return changes_made


# --- Start processing ---
print(f"--- Starting: Supplementing '{base_sbom_filename}' with richer data and comments ---")
print("-" * 50)

try:
    # Repository names are the sub-directories of 'generated_sboms'
    repo_dirs = [d for d in os.listdir(target_directory) if os.path.isdir(os.path.join(target_directory, d))]

    if not repo_dirs:
        print(f"No repository directories found in '{target_directory}'.")

    for repo_name in repo_dirs:
        print(f"▶️  Processing: {repo_name}")

        # --- Paths ---
        base_sbom_path = os.path.join(target_directory, repo_name, base_sbom_filename)
        source_sbom_path = os.path.join(target_directory, repo_name, 'source', source_sbom_filename)

        # --- Both files must exist ---
        if not os.path.exists(base_sbom_path):
            print(f"⚠️ Warning: Base file '{base_sbom_filename}' not found. Skipping.")
            print("-" * 50)
            continue
        if not os.path.exists(source_sbom_path):
            print(f"⚠️ Warning: Source file '{source_sbom_filename}' not found. Skipping.")
            print("-" * 50)
            continue

        try:
            # --- Step 1: load both SBOM files ---
            with open(base_sbom_path, 'r', encoding='utf-8') as f:
                base_data = json.load(f)
            with open(source_sbom_path, 'r', encoding='utf-8') as f:
                source_data = json.load(f)

            # --- Steps 2-5: merge ---
            changes_made = supplement_from_dependency_graph(base_data, source_data)

            # --- Step 6: rewrite the file only when something changed ---
            if changes_made > 0:
                print(f"   {changes_made} fields were updated/added. Reordering and saving file...")

                key_order = [
                    "SPDXID", "spdxVersion", "creationInfo", "name", "dataLicense",
                    "documentNamespace", "comment", "documentDescribes", "externalDocumentRefs",
                    "packages", "files", "relationships"
                ]
                ordered_data = {key: base_data[key] for key in key_order if key in base_data}
                ordered_data.update({key: value for key, value in base_data.items() if key not in ordered_data})

                with open(base_sbom_path, 'w', encoding='utf-8') as f:
                    json.dump(ordered_data, f, indent=2, ensure_ascii=False)
                print(f"✅ Successfully supplemented '{base_sbom_filename}'.")
            else:
                print("   No fields needed updating or adding.")

        except json.JSONDecodeError as e:
            print(f"❌ Error: Could not parse a JSON file. Details: {e}")
        except Exception as e:
            print(f"❌ An unexpected error occurred: {e}")

        finally:
            print("-" * 50)

except FileNotFoundError:
    print(f"❌ Error: The top-level directory '{target_directory}' was not found.")

print("All processes finished.")

--- Starting: Supplementing 'combined_sbom.json' with richer data and comments ---
--------------------------------------------------


▶️  Processing: sbom-python-test
   Updated license for: pyfiglet
   Updated copyright for: pyfiglet
   Updated license for: colorama
   Updated copyright for: colorama
   Added new package from dependency-graph: com.github.MoriwakiYusuke/sbom-python-test
   A total of 1 new packages were added.
   Added creator: Tool: protobom-v0.0.0-20251017101805-28c6776db0cf+dirty
   Added creator: Tool: GitHub.com-Dependency-Graph
   7 fields were updated/added. Reordering and saving file...
✅ Successfully supplemented 'combined_sbom.json'.
--------------------------------------------------
▶️  Processing: Python
   No fields needed updating or adding.
--------------------------------------------------
▶️  Processing: python-mini-projects
   Updated license for: pycryptodome
   Updated license for: idna
   Updated license for: click
   Updated copyright for: click
   Updated license for: webdriver-manager
   Updated license for: requests
   Updated copyright for: requests
   Updated copyright for:

## syftから情報を補完

In [111]:
import os
import json

def get_purl_from_package(pkg):
    """Return the Package URL (purl) stored on an SPDX package entry.

    Scans the package's ``externalRefs`` and returns the ``referenceLocator``
    of the first entry whose ``referenceType`` is ``'purl'``. Returns ``None``
    when the package has no ``externalRefs`` or no purl entry.
    """
    purl_refs = (
        ref for ref in pkg.get('externalRefs', ())
        if ref.get('referenceType') == 'purl'
    )
    first_purl_ref = next(purl_refs, None)
    if first_purl_ref is None:
        return None
    return first_purl_ref.get('referenceLocator')

# --- Settings ---
target_directory = 'generated_sboms'
base_sbom_filename = 'combined_sbom.json'
# File name of the SBOM generated by syft
source_sbom_filename = 'syft-sbom.json'

# --- Start processing ---
print(f"--- Starting: Supplementing '{base_sbom_filename}' with data from syft ---")
print("-" * 50)

try:
    repo_dirs = [d for d in os.listdir(target_directory) if os.path.isdir(os.path.join(target_directory, d))]

    if not repo_dirs:
        print(f"No repository directories found in '{target_directory}'.")

    for repo_name in repo_dirs:
        print(f"▶️  Processing: {repo_name}")

        # --- Paths ---
        base_sbom_path = os.path.join(target_directory, repo_name, base_sbom_filename)
        source_sbom_path = os.path.join(target_directory, repo_name, 'source', source_sbom_filename)

        # --- Both files must exist ---
        if not os.path.exists(base_sbom_path) or not os.path.exists(source_sbom_path):
            print(f"⚠️ Warning: One or both SBOM files are missing. Skipping.")
            print("-" * 50)
            continue

        try:
            # --- Step 1: load both SBOM files ---
            with open(base_sbom_path, 'r', encoding='utf-8') as f:
                base_data = json.load(f)
            with open(source_sbom_path, 'r', encoding='utf-8') as f:
                source_data = json.load(f)

            # Snapshot of the base document; comparing against it at the end is
            # the single source of truth for "did anything change?".
            initial_data_str = json.dumps(base_data, sort_keys=True)

            # --- Step 2: index syft packages by purl (first occurrence wins) ---
            source_package_map = {}
            for pkg in source_data.get('packages', []):
                purl = get_purl_from_package(pkg)
                if purl and purl not in source_package_map:
                    source_package_map[purl] = pkg

            # --- Step 3: supplement packages that exist in both documents ---
            for pkg in base_data.get('packages', []):
                purl = get_purl_from_package(pkg)
                if not purl or purl not in source_package_map:
                    continue
                source_pkg = source_package_map[purl]

                # Supplier / originator: fill in only when the base has no real value
                for field in ('supplier', 'originator'):
                    base_value = pkg.get(field)
                    source_value = source_pkg.get(field)
                    if (not base_value or base_value == 'NOASSERTION') and \
                       source_value and source_value != 'NOASSERTION':
                        pkg[field] = source_value

                # Where the package was detected
                if 'sourceInfo' not in pkg and source_pkg.get('sourceInfo'):
                    pkg['sourceInfo'] = source_pkg['sourceInfo']

                # External references (CPE etc.) not present yet
                if 'externalRefs' not in pkg:
                    pkg['externalRefs'] = []
                existing_locators = {ref.get('referenceLocator') for ref in pkg['externalRefs']}
                for new_ref in source_pkg.get('externalRefs', []):
                    locator = new_ref.get('referenceLocator')
                    if locator not in existing_locators:
                        pkg['externalRefs'].append(new_ref)
                        # Track it so a locator repeated in the source is added once
                        existing_locators.add(locator)

            # --- Step 4: append packages missing from the base (judged by name) ---
            if 'packages' in source_data:
                base_pkg_names = {pkg.get('name') for pkg in base_data.get('packages', []) if pkg.get('name')}

                for source_pkg in source_data.get('packages', []):
                    pkg_name = source_pkg.get('name')
                    if pkg_name and pkg_name not in base_pkg_names:
                        base_data.setdefault('packages', []).append(source_pkg)
                        base_pkg_names.add(pkg_name)

            # --- Step 5: add syft's tool information to creators ---
            if 'creationInfo' in source_data and 'creators' in source_data['creationInfo']:
                base_creators = base_data.setdefault('creationInfo', {}).setdefault('creators', [])
                for creator in source_data['creationInfo']['creators']:
                    if creator not in base_creators:
                        base_creators.append(creator)

            # --- Step 6: save only when the document actually changed ---
            if initial_data_str != json.dumps(base_data, sort_keys=True):
                print(f"   Changes detected. Reordering keys and saving file...")

                key_order = [
                    "SPDXID", "spdxVersion", "creationInfo", "name", "dataLicense",
                    "documentNamespace", "comment", "documentDescribes", "externalDocumentRefs",
                    "packages", "files", "relationships"
                ]
                ordered_data = {key: base_data[key] for key in key_order if key in base_data}
                ordered_data.update({key: value for key, value in base_data.items() if key not in ordered_data})

                with open(base_sbom_path, 'w', encoding='utf-8') as f:
                    json.dump(ordered_data, f, indent=2, ensure_ascii=False)
                print(f"✅ Successfully supplemented and reordered '{base_sbom_filename}'.")
            else:
                print("   No new information to supplement from syft.")

        except (json.JSONDecodeError, KeyError) as e:
            print(f"❌ Error processing files for {repo_name}. Details: {e}")

        finally:
            print("-" * 50)

except FileNotFoundError:
    print(f"❌ Error: The top-level directory '{target_directory}' was not found.")

print("All processes finished.")

--- Starting: Supplementing 'combined_sbom.json' with data from syft ---
--------------------------------------------------
▶️  Processing: sbom-python-test
   Changes detected. Reordering keys and saving file...
✅ Successfully supplemented and reordered 'combined_sbom.json'.
--------------------------------------------------
▶️  Processing: Python
   No new information to supplement from syft.
--------------------------------------------------
▶️  Processing: python-mini-projects
   Changes detected. Reordering keys and saving file...
✅ Successfully supplemented and reordered 'combined_sbom.json'.
--------------------------------------------------
▶️  Processing: sbom-go-test
   Changes detected. Reordering keys and saving file...
✅ Successfully supplemented and reordered 'combined_sbom.json'.
--------------------------------------------------
All processes finished.
