## 依存パッケージのインストール

In [32]:
!pip install requests

Collecting requests
  Using cached requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting charset_normalizer<4,>=2 (from requests)
  Using cached charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (37 kB)
Collecting idna<4,>=2.5 (from requests)
  Using cached idna-3.11-py3-none-any.whl.metadata (8.4 kB)
Collecting urllib3<3,>=1.21.1 (from requests)
  Using cached urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Collecting certifi>=2017.4.17 (from requests)
  Using cached certifi-2025.10.5-py3-none-any.whl.metadata (2.5 kB)
Using cached requests-2.32.5-py3-none-any.whl (64 kB)
Using cached charset_normalizer-3.4.4-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (153 kB)
Using cached idna-3.11-py3-none-any.whl (71 kB)
Using cached urllib3-2.5.0-py3-none-any.whl (129 kB)
Using cached certifi-2025.10.5-py3-none-any.whl (163 kB)
Installing collected packages: urllib3, idna, charset_normal

## リポジトリをクローン

In [29]:
import os
import subprocess
import sys

# --- Settings ---

# 1. Text file listing the repository URLs to clone (one URL per line)
url_file_path = 'url_list.txt'

# 2. Directory in which the cloned repositories are stored
clone_to_directory = 'cloned_repositories'


def repo_name_from_url(repo_url):
    """Derive the local directory name for a repository URL.

    Trailing slashes are ignored and only a *trailing* ``.git`` is removed,
    so names that merely contain ``.git`` (e.g. ``user.github.io``) are kept
    intact. Both ``/`` and ``:`` are treated as separators so scp-style
    remotes such as ``git@host:repo.git`` also work.

    Args:
        repo_url: Repository URL as written in the URL list.

    Returns:
        The last path component without a ``.git`` suffix; an empty string
        when no name can be derived.
    """
    name = repo_url.strip().rstrip('/').split('/')[-1].split(':')[-1]
    if name.endswith('.git'):
        name = name[:-len('.git')]
    return name


def clone_repository(repo_url, repo_name, parent_directory):
    """Run ``git clone`` for one repository while echoing its progress.

    stderr is merged into stdout so that a single pipe is drained; this
    avoids leaving an unread pipe behind and keeps git's error text
    available for the caller.

    Args:
        repo_url: URL passed to ``git clone``.
        repo_name: Destination directory name (relative to ``parent_directory``).
        parent_directory: Working directory in which git is executed.

    Returns:
        A ``(returncode, output_tail)`` tuple where ``output_tail`` is a list
        holding the last (up to 10) non-empty lines git printed.

    Raises:
        FileNotFoundError: If the ``git`` executable cannot be started.
    """
    output_tail = []
    with subprocess.Popen(
        # '--' stops option parsing so a URL can never be read as a flag.
        ['git', 'clone', '--progress', '--', repo_url, repo_name],
        cwd=parent_directory,
        stdout=subprocess.PIPE,
        stderr=subprocess.STDOUT,  # progress and errors arrive on one stream
        text=True,
        encoding='utf-8',
        errors='replace'  # never fail on undecodable bytes in git's output
    ) as process:
        for raw_line in process.stdout:
            text_line = raw_line.strip()
            if not text_line:
                continue
            # Pad to a fixed width and return the cursor to column 0 so each
            # progress update overwrites the previous one completely.
            print(f"   {text_line}".ljust(80), end='\r')
            output_tail.append(text_line)
            del output_tail[:-10]  # keep only the most recent lines
    # Leaving the ``with`` block closes the pipe and waits for git to exit.
    return process.returncode, output_tail


# --- Main processing ---
print(f"--- Starting: Cloning repositories into '{clone_to_directory}' ---")

# Create the destination directory if it does not exist yet
os.makedirs(clone_to_directory, exist_ok=True)
print("-" * 50)

# Load the URL list, ignoring blank lines
try:
    # 'utf-8-sig' also accepts a file that starts with a UTF-8 BOM.
    with open(url_file_path, 'r', encoding='utf-8-sig') as file:
        urls = [line.strip() for line in file if line.strip()]
except FileNotFoundError:
    print(f"❌ Error: '{url_file_path}' was not found.")
    print("Please make sure the file exists in the same directory as the notebook.")
    urls = []  # nothing to process when the list is missing

for repo_url in urls:
    repo_name = repo_name_from_url(repo_url)
    if not repo_name:
        print(f"⚠️ Warning: Could not determine a repository name from URL: {repo_url}")
        print("-" * 50)
        continue

    # Skip repositories whose target directory already exists
    repo_path = os.path.join(clone_to_directory, repo_name)
    if os.path.isdir(repo_path):
        print(f"🟢 Skipping: {repo_name} (Directory already exists)")
        print("-" * 50)
        continue

    print(f"Cloning: {repo_url}")
    try:
        returncode, output_tail = clone_repository(repo_url, repo_name, clone_to_directory)

        # Clear the progress line once the process has finished
        print(" " * 80, end="\r")

        if returncode == 0:
            print("✅ Clone successful.")
        else:
            error_message = "\n".join(output_tail)
            print(f"❌ Error cloning. Reason: {error_message.strip()}")

    except FileNotFoundError:
        print("❌ Error: 'git' command not found. Please install Git and ensure it is in your system's PATH.")
        break  # without Git no further clone can succeed
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        print("-" * 50)

print("Clone process finished.")

--- Starting: Cloning repositories into 'cloned_repositories' ---
--------------------------------------------------
🟢 Skipping: sbom-python-test (Directory already exists)
--------------------------------------------------
🟢 Skipping: sbom-go-test (Directory already exists)
--------------------------------------------------
🟢 Skipping: Python (Directory already exists)
--------------------------------------------------
🟢 Skipping: python-mini-projects (Directory already exists)
--------------------------------------------------
Clone process finished.


## sbom-toolでSBOMを生成

In [30]:
import os
import subprocess
import re
import shutil

# --- Settings ---
clone_to_directory = 'cloned_repositories'
sbom_output_directory = 'generated_sboms'
# Directory that is looked for inside each repository after
# `sbom-tool generate` has run, and that is then moved to the output tree.
manifest_dirname = '_manifest'


def supplier_from_git_config(config_text):
    """Extract the GitHub owner from the text of a ``.git/config`` file.

    Args:
        config_text: Full contents of a Git config file.

    Returns:
        The owner segment of the first ``url = ...github.com/<owner>/...``
        entry (``https://``, ``ssh://git@`` and ``git@github.com:`` forms
        are recognised), or ``"Unknown"`` when there is no such entry.
    """
    match = re.search(
        r'url\s*=\s*(?:https?://|ssh://git@|git@)github\.com[:/]([^/\s]+)/',
        config_text,
    )
    return match.group(1) if match else "Unknown"


def read_package_supplier(repo_path):
    """Return the package supplier for the repository at ``repo_path``.

    The repository's ``.git/config`` is read; if it cannot be opened (for
    example because ``.git`` is a file rather than a directory) the supplier
    falls back to ``"Unknown"`` instead of aborting the run.
    """
    config_path = os.path.join(repo_path, '.git', 'config')
    try:
        with open(config_path, 'r', encoding='utf-8', errors='replace') as config_file:
            return supplier_from_git_config(config_file.read())
    except OSError:
        return "Unknown"


# --- Main processing ---
print("--- Starting: Generating and moving SBOMs ---")
print("-" * 50)

original_path = os.getcwd()
os.makedirs(sbom_output_directory, exist_ok=True)

try:
    repo_dirs = [
        d for d in os.listdir(clone_to_directory)
        if os.path.isdir(os.path.join(clone_to_directory, d))
    ]

    if not repo_dirs:
        print("No repositories found to run commands on.")

    for repo_name in repo_dirs:
        # Final layout: <output>/<repo>/source/_manifest
        final_repo_dir = os.path.join(original_path, sbom_output_directory, repo_name)
        destination_dir = os.path.join(final_repo_dir, 'source')
        final_manifest_path = os.path.join(destination_dir, manifest_dirname)

        if os.path.isdir(final_manifest_path):
            print(f"🟢 Skipping: {repo_name} (SBOM manifest already exists)")
            print("-" * 50)
            continue

        repo_path = os.path.join(clone_to_directory, repo_name)
        print(f"▶️  Entering: {repo_name}")

        try:
            # --- Collect parameters ---
            # Commands run with cwd=repo_path instead of os.chdir(), so the
            # notebook's own working directory is never changed.
            package_name = repo_name
            git_version_result = subprocess.run(
                ['git', 'rev-parse', 'HEAD'],
                cwd=repo_path, capture_output=True, text=True, check=True
            )
            package_version = git_version_result.stdout.strip()
            package_supplier = read_package_supplier(repo_path)

            print(f"   Package Name: {package_name}")
            print(f"   Package Version: {package_version[:12]}...")
            print(f"   Package Supplier: {package_supplier}")

            # Remove a manifest left behind by an earlier run before regenerating.
            source_manifest_path = os.path.join(repo_path, manifest_dirname)
            if os.path.isdir(source_manifest_path):
                print("   Found existing '_manifest' directory. Removing it.")
                shutil.rmtree(source_manifest_path)

            # --- Run sbom-tool ---
            command = [
                'sbom-tool', 'generate', '-bc', '.', '-b', '.',
                '-pn', package_name, '-pv', package_version, '-ps', package_supplier
            ]
            print(f"   Executing: {' '.join(command)}")
            subprocess.run(command, cwd=repo_path, check=True, capture_output=True, text=True)
            print("✅ SBOM generation successful.")

            # --- Move the generated manifest into the output tree ---
            if os.path.isdir(source_manifest_path):
                os.makedirs(destination_dir, exist_ok=True)
                print(f"   Moving '{manifest_dirname}' into: {destination_dir}")
                shutil.move(source_manifest_path, destination_dir)
                print("✅ Move successful.")
            else:
                print(f"⚠️ Warning: Could not find generated manifest directory at '{manifest_dirname}'")

        except FileNotFoundError:
            # File reads are guarded separately above, so reaching this
            # handler means a subprocess could not be started.
            print(f"❌ Error: The command 'sbom-tool' or 'git' was not found.")
            break
        except subprocess.CalledProcessError as e:
            print(f"❌ Error executing command in {repo_name}.")
            if e.stdout:
                print(f"   [stdout]:\n{e.stdout.strip()}")
            if e.stderr:
                print(f"   [stderr]:\n{e.stderr.strip()}")
        finally:
            print("-" * 50)

except FileNotFoundError:
    print(f"❌ Error: The directory '{clone_to_directory}' was not found.")

print("All processes finished.")

--- Starting: Generating and moving SBOMs ---
--------------------------------------------------
▶️  Entering: sbom-python-test
   Package Name: sbom-python-test
   Package Version: 1155b7dbcc8e...
   Package Supplier: MoriwakiYusuke
   Executing: sbom-tool generate -bc . -b . -pn sbom-python-test -pv 1155b7dbcc8e694da2e8cd7d6559bc2f835434a8 -ps MoriwakiYusuke
✅ SBOM generation successful.
   Moving '_manifest' into: /home/moriwaki-y/ritsumei/sbom/auto-datasets-make/generated_sboms/sbom-python-test/source
✅ Move successful.
--------------------------------------------------
▶️  Entering: Python
   Package Name: Python
   Package Version: c79034ca2114...
   Package Supplier: TheAlgorithms
   Executing: sbom-tool generate -bc . -b . -pn Python -pv c79034ca2114e56ede887a473c2853b8c6d49257 -ps TheAlgorithms
✅ SBOM generation successful.
   Moving '_manifest' into: /home/moriwaki-y/ritsumei/sbom/auto-datasets-make/generated_sboms/Python/source
✅ Move successful.
----------------------------

## SyftでSBOMを生成

In [26]:
import os
import subprocess
import re
import shutil

# --- Settings ---
clone_to_directory = 'cloned_repositories'
sbom_output_directory = 'generated_sboms'
# Fixed file name used for every Syft-generated SBOM
final_sbom_filename = "syft-sbom.json"

# --- Main processing ---
print("--- Starting: Generating SBOMs with Syft ---")
print("-" * 50)

original_path = os.getcwd()
os.makedirs(sbom_output_directory, exist_ok=True)

try:
    repo_dirs = [
        d for d in os.listdir(clone_to_directory)
        if os.path.isdir(os.path.join(clone_to_directory, d))
    ]

    if not repo_dirs:
        print("No repositories found to run commands on.")

    for repo_name in repo_dirs:
        repo_path = os.path.join(clone_to_directory, repo_name)

        # Final layout: <output>/<repo>/source/syft-sbom.json
        destination_dir = os.path.join(original_path, sbom_output_directory, repo_name, 'source')
        final_sbom_path = os.path.join(destination_dir, final_sbom_filename)

        # The skip check sits outside the try/finally below so the separator
        # line is printed exactly once for skipped repositories.
        if os.path.exists(final_sbom_path):
            print(f"🟢 Skipping: {repo_name} (SBOM file already exists)")
            print("-" * 50)
            continue

        print(f"▶️  Entering: {repo_name}")

        try:
            # --- Run syft ---
            # cwd=repo_path replaces os.chdir(), so the notebook's working
            # directory is never changed.
            command = ['syft', 'dir:./', '-o', 'spdx-json']
            print(f"   Executing: {' '.join(command)} > {final_sbom_filename}")

            result = subprocess.run(
                command, cwd=repo_path, check=True,
                capture_output=True, text=True, encoding='utf-8'
            )
            print("✅ Syft execution successful.")

            # --- Save the SBOM ---
            # Write next to the final location (not inside the cloned
            # repository) and rename afterwards, so an interrupted write
            # cannot leave a truncated file that later runs would skip.
            os.makedirs(destination_dir, exist_ok=True)
            print(f"   Moving SBOM to: {final_sbom_path}")
            partial_sbom_path = final_sbom_path + '.part'
            with open(partial_sbom_path, 'w', encoding='utf-8') as f:
                f.write(result.stdout)
            shutil.move(partial_sbom_path, final_sbom_path)
            print("✅ SBOM file saved.")

        except FileNotFoundError:
            # Only syft is launched in this cell, so report only syft.
            print("❌ Error: The command 'syft' was not found.")
            break
        except subprocess.CalledProcessError as e:
            print(f"❌ Error executing command in {repo_name}.")
            if e.stdout:
                print(f"   [stdout]:\n{e.stdout.strip()}")
            if e.stderr:
                print(f"   [stderr]:\n{e.stderr.strip()}")
        finally:
            print("-" * 50)

except FileNotFoundError:
    print(f"❌ Error: The directory '{clone_to_directory}' was not found.")

print("All processes finished.")

--- Starting: Generating SBOMs with Syft ---
--------------------------------------------------
▶️  Entering: sbom-python-test
   Executing: syft dir:./ -o spdx-json > syft-sbom.json
✅ Syft execution successful.
   Moving SBOM to: /home/moriwaki-y/ritsumei/sbom/auto-datasets-make/generated_sboms/sbom-python-test/source/syft-sbom.json
✅ SBOM file saved.
--------------------------------------------------
▶️  Entering: Python
   Executing: syft dir:./ -o spdx-json > syft-sbom.json
✅ Syft execution successful.
   Moving SBOM to: /home/moriwaki-y/ritsumei/sbom/auto-datasets-make/generated_sboms/Python/source/syft-sbom.json
✅ SBOM file saved.
--------------------------------------------------
▶️  Entering: python-mini-projects
   Executing: syft dir:./ -o spdx-json > syft-sbom.json
✅ Syft execution successful.
   Moving SBOM to: /home/moriwaki-y/ritsumei/sbom/auto-datasets-make/generated_sboms/python-mini-projects/source/syft-sbom.json
✅ SBOM file saved.
-------------------------------------

## GitHub Dependency GraphからSBOMを取得

In [34]:
import os
import requests
import re
import json

# --- Settings ---

# 1. Text file listing the GitHub repository URLs (one URL per line)
url_file_path = 'url_list.txt'

# 2. Directory in which the fetched SBOM JSON files are stored
sbom_output_directory = 'generated_sboms'

# 3. Seconds to wait for the GitHub API before giving up on a request
request_timeout_seconds = 30


def parse_github_owner_repo(repo_url):
    """Split a GitHub repository URL into ``(owner, repo)``.

    Only a trailing ``.git`` is removed from the repository name, so names
    containing dots (e.g. ``user.github.io``) are preserved. Extra path
    segments after the repository name are ignored.

    Args:
        repo_url: URL such as ``https://github.com/owner/repo(.git)`` or
            ``git@github.com:owner/repo.git``.

    Returns:
        An ``(owner, repo)`` tuple, or ``None`` when the URL does not contain
        a ``github.com/<owner>/<repo>`` part.
    """
    match = re.search(r"github\.com[:/]([^/\s]+)/([^/\s]+)", repo_url.strip())
    if not match:
        return None
    owner, repo = match.groups()
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    if not repo:
        return None
    return owner, repo


# --- Main processing ---
print("--- Starting: Fetching SBOMs from GitHub API (No Token) ---")
print("-" * 50)

original_path = os.getcwd()
os.makedirs(sbom_output_directory, exist_ok=True)

try:
    # 'utf-8-sig' also accepts a file that starts with a UTF-8 BOM.
    with open(url_file_path, 'r', encoding='utf-8-sig') as file:
        urls = [line.strip() for line in file if line.strip()]
except FileNotFoundError:
    print(f"❌ Error: '{url_file_path}' was not found.")
    urls = []

for repo_url in urls:
    # Extract owner and repository name from the URL
    parsed = parse_github_owner_repo(repo_url)
    if parsed is None:
        print(f"⚠️ Warning: Could not parse owner/repo from URL: {repo_url}")
        continue

    owner, repo_name = parsed

    # --- Decide the destination and skip work that is already done ---
    destination_dir = os.path.join(original_path, sbom_output_directory, repo_name, 'source')
    final_sbom_path = os.path.join(destination_dir, 'dependency-graph-sbom.json')

    if os.path.exists(final_sbom_path):
        print(f"🟢 Skipping: {repo_name} (SBOM file already exists)")
        print("-" * 50)
        continue

    print(f"▶️  Fetching SBOM for: {owner}/{repo_name}")

    # --- Request to the GitHub API (no authentication header) ---
    api_url = f"https://api.github.com/repos/{owner}/{repo_name}/dependency-graph/sbom"
    headers = {
        "Accept": "application/vnd.github+json",
        "X-GitHub-Api-Version": "2022-11-28"
    }

    try:
        # Without a timeout a stalled connection would block the cell forever.
        response = requests.get(api_url, headers=headers, timeout=request_timeout_seconds)

        if response.status_code == 200:
            print("✅ API request successful.")
            try:
                sbom_data = response.json().get('sbom')
            except ValueError:
                # The body was not valid JSON; report it as such rather than
                # letting it surface as a network error or a traceback.
                print(f"❌ Error: The API response for {repo_name} was not valid JSON.")
                continue
            if not sbom_data:
                print(f"❌ Error: 'sbom' key not found in the API response for {repo_name}.")
                continue

            # --- Save the SBOM to a file ---
            os.makedirs(destination_dir, exist_ok=True)
            print(f"   Writing SBOM to: {final_sbom_path}")

            with open(final_sbom_path, 'w', encoding='utf-8') as f:
                json.dump(sbom_data, f, ensure_ascii=False, indent=2)
            print("✅ SBOM file saved.")

        elif response.status_code == 404:
            print(f"⚠️ Warning: Could not fetch SBOM for {repo_name}. (Status: 404)")
            print("   The repository may not exist, or the Dependency Graph may not be enabled.")

        else:
            print(f"❌ Error: Failed to fetch SBOM for {repo_name}. Status code: {response.status_code}")
            print(f"   Response: {response.text}")

    except requests.exceptions.RequestException as e:
        print(f"❌ Error: A network error occurred while contacting the GitHub API.")
        print(f"   Details: {e}")

    finally:
        print("-" * 50)

print("All processes finished.")

--- Starting: Fetching SBOMs from GitHub API (No Token) ---
--------------------------------------------------
▶️  Fetching SBOM for: MoriwakiYusuke/sbom-python-test
✅ API request successful.
   Writing SBOM to: /home/moriwaki-y/ritsumei/sbom/auto-datasets-make/generated_sboms/sbom-python-test/source/dependency-graph-sbom.json
✅ SBOM file saved.
--------------------------------------------------
▶️  Fetching SBOM for: MoriwakiYusuke/sbom-go-test
✅ API request successful.
   Writing SBOM to: /home/moriwaki-y/ritsumei/sbom/auto-datasets-make/generated_sboms/sbom-go-test/source/dependency-graph-sbom.json
✅ SBOM file saved.
--------------------------------------------------
▶️  Fetching SBOM for: TheAlgorithms/Python
✅ API request successful.
   Writing SBOM to: /home/moriwaki-y/ritsumei/sbom/auto-datasets-make/generated_sboms/Python/source/dependency-graph-sbom.json
✅ SBOM file saved.
--------------------------------------------------
▶️  Fetching SBOM for: Python-World/python-mini-projec