In [None]:
import json
import pandas as pd
from pathlib import Path
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
from typing import Dict, List, Any

## SBOM Generation

- Step-1: Install Syft
> curl -sSfL https://raw.githubusercontent.com/anchore/syft/main/install.sh \
| sudo sh -s - -b /usr/local/bin

- Step-2: Set up the LLM
- Step-3: Get the path of the Python environment in which the LLM is installed
> python -c "import sys; print(sys.prefix)"

- Step-4: Generate the SBOM in CycloneDX format using the command below
> syft dir:"/Users/pgeesala/Desktop/Projects/AI Ethics- Op Sec/-venv" \
-o cyclonedx-json=sbom_llama_venv.json

In [None]:
# Analyze every SBOM listed in SBOM_FILES and store the per-file results in
# `sbom_results`, keyed by the file stem, for comparison.
#
# For each file that exists on disk the loop:
#   1. loads it through load_and_extract_sbom(), whose result is read via the
#      keys 'sbom', 'components' and 'deps_data';
#   2. prints the SBOM header fields and metadata;
#   3. builds a DataFrame of dependencies and selects the PyPI packages;
#   4. runs check_vulnerabilities() on those packages and reports the findings;
#   5. prints the license distribution and displays a sample of packages.
sbom_results = {}

for sbom_file in SBOM_FILES:
    sbom_path = Path(sbom_file)

    # Missing files are reported and skipped rather than aborting the whole run.
    if not sbom_path.exists():
        print(f"⚠ Skipping {sbom_path.name} - file not found")
        continue

    print("\n" + "="*70)
    print(f"Analyzing: {sbom_path.name}")
    print("="*70)

    # Load SBOM
    result = load_and_extract_sbom(sbom_path)
    sbom = result['sbom']
    components = result['components']

    print(f"SBOM Format: {sbom.get('bomFormat')}")
    print(f"Spec Version: {sbom.get('specVersion')}")
    print(f"Serial Number: {sbom.get('serialNumber')}")

    # `metadata` and `metadata.timestamp` are optional in a CycloneDX document,
    # so read them defensively instead of indexing (which raised KeyError and
    # stopped the analysis of all remaining files).
    metadata = sbom.get('metadata') or {}
    print(f"\nMetadata:")
    print(f"  Timestamp: {metadata.get('timestamp', 'N/A')}")
    if 'component' in metadata:
        print(f"  Component: {metadata['component'].get('name', 'N/A')}")

    # Create DataFrame
    df_deps = pd.DataFrame(result['deps_data'])
    # na=False treats rows without a purl as "not a PyPI package"; .copy()
    # detaches the selection from df_deps.
    python_packages = df_deps[df_deps['purl'].str.startswith('pkg:pypi', na=False)].copy()

    print(f"\nTotal components: {len(components)}")
    print(f"Python packages: {len(python_packages)}")
    print(f"Binary components: {len(df_deps[df_deps['type'] == 'application'])}")

    # Check vulnerabilities
    vulnerabilities = check_vulnerabilities(python_packages)

    if vulnerabilities:
        print(f"\n⚠️  VULNERABILITIES FOUND: {len(vulnerabilities)}")
        for vuln in vulnerabilities:
            print(f"  - {vuln['package']} {vuln['version']}: {vuln['risk']}")
    else:
        print("\n✓ No known vulnerable versions detected")

    # License distribution
    print(f"\nLicense Distribution (Top 5):")
    print(python_packages['licenses'].value_counts().head(5))

    # Sample packages
    print(f"\nSample Python Packages (First 10):")
    display(python_packages[['name', 'version', 'licenses']].head(10))

    # Store results
    sbom_results[sbom_path.stem] = {
        'sbom': sbom,
        'df_deps': df_deps,
        'python_packages': python_packages,
        'vulnerabilities': vulnerabilities
    }

## Download required documents

In [None]:
import requests, os

# Download the reference documents (model cards, system cards, evaluation
# report) into the local "docs" directory, one file per entry in URLS.
os.makedirs("docs", exist_ok=True)

# Maps the local file name (inside "docs") to the URL it is fetched from.
URLS = {
    "deepseek_r1.html": "https://huggingface.co/deepseek-ai/DeepSeek-R1",
    "llama3_8b_instruct.html": "https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct",
    "gpt4o_system_card.pdf": "https://cdn.openai.com/gpt-4o-system-card.pdf",
    "o3_o4mini_system_card.pdf": "https://cdn.openai.com/pdf/2221c875-02dc-4789-800b-e7758f3722c1/o3-and-o4-mini-system-card.pdf",
    "nist_deepseek_evaluation.pdf": "https://www.nist.gov/system/files/documents/2025/09/30/CAISI_Evaluation_of_DeepSeek_AI_Models.pdf"
}

# requests applies no timeout by default, so without this a single stalled
# server would block the cell forever.
DOWNLOAD_TIMEOUT_S = 30

for fname, url in URLS.items():
    resp = requests.get(url, timeout=DOWNLOAD_TIMEOUT_S)
    # Fail on HTTP errors before anything is written, so an error page is
    # never saved under the document's file name.
    resp.raise_for_status()
    with open(os.path.join("docs", fname), "wb") as f:
        f.write(resp.content)