In [None]:
# This is the code to parse the .spdx file for the SBOMs
# It creates 3 .csv files
# spdx_output.csv: Files independent of packages
# relationships_output.csv: Relationships between different packages
# packages_output.csv: Contains different packages


import csv

from google.colab import drive
# Mount Google Drive at /content/drive; the CSV output paths used later in
# this script are located under this mount point.
drive.mount('/content/drive')

# Define the path to your SPDX file
spdx_file_path = 'v1.19.12.spdx'  # Replace with your file path

# Parsed records (one dict per entry); these lists are read by the CSV
# writers further down in this script.
spdx_entries = []          # File entries, each started by a "FileName:" line
package_entries = []       # Package entries, each started by a "##### Package:" line
relationship_entries = []  # One dict per well-formed "Relationship:" line

# Tags that can occur several times inside one entry; their values are
# accumulated in a list instead of overwriting each other.
_CHECKSUM_KEYS = ('FileChecksum', 'PackageChecksum')


def _store_entry(entry):
    """Append a finished entry to the list matching its 'type'.

    Does nothing when ``entry`` is None (no entry has been started yet).
    Package entries go to ``package_entries``; everything else (File
    entries) goes to ``spdx_entries``.
    """
    if not entry:
        return
    if entry['type'] == 'Package':
        package_entries.append(entry)
    else:
        spdx_entries.append(entry)


# The entry currently being filled in; None until the first Package/File starts
current_entry = None

# Read the SPDX file line by line. Each line is handled independently, so a
# value that spans several lines is not reassembled here.
with open(spdx_file_path, 'r', encoding='utf-8') as spdx_file:
    for line in spdx_file:
        line = line.strip()
        if line.startswith('##### Package:'):
            # Start of a new Package entry: file the previous entry under its
            # own type (it may be a File entry, not necessarily a Package).
            _store_entry(current_entry)
            current_entry = {'type': 'Package'}
        elif line.startswith('FileName:'):
            # Start of a new File entry: save the previous one first.
            _store_entry(current_entry)
            current_entry = {
                'type': 'File',
                # Strip so the value is correct regardless of the spacing
                # that follows the colon.
                'FileName': line[len('FileName:'):].strip(),
            }
        elif line.startswith('Relationship:'):
            # SPDX tag-value layout: "Relationship: <source> <TYPE> <target>".
            # Lines that do not have exactly these four tokens are skipped.
            parts = line.split()
            if len(parts) == 4:
                _, source, relationship_type, target = parts
                relationship_entries.append({
                    'RelationshipType': relationship_type,
                    'Source': source,
                    'Target': target
                })
        elif current_entry:
            # Any other "Key: value" line belongs to the current entry.
            parts = line.split(': ', 1)
            if len(parts) == 2:
                key, value = parts
                if key in _CHECKSUM_KEYS:
                    # Keep every checksum line of the entry, in file order.
                    current_entry.setdefault(key, []).append(value)
                else:
                    current_entry[key] = value

# Store the last entry (if any), which no later header line flushed
_store_entry(current_entry)

# Define the path for the output SPDX CSV file
spdx_csv_file_path = '/content/drive/MyDrive/Cloud Computing EC528/spdx_output.csv'  # Replace with your desired output file path

# Write one CSV row per parsed File entry; any field an entry lacks is
# written as 'N/A'. UTF-8 is set explicitly so non-ASCII text (e.g. in
# copyright notices) is written the same way on every platform.
with open(spdx_csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['FileName', 'SPDXID', 'FileChecksum', 'LicenseConcluded', 'LicenseInfoInFile', 'FileCopyrightText']
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write the CSV header
    csv_writer.writeheader()

    for entry in spdx_entries:
        row = {field: entry.get(field, 'N/A') for field in fieldnames}

        # FileChecksum is stored as a list (one item per checksum line).
        # Join it into a single cell without modifying the parsed entry, and
        # fall back to 'N/A' like every other missing field.
        checksums = entry.get('FileChecksum')
        row['FileChecksum'] = ', '.join(checksums) if checksums else 'N/A'

        csv_writer.writerow(row)

print(f"SPDX CSV file saved at {spdx_csv_file_path}")

# Define the path for the output Packages CSV file
package_csv_file_path = '/content/drive/MyDrive/Cloud Computing EC528/packages_output.csv'  # Replace with your desired output file path

# Write one CSV row per parsed Package entry; any field an entry lacks is
# written as 'N/A'. UTF-8 is set explicitly so non-ASCII text (e.g. in
# copyright notices) is written the same way on every platform.
with open(package_csv_file_path, 'w', newline='', encoding='utf-8') as csv_file:
    fieldnames = ['PackageName', 'SPDXID', 'PackageDownloadLocation', 'FilesAnalyzed', 'PackageLicenseConcluded', 'PackageLicenseDeclared', 'PackageCopyrightText', 'PackageChecksum']
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)

    # Write the CSV header
    csv_writer.writeheader()

    for entry in package_entries:
        row = {field: entry.get(field, 'N/A') for field in fieldnames}

        # PackageChecksum is stored as a list (one item per checksum line).
        # Join it into a single cell without modifying the parsed entry, and
        # fall back to 'N/A' like every other missing field.
        checksums = entry.get('PackageChecksum')
        row['PackageChecksum'] = ', '.join(checksums) if checksums else 'N/A'

        csv_writer.writerow(row)

print(f"Packages CSV file saved at {package_csv_file_path}")

# Output location of the Relationships CSV file
relationship_csv_file_path = '/content/drive/MyDrive/Cloud Computing EC528/relationships_output.csv'  # Replace with your desired output file path

# Dump every parsed relationship as one CSV row, header first.
with open(relationship_csv_file_path, 'w', newline='') as csv_file:
    fieldnames = ['RelationshipType', 'Source', 'Target']
    csv_writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
    csv_writer.writeheader()

    # Build each row from the declared columns; a column missing from a
    # record is written as 'N/A'.
    csv_writer.writerows(
        {column: record.get(column, 'N/A') for column in fieldnames}
        for record in relationship_entries
    )

print(f"Relationships CSV file saved at {relationship_csv_file_path}")
