In [5]:
from asn1crypto import cms

def extract_p7m(p7m_path, output_path):
    """Extract the payload embedded in a signed CMS (.p7m) file.

    The file at ``p7m_path`` is parsed as a CMS ``ContentInfo`` structure and
    the bytes stored in ``SignedData -> encap_content_info -> content`` are
    written unchanged to ``output_path``. The signature itself is NOT
    verified; this function only unwraps the payload.

    Args:
        p7m_path: Path of the signed input file.
        output_path: Path the extracted payload is written to. Missing parent
            directories are created.

    Raises:
        ValueError: If the CMS content type is not ``signed_data``, or if the
            structure carries no encapsulated content (detached signature).
        OSError: If the input cannot be read or the output cannot be written.
    """
    # Imported locally so the function does not rely on another notebook cell
    # having imported ``os`` first.
    import os

    with open(p7m_path, 'rb') as f:
        p7m_data = f.read()

    # Load the CMS (Cryptographic Message Syntax) structure
    content_info = cms.ContentInfo.load(p7m_data)

    # Only SignedData has an 'encap_content_info' field; fail with a clear
    # message instead of an opaque KeyError for any other content type.
    content_type = content_info['content_type'].native
    if content_type != 'signed_data':
        raise ValueError(
            f"{p7m_path}: expected CMS content type 'signed_data', "
            f"got {content_type!r}"
        )

    # Drill down: ContentInfo -> SignedData -> EncapsulatedContentInfo -> Content
    signed_data = content_info['content']
    xml_content = signed_data['encap_content_info']['content'].native

    # A detached signature has no embedded payload. Check before opening the
    # output so a failed extraction never leaves an empty file behind.
    if xml_content is None:
        raise ValueError(
            f"{p7m_path}: no encapsulated content found (detached signature?)"
        )

    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_path, 'wb') as f:
        f.write(xml_content)

    print(f"Extracted to {output_path}")

# Example: unwrap one signed invoice and store it under a fixed output name.
extract_p7m(
    'p7m_files/FatturaPa  GSPA-2023-26-28.02.23 BAUSTOFF H & S  SAS DI GUFLER JOHANN & CO.  IT02046570426_3XJiD(E100798828).P7M',
    'xml_files/invoice.xml',
)

Extracted to xml_files/invoice.xml


In [7]:
# Find every .p7m file in p7m_files/ (extension matched case-insensitively)
# and extract its payload into xml_files/ under the same base name.
import os

P7M_DIR = 'p7m_files'
XML_DIR = 'xml_files'

# Make the cell runnable even when the output folder does not exist yet.
os.makedirs(XML_DIR, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sorting keeps the
# processing order (and the printed log) reproducible across runs.
for filename in sorted(os.listdir(P7M_DIR)):
    stem, ext = os.path.splitext(filename)
    if ext.lower() != '.p7m':
        continue

    p7m_path = os.path.join(P7M_DIR, filename)
    if not os.path.isfile(p7m_path):
        # Skip directories (or other non-files) that merely end in .p7m.
        continue

    # Inputs named like "<name>.xml.p7m" would otherwise become "<name>.xml.xml".
    xml_filename = stem if stem.lower().endswith('.xml') else stem + '.xml'
    output_path = os.path.join(XML_DIR, xml_filename)
    extract_p7m(p7m_path, output_path)

Extracted to xml_files\FatturaPa  GSPA-2023-26-28.02.23 BAUSTOFF H & S  SAS DI GUFLER JOHANN & CO.  IT02046570426_3XJiD(E100798828).xml
Extracted to xml_files\FatturaPa  GSPA-2023-28-15.03.23 BAUSTOFF H & S  SAS DI GUFLER JOHANN & CO.  IT02046570426_41Rik(E100801151).xml
Extracted to xml_files\FatturaPa  GSPA-2023-29-15.03.23 BAUSTOFF H & S  SAS DI GUFLER JOHANN & CO.  IT02046570426_41Riv(E100801154).xml
Extracted to xml_files\FatturaPa  GSPA-2023-30-15.03.23 BAUSTOFF H & S  SAS DI GUFLER JOHANN & CO.  IT02046570426_41RiC(E100801158).xml
Extracted to xml_files\FatturaPa  GSPA-2023-56-30.04.23 BAUSTOFF H & S  SAS DI GUFLER JOHANN & CO.  IT02046570426_4tjkA(E100817650).xml
Extracted to xml_files\FatturaPa  GSPA-2024-37-30.04.24 BAUSTOFF H & S  SAS DI GUFLER JOHANN & CO.  IT02046570426_80iDL(E101036927).xml
Extracted to xml_files\FatturaPa  RGPA-2023-102-31.08.23 BAUSTOFF H & S  SAS DI GUFLER JOHANN & CO.  IT02046570426_5Cu1C(E100861902).xml
Extracted to xml_files\FatturaPa  RGPA-2023-103