In [7]:
import numpy as np
import gzip


In [9]:

def print_npy_header(file_path):
    try:
        with open(file_path, 'rb') as f:
            # Read the first few bytes
            magic_string = f.read(6)
            if not magic_string.startswith(b'\x93NUMPY'):
                raise ValueError(f"Not a valid .npy file. Got magic string: {magic_string}")

            # Use numpy's format to read the magic and header
            version = np.lib.format.read_magic(f)
            print(f"Magic String: {magic_string}, Version: {version}")

            # Read the header length and header itself
            header_len = np.lib.format._read_header_len(f, version)
            header = f.read(header_len).decode('latin1')
            
            # Print the header
            print("Header:")
            print(header)
    except Exception as e:
        print(f"Error reading header: {e}")

In [12]:
file_path = '/data/home/melashri/pv_finder/offline/data/validation/pv_HLT1CPU_MinBiasMagDown_14Nov_t2hists_Arrays_validation_allEvents.npy'
print_npy_header(file_path)

Error reading header: Not a valid .npy file. Got magic string: b'PK\x03\x04\x00\x00'


In [6]:
with open(file_path, 'rb') as f:
    content = f.read(16)
    print(content)

b"\x93NUMPY\x01\x00v\x00{'desc"


In [8]:

with gzip.open(file_path, 'rb') as f:
    array = np.load(f)
print(array)

BadGzipFile: Not a gzipped file (b'\x93N')

In [11]:
def manually_parse_npy(file_path):
    try:
        with open(file_path, 'rb') as f:
            # Read the magic string and version numbers
            magic_string = f.read(6)
            if magic_string != b'\x93NUMPY':
                raise ValueError(f"Unexpected magic string: {magic_string}")

            version_major = int.from_bytes(f.read(1), byteorder='little')
            version_minor = int.from_bytes(f.read(1), byteorder='little')
            print(f"Version: {version_major}.{version_minor}")

            # Read the header length (using version-specific logic)
            if version_major == 1:
                header_len = int.from_bytes(f.read(2), byteorder='little')
            elif version_major == 2:
                header_len = int.from_bytes(f.read(4), byteorder='little')
            else:
                raise ValueError(f"Unsupported version: {version_major}.{version_minor}")

            # Read the actual header
            header = f.read(header_len).decode('latin1')
            print("Header:")
            print(header)
    except Exception as e:
        print(f"Error reading file: {e}")

manually_parse_npy(file_path)


Version: 1.0
Header:
{'descr': '<f4', 'fortran_order': False, 'shape': (452560, 9, 250), }                                                

