In [1]:
from io import BytesIO
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import numba as nb
import os
import struct
import pandas as pd
import struct

In [2]:
areaperil_int = np.dtype(os.environ.get('AREAPERIL_TYPE', 'u4'))
oasis_float = np.dtype(os.environ.get('OASIS_FLOAT', 'f4'))
oasis_int_dtype = np.dtype('i4')
oasis_int = np.int32
oasis_int_size = np.int32().itemsize
# buff_int_size = buff_size // oasis_int_size

areaperil_int_relative_size = areaperil_int.itemsize // oasis_int_size
oasis_float_relative_size = oasis_float.itemsize // oasis_int_size
results_relative_size = 2 * oasis_float_relative_size

EventIndexBin = nb.from_dtype(np.dtype([('event_id', np.int32),
                                        ('offset', np.int64),
                                        ('size', np.int64)
                                        ]))

Index_type = nb.from_dtype(np.dtype([('start', np.int64),
                                     ('end', np.int64)
                                     ]))


Event = nb.from_dtype(np.dtype([('areaperil_id', areaperil_int),
                                ('intensity_bin_id', np.int32),
                                ('probability', oasis_float)
                                ]))

In [6]:
# read the first 8 bytes of the footprint index file to get the header
with open("./footprint.bin", "rb") as file:
    data = file.read(8)

number_of_intensity_bins = data[:4]
compression_type = data[4:8]
print(number_of_intensity_bins)

number_of_intensity_bins = int.from_bytes(number_of_intensity_bins, "little")  
compression_type = int.from_bytes(compression_type, "little")  
print(number_of_intensity_bins)
print(compression_type)
# print(data[:8])
# first 4 bytes is an integer is the number of intensity bins 


# second 4 bytes states that type of compression it is



b'2\x00\x00\x00'
50
1


In [40]:
from enum import Enum


class CompressionEnum(Enum):
    NO_HAZARD_UNCERTAINTY = 0
    HAS_HAZARD_UNCERTAINTY = 1
    INDEX_FILE_HAS_UNCOMPRESSED_SIZE = 2
    INDEX_FILE_HAS_UNCOMPRESSED_SIZE_AND_THERE_IS_HAZARD_UNCERTAINTY = 3


class FootprintIndexBinReader:
    
    def __init__(self, footprint_path: str, path: str) -> None:
        self.footprint_path = footprint_path
        self.path = path
        self.chunk_size = 20
        self.header_size = 8
        self.compression = False
#         if self.zipped is False:
        header = self.header
        self.number_of_intensity_bins = header["number of intensity bins"]
        self.compression_type = self.map_compression(header["compression type"])
        self.map_chunk_size()
#         else:
        self.compression = True
        
    def map_chunk_size(self) -> None:
        compressed_statuses = [
            CompressionEnum.INDEX_FILE_HAS_UNCOMPRESSED_SIZE,
            CompressionEnum.INDEX_FILE_HAS_UNCOMPRESSED_SIZE_AND_THERE_IS_HAZARD_UNCERTAINTY
        ]
        if self.compression_type in compressed_statuses:
            self.chunk_size = 28
            self.compression = True
    
    def read(self):
        with open(self.path, "rb") as file:
            data = "placeholder"
            while data:
                data = file.read(self.chunk_size)
                if data is None:
                    break
                offset = self.chunk_size
                yield self.process_data(data)
                
    def process_data(self, data) -> tuple:
        event_id = int.from_bytes(data[:4], "little")
        offset = int.from_bytes(data[4:12], "little")
        size = int.from_bytes(data[12:20], "little")
        if self.compression is True:
            uncompressed_size = int.from_bytes(data[20:28], "little")
            return event_id, offset, size, uncompressed_size
        return event_id, offset, size
    
    @staticmethod
    def map_compression(compression_type: int) -> CompressionEnum:
        return CompressionEnum(compression_type)
    
    @property
    def header(self) -> dict:
        placeholder = {}
        with open(self.footprint_path, "rb") as file:
            data = file.read(8)
        placeholder["number of intensity bins"] = int.from_bytes(data[:4], "little")
        placeholder["compression type"] = int.from_bytes(data[4:8], "little")
        print(placeholder)
        return placeholder
    
    @property
    def zipped(self) -> bool:
        if self.path[-2:] == ".z":
            return True
        return False
    

    
test = FootprintIndexBinReader("./footprint.bin", "./footprint.idx")
print(test.number_of_intensity_bins)
print(test.compression_type)
print(test.chunk_size)
print(test.compression)

counter = 0
for i in test.read():
    print(i)
    counter += 1
    if counter > 5:
        break

{'number of intensity bins': 50, 'compression type': 1}
50
CompressionEnum.HAS_HAZARD_UNCERTAINTY
20
True
(1, 8, 60000, 0)
(2, 60008, 60000, 0)
(3, 120008, 60000, 0)
(4, 180008, 60000, 0)
(5, 240008, 60000, 0)
(6, 300008, 60000, 0)


In [41]:


class FootprintReader:
    
    def __init__(self, path: str, chunk_size: int) -> None:
        self.path = path
        self.chunk_size = chunk_size
        
    def process_data(self, data) -> tuple:
        areaperil_id = int.from_bytes(data[:4], "little")
        intensity_bin_id = int.from_bytes(data[4:8], "little")
        probability = struct.unpack('f', data[8:12])[0]
        
        return areaperil_id, intensity_bin_id, probability
        
    
    def read(self):
        with open(self.path, "rb") as file:
            data = file.read(8)
            while data:
                data = file.read(self.chunk_size)
                if data is None:
                    break
                offset = self.chunk_size
                yield self.process_data(data)

In [48]:
test = FootprintReader("./footprint.bin", 12)

counter = 0
for i in test.read():
    print(i)
    counter += 1
    if counter > 5:
        break

(1, 1, 0.0)
(1, 2, 0.062247294932603836)
(1, 3, 0.06835487484931946)
(1, 4, 0.07045239955186844)
(1, 5, 0.038754019886255264)
(1, 6, 0.0)


In [49]:
test = FootprintIndexBinReader("./static/fooprint.bin.z", "./static/footprint.idx.z")
print(test.number_of_intensity_bins)
print(test.compression_type)
print(test.chunk_size)
print(test.compression)
counter = 0
for i in test.read():
    print(i)
    counter += 1
    if counter > 5:
        break

{'number of intensity bins': 50, 'compression type': 1}
50
CompressionEnum.HAS_HAZARD_UNCERTAINTY
20
True
(1, 8, 60000, 0)
(2, 60008, 60000, 0)
(3, 120008, 60000, 0)
(4, 180008, 60000, 0)
(5, 240008, 60000, 0)
(6, 300008, 60000, 0)
