# Data loading

## Speed test

Compare the time needed to read an image from a raster file with the time needed to read the same data from an HDF5 file.

## Raster images

In [7]:
import rasterio
from rich.console import Console
from rich.table import Table

from main.FolderInfos import FolderInfos
import os
import re
import matplotlib.pyplot as plt
import numpy as np
from osgeo import gdal
import time


def get_array_raster_file(path):
    """Open the raster at *path* and read index 1 from it.

    Returns a ``(data, handle)`` tuple where ``data`` is whatever
    ``handle.read(1)`` produced and ``handle`` is the object yielded by
    ``rasterio.open``.  The handle is returned after its ``with`` block has
    exited, so callers should only rely on it for metadata, not for further
    reads.
    """
    with rasterio.open(path) as raster_handle:
        # NOTE(review): presumably index 1 is the first band — confirm against rasterio docs.
        band_data = raster_handle.read(1)
    return band_data, raster_handle

# Initialise the project folder configuration before reading FolderInfos.data_test.
FolderInfos.init(test_without_data=True)
# List the directory exactly once: os.listdir returns entries in arbitrary
# order, so two separate calls are not guaranteed to line up when zipped.
short_names = os.listdir(FolderInfos.data_test)
# data_test is used as a plain string prefix (same concatenation as before).
files = [FolderInfos.data_test + f for f in short_names]
# Index of the test files: extension -> {extracted name -> full path}.
dico_by_extensions = {}
for sf, f in zip(short_names, files):
    # Keep only the second regex group (the part starting with "20") as the
    # name; when the pattern does not match, re.sub leaves the file name as is.
    name = re.sub(r"^([A-Za-z0-9_]+[A-Za-z_]{,2})(20[0-9_A-Z]+)(\.[a-z]+)$", r"\2", sf)
    ext = sf.split(".")[-1]
    dico_by_extensions.setdefault(ext, {})[name] = f

# Benchmark: time repeated reads of each raster file, then print a summary table.
list_imgs_ids = ["027481_0319CB_0EB7","016505_01F10F_CE84","016753_01F88A_4864"]
# Per image id: the list of timings (ns), plus the shape and byte size of the array read.
dico_times = {k:{"values":[],"shape":None,"mem_size":None} for k in list_imgs_ids}
# NOTE(review): files are paired with the ids purely by position (zip), so the
# pairing depends on the order in which the "img" files were listed — confirm
# that this order matches list_imgs_ids.
for _ in range(50):
    for path, uniq_id in zip(dico_by_extensions["img"].values(), list_imgs_ids):
        # perf_counter_ns is monotonic and high-resolution, unlike the
        # wall-clock time_ns, so it is the right clock for short intervals.
        initial_time = time.perf_counter_ns()
        # The second element of the returned tuple (the file handle) is not needed here.
        dataset = get_array_raster_file(path)[0]
        dico_times[uniq_id]["values"].append(time.perf_counter_ns() - initial_time)
        dico_times[uniq_id]["shape"] = dataset.shape
        dico_times[uniq_id]["mem_size"] = dataset.nbytes
print("We have the following access time for each image:")
console = Console(color_system="windows")
table = Table(show_header=True, header_style="bold magenta")
table.add_column("Name")
table.add_column("Access time (ms)")
table.add_column("Shape of the image")
table.add_column("Memory size (Bytes)")
for name_img, values in dico_times.items():
    table.add_row(
        f"{name_img} avg time",
        str(np.mean(values["values"]) * 1e-6),  # ns -> ms
        str(values["shape"]),
        # rich only accepts strings/renderables as cells; a raw int raises NotRenderableError.
        str(values["mem_size"]),
    )
all_times = [v["values"] for v in dico_times.values()]
table.add_row(
    "Global avg time",
    str(np.mean(np.concatenate(all_times, axis=0) * 1e-6)),  # ns -> ms
    "-",
    "-",
)
console.print(table)
        

We have the following access time for each image:
