# <font style="color:blue">Database HDF5 analysis of data structure</font>
### <font style="color:purple">Imports</font>

In [1]:
import os
import h5py
import json
from PIL import Image
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as patches

%matplotlib inline

### <font style="color:purple">Path to the database file</font>

In [2]:
# Location of the HDF5 database, relative to the notebook's working directory.
_DATABASE_PATH_PARTS = (os.curdir, 'results', 'database', 'datasets.h5')
database_path = os.path.join(*_DATABASE_PATH_PARTS)

### <font style="color:purple">Work with database</font>

#### ***Connect to the database for reading***

In [3]:
# Open the HDF5 file read-only; the handle is shared by every cell below.
# NOTE(review): no matching database.close() is visible in this part of the
# notebook - confirm the file is closed once the analysis is finished.
database = h5py.File(database_path, mode="r")
# Path of the opened object inside the file (the root group).
database.name

'/'

#### ***Basic review of the top-level structure***

In [4]:
# Names of the groups stored directly under the root of the file.
database.keys()

<KeysViewHDF5 ['test', 'train', 'valid']>

In [5]:
# Materialise the top-level group names as a plain list for later iteration.
List_datasets = [*database.keys()]
List_datasets

['test', 'train', 'valid']

In [6]:
# Names of the attributes attached to the root of the file.
database.attrs.keys()

<KeysViewHDF5 ['class_number', 'names_class']>

In [7]:
# Root-level attribute names as a plain list.
List_database_attribute = [*database.attrs.keys()]
List_database_attribute

['class_number', 'names_class']

#### ***Select number of classes***

In [8]:
# Read the class count by attribute name. Looking it up through its position
# in the attribute key list would silently return a different attribute if the
# set or order of root attributes ever changed.
number_of_classes = int(database.attrs['class_number'])
number_of_classes

2

#### ***Select list of class names***

In [9]:
# Read the class names by attribute name rather than by position in the
# attribute key list, so the lookup does not depend on attribute ordering.
classes_names = list(database.attrs['names_class'])
classes_names

['__background__', 'Vehicle registration plate']

#### ***Review the second-level structure: contents of each dataset***

In [10]:
# Map every top-level dataset name to the list of member names it contains.
dic_structure_type_datasets = {
    split_name: list(database[split_name])
    for split_name in List_datasets
}

In [11]:
# Left disabled on purpose: uncomment the next line to display the whole
# mapping (it lists every member name of every dataset).
#dic_structure_type_datasets

#### ***Structure of*** *test* ***dataset***

In [12]:
# Member names of the 'test' dataset; show a 10-item slice and the total count.
list_test_images = dic_structure_type_datasets['test']
print(
    list_test_images[10:20], # view 10 samples
    f"Number of test samples is {len(list_test_images)}.",
    sep="\n",
)

['pexels193021', 'pexels2127014', 'pexels2127732', 'pexels2127733', 'pexels24394859', 'pexels250154', 'pexels253096', 'pexels2920064', 'pexels303316', 'pexels313779']
Number of test samples is 30.


***Show structure of one test image, number position 10.***

In [13]:
# List what is stored inside the test sample at position 10.
for member_name in database['test'][list_test_images[10]].keys():
    print(member_name)

image_link


***Show/select value of image link***

In [14]:
# Read the 'image_link' dataset of the sample, take its first element and
# decode the raw bytes into text.
image_link_test10 = (
    database['test'][list_test_images[10]]['image_link'][:]
    .tolist()[0]
    .decode('UTF-8')
)
image_link_test10

'/kaggle/input/opencv-evalution-alpr-dataset/cars_ALPR_test/images/pexels193021.jpg'

***Show attribute of image***

In [15]:
# Attribute names attached to the test sample at position 10.
list_attributes_image_test10 = [*database['test'][list_test_images[10]].attrs.keys()]
list_attributes_image_test10

['boxes', 'dimension', 'labels', 'name', 'type']

***Select the xmin, ymin, xmax, ymax coordinates of the boxes***

In [19]:
# Bounding boxes of the sample as nested Python lists.
# The attribute is read by name: selecting it through its position in the
# attribute key list breaks silently if attributes are added or reordered.
boxes_values = database['test'][list_test_images[10]].attrs['boxes'].tolist()
boxes_values

[[706, 2346, 1410, 2546], [1410, 2436, 1432, 2446]]

***Select dimension of image***

In [22]:
# Stored image dimensions of the sample as a Python list.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
dimension_image = database['test'][list_test_images[10]].attrs['dimension'].tolist()
dimension_image

[6000.0, 3376.0, 3.0]

***Select labels of detections***

In [25]:
# Labels stored for the sample, converted to a Python list.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
image_labels = database['test'][list_test_images[10]].attrs['labels'].tolist()
image_labels

[1, 1]

***Select name of image***

In [28]:
# File name of the sample; the stored bytes are decoded into text.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
image_name = database['test'][list_test_images[10]].attrs['name'].decode('UTF-8')
image_name

'pexels193021.jpg'

***Select type of file***

In [32]:
# File type of the sample; the stored bytes are decoded into text.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
image_type = database['test'][list_test_images[10]].attrs['type'].decode('UTF-8')
image_type

'jpg'

#### ***Structure of*** *valid* ***dataset***

In [33]:
# Member names of the 'valid' dataset.
# NOTE: the variable keeps the name `list_test_images` because the cells that
# follow index into it; from this cell on it holds the 'valid' entries.
list_test_images = dic_structure_type_datasets['valid']
print(list_test_images[10:20]) # view 10 samples
# The message names the dataset actually being counted.
print(f"Number of valid samples is {len(list_test_images)}.")

['0673b967f8c68eec', '06b024413ad385a7', '0727983dd5f9e4e6', '0787b0fa95f545a5', '0801961485534636', '081f5a6bc61b9c48', '08481c03daf6f35d', '091c033b2a7df15b', '09453a7c716a9ef3', '0c756c9366a8cb10']
Number of test samples is 386.


***Show structure of one valid image, number position 10.***

In [35]:
# List what is stored inside the valid sample at position 10.
for member_name in database['valid'][list_test_images[10]].keys():
    print(member_name)

image_link


***Show/select value of image link***

In [36]:
# Read the 'image_link' dataset of the valid sample, take its first element
# and decode the raw bytes into text.
image_link_test10 = (
    database['valid'][list_test_images[10]]['image_link'][:]
    .tolist()[0]
    .decode('UTF-8')
)
image_link_test10

'/kaggle/input/vehicle-registration-plate/Dataset/validation/Vehicle registration plate/0673b967f8c68eec.jpg'

***Show attribute of image***

In [37]:
# Attribute names attached to the valid sample at position 10.
list_attributes_image_test10 = [*database['valid'][list_test_images[10]].attrs.keys()]
list_attributes_image_test10

['boxes', 'dimension', 'labels', 'name', 'type']

***Select the xmin, ymin, xmax, ymax coordinates of the boxes***

In [38]:
# Bounding boxes of the valid sample as nested Python lists.
# The attribute is read by name: selecting it through its position in the
# attribute key list breaks silently if attributes are added or reordered.
boxes_values = database['valid'][list_test_images[10]].attrs['boxes'].tolist()
boxes_values

[[217, 533, 479, 633]]

***Select dimension of image***

In [39]:
# Stored image dimensions of the valid sample as a Python list.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
dimension_image = database['valid'][list_test_images[10]].attrs['dimension'].tolist()
dimension_image

[1024.0, 742.0, 3.0]

***Select labels of detections***

In [41]:
# Labels stored for the valid sample, converted to a Python list.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
image_labels = database['valid'][list_test_images[10]].attrs['labels'].tolist()
image_labels

[1]

***Select name of image***

In [42]:
# File name of the valid sample; the stored bytes are decoded into text.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
image_name = database['valid'][list_test_images[10]].attrs['name'].decode('UTF-8')
image_name

'0673b967f8c68eec.jpg'

***Select type of file***

In [43]:
# File type of the valid sample; the stored bytes are decoded into text.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
image_type = database['valid'][list_test_images[10]].attrs['type'].decode('UTF-8')
image_type

'jpg'

#### ***Structure of*** *train* ***dataset***

In [44]:
# Member names of the 'train' dataset.
# NOTE: the variable keeps the name `list_test_images` because the cells that
# follow index into it; from this cell on it holds the 'train' entries.
list_test_images = dic_structure_type_datasets['train']
print(list_test_images[10:20]) # view 10 samples
# The message names the dataset actually being counted.
print(f"Number of train samples is {len(list_test_images)}.")

['000883b46adeca7e', '0009c4ddcaab18d3', '000f52302c1341eb', '00103aa8df33633a', '001065dbc46c29c8', '0010f4c10f7ab07e', '0011d11a6a5ab5ad', '0014e05dcc74d3f3', '0015ed86d644a332', '00166578c691cd43']
Number of test samples is 5308.


***Show structure of one train image, number position 10.***

In [46]:
# List what is stored inside the train sample at position 10.
for member_name in database['train'][list_test_images[10]].keys():
    print(member_name)

image_link


***Show/select value of image link***

In [47]:
# Read the 'image_link' dataset of the train sample, take its first element
# and decode the raw bytes into text.
image_link_test10 = (
    database['train'][list_test_images[10]]['image_link'][:]
    .tolist()[0]
    .decode('UTF-8')
)
image_link_test10

'/kaggle/input/vehicle-registration-plate/Dataset/train/Vehicle registration plate/000883b46adeca7e.jpg'

***Show attribute of image***

In [48]:
# Attribute names attached to the train sample at position 10.
list_attributes_image_test10 = [*database['train'][list_test_images[10]].attrs.keys()]
list_attributes_image_test10

['boxes', 'dimension', 'labels', 'name', 'type']

***Select the xmin, ymin, xmax, ymax coordinates of the boxes***

In [49]:
# Bounding boxes of the train sample as nested Python lists.
# The attribute is read by name: selecting it through its position in the
# attribute key list breaks silently if attributes are added or reordered.
boxes_values = database['train'][list_test_images[10]].attrs['boxes'].tolist()
boxes_values

[[823, 551, 915, 638]]

***Select dimension of image***

In [50]:
# Stored image dimensions of the train sample as a Python list.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
dimension_image = database['train'][list_test_images[10]].attrs['dimension'].tolist()
dimension_image

[1024.0, 773.0, 3.0]

***Select labels of detections***

In [54]:
# Labels stored for the train sample, converted to a Python list.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
image_labels = database['train'][list_test_images[10]].attrs['labels'].tolist()
image_labels

[1]

***Select name of image***

In [55]:
# File name of the train sample; the stored bytes are decoded into text.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
image_name = database['train'][list_test_images[10]].attrs['name'].decode('UTF-8')
image_name

'000883b46adeca7e.jpg'

***Select type of file***

In [56]:
# File type of the train sample; the stored bytes are decoded into text.
# Read by attribute name instead of by position in the attribute key list,
# so the lookup does not depend on attribute ordering.
image_type = database['train'][list_test_images[10]].attrs['type'].decode('UTF-8')
image_type

'jpg'