# Code for analyzing the datasets

In [25]:
import os
import json
import numpy as np
from helper import draw_yolo
import plotly.graph_objects as go

In [2]:
# Load the exploratory-data-analysis JSON for the dataset.
# Later cells read data_dict['images'] and data_dict['annotations'].
root = 'data/NordTank586x371'
# Use a context manager so the file handle is closed as soon as parsing is done
# (a bare open() would keep it open for the lifetime of the kernel).
with open(os.path.join(root, "eda_json.json")) as f:
    data_dict = json.load(f)

In [4]:
# Dataset size: every image versus the entries under 'annotations'
# (the rest of this notebook treats each of those entries as one image).
num_images = len(data_dict['images'])
num_annotated_images = len(data_dict['annotations'])

print("Total number of images:", num_images)
print("Total number of annotated images:", num_annotated_images)

Total number of images: 3294
Total number of annotated images: 2995


In [6]:
# Per-image bounding-box counts for each class.
#   damage / dirt          -> one count per annotation entry (label 1 = damage, label 0 = dirt)
#   damage_img / dirt_img  -> number of entries that hold at least one box of that class
annotations = data_dict['annotations'].values()
labels_per_image = [entry['labels'] for entry in annotations]

damage = [image_labels.count(1) for image_labels in labels_per_image]
dirt = [image_labels.count(0) for image_labels in labels_per_image]

damage_img = sum(1 for count in damage if count != 0)
dirt_img = sum(1 for count in dirt if count != 0)

In [7]:
# Report how many images carry each class and how many boxes each class has in total
count_summary = (
    ("The number of images containing dirt annotations:", dirt_img),
    ("The number of images containing damage annotations:", damage_img),
    ("The number of dirt annotations:", sum(dirt)),
    ("The number of damage annotations:", sum(damage)),
)
for description, value in count_summary:
    print(description, value)

The number of images containing dirt annotations: 563
The number of images containing damage annotations: 2527
The number of dirt annotations: 581
The number of damage annotations: 8770


In [8]:
# Bar chart: total number of dirt labels next to total number of damage labels
label_totals = {'Dirt': sum(dirt), 'Damage': sum(damage)}

fig = go.Figure()
fig.add_trace(go.Bar(
    x=list(label_totals.keys()),
    y=list(label_totals.values()),
    marker_color='#AA0DFE',
))
fig.update_layout(
    title_text='Count dirt and damage labels',
    yaxis_title_text='Count',
    font={'size': 22},
)
fig.show()

In [23]:
# Bar chart: number of images that contain at least one dirt / damage annotation.
# An image holding both classes is counted in both bars.
image_totals = {'Dirt': dirt_img, 'Damage': damage_img}

fig = go.Figure()
fig.add_trace(go.Bar(
    x=list(image_totals.keys()),
    y=list(image_totals.values()),
    marker_color='#AA0DFE',
))
fig.update_layout(
    title_text='Count images containing dirt/damage annotations',
    yaxis_title_text='Count',
    font={'size': 22},
)
fig.show()

In [11]:
# Collect the bounding-box areas of every annotation, split by class.
# Label 1 goes to damage_areas; every other label value goes to dirt_areas.
dirt_areas, damage_areas = [], []
annotations = data_dict['annotations'].values()

for annotation in annotations:
    labels = annotation['labels']
    areas = annotation['areas']

    for box_idx, area in enumerate(areas):
        # areas[k] belongs to the box whose class is labels[k]
        bucket = damage_areas if labels[box_idx] == 1 else dirt_areas
        bucket.append(area)

In [14]:
# Histogram of bounding-box area for the damage annotations.
# Bars show the percentage of boxes per bin (histnorm='percent'),
# with ten bins of width 0.1 covering the range 0 to 1.0.
area_bins = dict(start=0, end=1.0, size=0.1)

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=damage_areas,
    name="Damage",
    marker_color='#DEA0FD',
    xbins=area_bins,
    histnorm='percent',
))

# Uncomment the trace below to also plot the dirt area size distribution
# (it uses the dirt colour from the other plots so the two traces stay distinguishable).
# fig.add_trace(go.Histogram(
#     x=dirt_areas,
#     name="Dirt",
#     marker_color='#AA0DFE',
#     xbins=area_bins,
#     histnorm='percent',
# ))

fig.update_layout(
    title_text='Area distribution of the bounding boxes for the damage annotations',
    xaxis_title_text='Area',
    yaxis_title_text='Count %',
    bargap=0.2,
    bargroupgap=0.1,
    font={'size': 22},
)
# Pin the x axis to the binned range so empty bins at the edges stay visible
fig.update_xaxes(range=[0, 1.0])
fig.show()

In [15]:
# Histogram of how many dirt / damage bounding boxes each annotated image holds.
# `damage` and `dirt` hold one count per annotation entry, zeros included.
boxes_per_image_traces = [
    go.Histogram(x=damage, name="Damage", marker_color='#DEA0FD'),
    go.Histogram(x=dirt, name="Dirt", marker_color='#AA0DFE'),
]

fig = go.Figure(data=boxes_per_image_traces)
fig.update_layout(
    title_text='Number of bounding boxes per image',
    xaxis_title_text='Number of bounding boxes',
    yaxis_title_text='Count',
    bargap=0.2,
    bargroupgap=0.1,
    font={'size': 22},
)
fig.show()

In [66]:
# Create a heatmap of all bounding box locations in the images.
# Each heatmap cell counts how many boxes of that class cover the pixel.
# Presumably 586 x 371 is the image width x height in pixels (it matches the
# dataset folder name); the arrays are indexed [row, column] = [y, x].
IMG_W, IMG_H = 586, 371
heatmap_dirt = np.zeros((IMG_H, IMG_W), dtype=int)
heatmap_damage = np.zeros((IMG_H, IMG_W), dtype=int)

# Read the annotations straight from data_dict so this cell does not depend on
# an `annotations` variable left behind by an earlier cell.
for annotation in data_dict['annotations'].values():
    boxes = np.asarray(annotation['bboxes'], dtype=np.float32)

    # Skip entries without boxes. `size == 0` covers both an empty list
    # (shape (0,), where reading shape[1] would raise IndexError) and a list
    # holding a single empty box (shape (1, 0)).
    if boxes.size == 0:
        continue

    for i, box in enumerate(boxes):
        # draw_yolo is imported from helper (not visible here); its four return
        # values are used as pixel coordinates (left, top, right, bottom).
        # Clamp them at 0: a negative value would otherwise be treated by numpy
        # as an index counted from the end and mark the wrong (or no) region.
        # Values beyond the array size need no clamping, slices truncate them.
        left, top, right, bottom = (
            max(int(coord), 0) for coord in draw_yolo(box, (IMG_W, IMG_H))
        )

        # Make separate heatmaps for dirt and damage bounding boxes
        if annotation['labels'][i] == 1.0:
            heatmap_damage[top:bottom, left:right] += 1
        else:
            heatmap_dirt[top:bottom, left:right] += 1

In [69]:
# Heatmap of where the damage bounding boxes fall inside the image.
# The y axis is reversed so that array row 0 is drawn at the top.
damage_trace = go.Heatmap(z=heatmap_damage)

fig = go.Figure(data=damage_trace)
fig.update_layout(
    title_text='Heatmap of the locations of the bounding boxes for damage',
    yaxis={'autorange': "reversed"},
)
fig.show()

In [70]:
# Heatmap of where the dirt bounding boxes fall inside the image.
# The y axis is reversed so that array row 0 is drawn at the top.
dirt_trace = go.Heatmap(z=heatmap_dirt)

fig = go.Figure(data=dirt_trace)
fig.update_layout(
    title_text='Heatmap of the locations of the bounding boxes for dirt',
    yaxis={'autorange': "reversed"},
)
fig.show()