In [None]:
# Magic functions -- Run Once
%load_ext autoreload
%autoreload 2
%matplotlib notebook

from IPython.core.display import display, HTML, Markdown
html_str = "<style>.container { width:99% !important; }\n"
html_str += "div.cell.selected { border-left-width: 1px !important; }\n"
html_str += "div.output_scroll { resize: vertical !important }</style>"
display(HTML(html_str))

# Move up one folder to reach the repo root
%cd ..

In [None]:
from data_interfaces.CLEAR_dataset import CLEAR_dataset
import data_interfaces.transforms as transforms
from torchvision.transforms import Compose
from stats import helper

# --- Dataset location --------------------------------------------------------
data_root_path = "data"
data_version_name = "v3_fixed_1k_10_inst_1024_win_50_overlap"
data_path = f"{data_root_path}/{data_version_name}"
dict_file_path = f"{data_path}/questions/dict.json"

# --- Input configuration -----------------------------------------------------
input_config = dict(type='raw')

# Statistics handed to NormalizeSample below.
# NOTE(review): presumably per-channel mean/std computed on this dataset
# version -- confirm before reusing with another data_version_name.
clear_mean = [0.4258704782, 0.7333328128, 0.6625311375]
clear_std = [0.3685931265, 0.1786273718, 0.2194774598]

# --- Base transform pipeline -------------------------------------------------
# These steps do not depend on the dataset content, so they can be composed
# before any dataset object exists.
transforms_to_apply = Compose([
    transforms.ResizeImgBasedOnWidth(224),
    transforms.ToTensor(),
    transforms.ImgBetweenZeroOne(),
    transforms.NormalizeSample(mean=clear_mean, std=clear_std, inplace=True),
])

# --- Datasets ----------------------------------------------------------------
# One CLEAR_dataset per split name; only the 'train' split is loaded here.
datasets = {
    split_name: CLEAR_dataset(data_root_path, data_version_name, input_config, split_name,
                              dict_file_path=dict_file_path, transforms=transforms_to_apply)
    for split_name in ('train',)
}

# Currently disabled:
#answer_to_family_map = helper.get_answer_to_family_map(f'{data_path}/attributes.json', to_lowercase=True, reduced_text=False)

# --- Size-dependent transforms -----------------------------------------------
# The padding / resizing targets are queried from the dataset object itself,
# so these transforms can only be registered once the dataset has been built.
for set_type, dataset in datasets.items():
    max_dims = dataset.get_max_width_image_dims()
    dataset.add_transform(transforms.PadTensor(max_dims))

    # Resize to a square whose side is the largest of the reported dimensions.
    max_dim_value = max(max_dims)
    square_size = (max_dim_value, max_dim_value)
    dataset.add_transform(transforms.ResizeTensor(square_size))
    



## Scenes Analysis

In [None]:
# Scene Position Analysis per attribute
# Scene attributes to analyse; the scene distribution cell further down
# iterates over this same list.
attributes = ['instrument', 'loudness', 'note', 'brightness', 'id']

for set_type in datasets:
    dataset = datasets[set_type]

    # Markdown heading rendered in the cell output for this split.
    heading = f"## [{set_type.capitalize()}] Scene Position Analysis"
    display(Markdown(heading))

    for attribute in attributes:
        # A fresh list of the scenes is handed to the helper on every call.
        scene_list = list(dataset.scenes.values())
        obj_per_position = helper.scene_object_per_position(scene_list, attribute=attribute)

        # Plot the per-position result computed above for this attribute.
        helper.plot_attribute_per_position_matrix(obj_per_position, attribute)

In [None]:
# Scene distribution per attribute
# Relies on `datasets` and `attributes` defined in earlier cells.

for set_type in datasets:
    dataset = datasets[set_type]

    # Markdown heading rendered in the cell output for this split.
    heading = f"## [{set_type.capitalize()}] Scene distribution Analysis"
    display(Markdown(heading))

    for attribute in attributes:
        # One plot per attribute; norm_hist=False is forwarded to the helper.
        helper.plot_scene_distribution_per_attribute(
            list(dataset.scenes.values()),
            attribute,
            norm_hist=False,
        )

## Questions Analysis