In [1]:
# Magic functions -- Run Once
# autoreload mode 2: imported modules are reloaded before each cell execution,
# so edits made to the project's .py files are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2
%matplotlib notebook

# Move up one folder to reach the repo root
# NOTE: not idempotent -- every re-run of this cell moves one directory further up,
# which is why the cell must be executed only once per kernel.
%cd ..

# Project helper; presumably widens the notebook display area (assumption from its name -- TODO confirm)
from utils.notebook.generic import full_width_notebook
full_width_notebook()

/home/abdj2702/dev/maitrise/film-aqa


In [2]:
from IPython.core.display import Markdown
from main import parse_args_string, prepare_for_task

# ---- Notebook configuration -------------------------------------------------
# NOTE(review): data_root_path, image_height and image_width are not used in this
# cell; they are kept because other cells may read them.
data_root_path = "data"
random_seed = 876944
pad_per_batch = False
image_height = 224
image_width = 224
show_test_set_stats = False

data_version_name = "CLEAR_50k_4_inst_1024_win_50_overlap"

# FIXME : Clear mean & std might be wrong (It is written in the config file)

# Command-line style argument string handed to the project's argument parser.
arguments = (
    f"--notebook_data_analysis --version_name {data_version_name} "
    f"--random_seed {random_seed} --dict_folder questions "
    f"--no_feature_extractor --h5_image_input"
)

if pad_per_batch:
    # The leading space is required: without it the flag is glued to the previous
    # one ("--h5_image_input--pad_per_batch") and neither option is recognised.
    arguments += " --pad_per_batch"

args = parse_args_string(arguments)
task_and_more, dataloaders, model_and_more = prepare_for_task(args)
print("Preparation done")

# Unpack what prepare_for_task returned (note: `args` is rebound here).
task, args, flags, paths, device = task_and_more
film_model_config, film_model, optimizer, loss_criterion, scheduler, tensorboard = model_and_more

# Keep one dataset per split; the test split is only included when explicitly requested.
datasets = {
    set_type: dloader.dataset
    for set_type, dloader in dataloaders.items()
    if set_type != 'test' or show_test_set_stats
}


  from tqdm.autonotebook import tqdm



Task 'Notebook Data Analysis' for version 'CLEAR_50k_4_inst_1024_win_50_overlap'

Using device 'cuda:0'
Creating Datasets
Creating Dataloaders
Preparation done


## Questions Analysis

In [36]:
from collections import Counter, defaultdict
from statistics import mean


def get_scene_metrics(dataset):
    """Compute per-scene and dataset-wide statistics from the scene definitions.

    Reads ``dataset.scenes`` (a mapping whose values each hold a ``'definition'``
    entry). Every definition must provide ``'scene_index'``, ``'silence_before'``
    and an ``'objects'`` list; every object must provide ``'instrument'``,
    ``'note'``, ``'brightness'``, ``'loudness'``, ``'silence_after'``,
    ``'duration'`` and ``'id'``.

    Returns:
        tuple: ``(global_scene_metrics, scenes_metrics)`` where
        ``scenes_metrics`` is a list with one dict per scene (attribute
        counters, silence and sound durations, number of sounds, total
        duration) and ``global_scene_metrics`` holds the mean/min/max of the
        number of sounds, total duration, silence duration and individual
        sound duration.

    Raises:
        statistics.StatisticsError: if a scene has no objects or the dataset
            has no scenes (``mean`` of an empty sequence).
    """
    definitions = [entry['definition'] for entry in dataset.scenes.values()]

    # Indexed by sound id: an id met several times is stored once (last duration wins)
    duration_by_sound_id = {}
    scenes_metrics = []

    for definition in definitions:
        sounds = definition['objects']

        record = {
            'id': int(definition['scene_index']),
            'instruments': Counter(snd['instrument'] for snd in sounds),
            'notes': Counter(snd['note'] for snd in sounds),
            'brightnesses': Counter(snd['brightness'] for snd in sounds),
            'loudnesses': Counter(snd['loudness'] for snd in sounds),
            'silence_duration': definition['silence_before'],
            'sound_durations': [snd['duration'] for snd in sounds],
            'sound_duration_mean': 0,
        }

        # Silence = silence before the first sound + silence following each sound
        accumulated_silence = record['silence_duration']
        for snd in sounds:
            accumulated_silence += snd['silence_after']
            duration_by_sound_id[snd['id']] = snd['duration']
        record['silence_duration'] = accumulated_silence

        record['number_sound'] = len(sounds)
        record['sound_duration_mean'] = mean(record['sound_durations'])
        record['scene_total_duration'] = sum(record['sound_durations']) + record['silence_duration']

        scenes_metrics.append(record)

    def _mean_min_max(values):
        # Small helper: the same three aggregates are needed for every series
        return mean(values), min(values), max(values)

    # Global metrics
    mean_nb, min_nb, max_nb = _mean_min_max([m['number_sound'] for m in scenes_metrics])
    mean_total, min_total, max_total = _mean_min_max([m['scene_total_duration'] for m in scenes_metrics])
    mean_silence, min_silence, max_silence = _mean_min_max([m['silence_duration'] for m in scenes_metrics])
    mean_sound, min_sound, max_sound = _mean_min_max(duration_by_sound_id.values())

    global_scene_metrics = {
        'mean_number_sound': mean_nb,
        'min_number_sound': min_nb,
        'max_number_sound': max_nb,

        'mean_duration': mean_total,
        'min_duration': min_total,
        'max_duration': max_total,

        'mean_silence_duration': mean_silence,
        'min_silence_duration': min_silence,
        'max_silence_duration': max_silence,

        'mean_sound_duration': mean_sound,
        'min_sound_duration': min_sound,
        'max_sound_duration': max_sound,
    }

    return global_scene_metrics, scenes_metrics


def get_question_metrics(dataset):
    """Compute vocabulary/answer statistics over every question of a dataset.

    The dataset must expose ``__len__``, ``get_game(i, decode_tokens=True)``
    (returning a dict with ``'id'``, ``'image'``, ``'question'``, ``'answer'``
    and ``'program'``), ``answer_to_family`` and ``longest_question_length``.
    Questions are tokenised by splitting on single spaces.

    Returns:
        tuple: ``(global_metrics, per_game_metrics)``. ``global_metrics``
        aggregates word/answer distributions, words seen at each position,
        ``<unk>`` counts and question-length statistics;
        ``per_game_metrics`` is a list with one dict per question.

    Raises:
        IndexError: if a question has more tokens than
            ``dataset.longest_question_length``.
        statistics.StatisticsError: if the dataset is empty.
    """
    max_question_length = dataset.longest_question_length

    # Global accumulators (assembled into the result dict at the end)
    vocab_dist = Counter()
    answer_dist = Counter()
    answer_family_dist = Counter()
    words_at_position = [[] for _ in range(max_question_length)]
    seen_words = {}     # Used as an insertion-ordered set: only the keys matter
    unk_total = 0
    known_word_total = 0

    per_game_metrics = []

    for game_idx in range(len(dataset)):
        game = dataset.get_game(game_idx, decode_tokens=True)
        tokens = game['question'].split(' ')

        # Answer related
        answer = game['answer']
        answer_dist[answer] += 1
        answer_family = dataset.answer_to_family[answer]
        answer_family_dist[answer_family] += 1

        # Program related
        relate_nodes = [step for step in game['program'] if step['type'] == 'relate']
        relation_count = len(relate_nodes)

        # TODO :
        # Scenes metrics
            # How many different
                # Instrument
                # Brightness
                # Loudness
                # Notes
            # How many sounds similar to answer
                # Instrument
                # Brightness
                # Loudness
                # Notes
            # Nb sound in scene
            # Silence metrics

        # TODO : How many attributes define the "related" value
        # TODO : Nb filter associated with requested object (How many attributes are we referring to. Ex : sound = 0, loud sound = 1, loud bright sound = 2, loud bright F# = 3)
        # TODO : Are we referring to another object with similar properties in the same question ? (Ex another loud sound, another sound of the same instrument, etc)
        game_id = game['id']
        scene_id = game['image']['id']
        relation_values = [step['value_inputs'][0] for step in relate_nodes]
        relation_output_sizes = [len(step['_output']) for step in relate_nodes]
        # FIXME : This won't work for count, position_global and position_instrument
        family_in_question = answer_family in tokens
        # FIXME : We are most probably referring to another object of the scene
        #         IE : What is the loudness of the cello playing after the "loud" violin ?
        answer_in_question = answer in tokens

        word_dist = Counter()
        game_words_at_position = [[] for _ in range(max_question_length)]

        for position, token in enumerate(tokens):
            # '<unk>' tokens are excluded from the vocabulary distribution and word count
            if token != '<unk>':
                vocab_dist[token] += 1
                known_word_total += 1

            seen_words[token] = 1
            word_dist[token] += 1
            game_words_at_position[position].append(token)
            words_at_position[position].append(token)

        unk_in_game = word_dist['<unk>']
        unk_total += unk_in_game

        per_game_metrics.append({
            'id': game_id,
            'scene_id': scene_id,
            'answer': answer,
            'answer_family': answer_family,
            'have_relation': relation_count > 0,
            'nb_relation': relation_count,
            'relations': relation_values,
            'nb_output_per_relation': relation_output_sizes,
            'unk_count': unk_in_game,
            'length': len(tokens),
            'refer_to_answer_family_in_question': family_in_question,
            'answer_in_question': answer_in_question,
            'word_dist': word_dist,
            'word_per_position': game_words_at_position,
        })

    question_lengths = [metrics['length'] for metrics in per_game_metrics]
    unique_words = list(seen_words.keys())

    global_metrics = {
        'vocab_dist': vocab_dist,
        # Duplicates removed per position; ordering inside each list is arbitrary (set order)
        'unique_word_per_position': [list(set(position_words)) for position_words in words_at_position],
        'word_per_position': words_at_position,
        'answer_dist': answer_dist,
        'answer_family_dist': answer_family_dist,
        'total_unk_count': unk_total,
        'total_word_count': known_word_total,
        'unique_word_list': unique_words,
        'unique_word_count': len(unique_words),
        'mean_length': mean(question_lengths),
        'min_length': min(question_lengths),
        'max_length': max(question_lengths),
    }

    return global_metrics, per_game_metrics
        
    
def parse_program(question_program):
    """Collect the relation values used by a question program.

    Args:
        question_program: iterable of program nodes (dicts). Every node must
            have a ``'type'`` entry; ``'relate'`` nodes must also have a
            non-empty ``'value_inputs'`` list.

    Returns:
        list: first ``'value_inputs'`` element of every ``'relate'`` node, in
        program order (empty list when the program has no relation).

    Raises:
        KeyError: if a node has no ``'type'`` or a ``'relate'`` node has no
            ``'value_inputs'``.
    """
    relations = []

    for node in question_program:
        if node['type'] == 'relate':
            # Same key as the one read by get_question_metrics ('value_inputs', plural)
            relations.append(node['value_inputs'][0])

        # 'scene' nodes carry nothing to extract.
        # TODO : 'filter*' and 'query*' nodes are not analysed yet.

    return relations
        
        
        

# Compute the question and scene statistics on the training split.
# get_question_metrics fetches every game of the dataset one by one, so this
# cell can be slow on a large split.
question_global_metrics, per_game_metrics = get_question_metrics(datasets['train'])
scenes_global_metrics, scenes_metrics = get_scene_metrics(datasets['train'])

In [38]:
question_global_metrics.keys()

dict_keys(['vocab_dist', 'unique_word_per_position', 'word_per_position', 'answer_dist', 'answer_family_dist', 'total_unk_count', 'total_word_count', 'unique_word_list', 'unique_word_count', 'mean_length', 'min_length', 'max_length'])

In [39]:
print(question_global_metrics['mean_length'])
print(question_global_metrics['min_length'])
print(question_global_metrics['max_length'])

17.12387142857143
6
28


In [35]:
per_game_metrics[0]

{'id': 0,
 'scene_id': 0,
 'answer': 'quiet',
 'answer_family': 'loudness',
 'have_relation': True,
 'nb_relation': 1,
 'relations': ['before'],
 'nb_output_per_relation': [8],
 'unk_count': 0,
 'length': 16,
 'refer_to_answer_family_in_question': True,
 'answer_in_question': True,
 'word_dist': Counter({'what': 1,
          'is': 1,
          'the': 3,
          'loudness': 1,
          'of': 1,
          'dark': 1,
          'clarinet': 1,
          'sound': 2,
          'playing': 1,
          'before': 1,
          'quiet': 1,
          'bright': 1,
          '?': 1}),
 'word_per_position': [['what'],
  ['is'],
  ['the'],
  ['loudness'],
  ['of'],
  ['the'],
  ['dark'],
  ['clarinet'],
  ['sound'],
  ['playing'],
  ['before'],
  ['the'],
  ['quiet'],
  ['bright'],
  ['sound'],
  ['?'],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  []]}

In [15]:
question_global_metrics.keys()

dict_keys(['vocab_dist', 'unique_word_per_position', 'word_per_position', 'answer_dist', 'answer_family_dist', 'total_unk_count', 'total_word_count', 'unique_word_list', 'unique_word_count'])

In [31]:
# Share of the questions taken by the most frequent answer.
# Normalised by the number of answers actually counted rather than a hard-coded
# dataset size, so the ratio stays correct for any split or dataset version.
(
    question_global_metrics['answer_dist'].most_common(1)[0][1]
    / sum(question_global_metrics['answer_dist'].values())
)

0.07504285714285715

In [20]:
per_game_metrics[0]

{'id': 0,
 'scene_id': 0,
 'answer': 'quiet',
 'answer_family': 'loudness',
 'have_relation': True,
 'nb_relation': 1,
 'relations': ['before'],
 'nb_output_per_relation': [8],
 'unk_count': 0,
 'length': 16,
 'refer_to_answer_family_in_question': True,
 'answer_in_question': True,
 'word_dist': Counter({'what': 1,
          'is': 1,
          'the': 3,
          'loudness': 1,
          'of': 1,
          'dark': 1,
          'clarinet': 1,
          'sound': 2,
          'playing': 1,
          'before': 1,
          'quiet': 1,
          'bright': 1,
          '?': 1}),
 'word_per_position': [['what'],
  ['is'],
  ['the'],
  ['loudness'],
  ['of'],
  ['the'],
  ['dark'],
  ['clarinet'],
  ['sound'],
  ['playing'],
  ['before'],
  ['the'],
  ['quiet'],
  ['bright'],
  ['sound'],
  ['?'],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  [],
  []]}

In [None]:

def get_templates_infos(dataset):
    """Gather the template index of every question, globally and per scene.

    For each scene of ``dataset.scenes``, the ids listed under
    ``'question_idx'`` are looked up in ``dataset.questions`` and their
    ``'template_index'`` is collected.

    Returns:
        tuple: ``(all_templates, templates_per_scene)`` where
        ``templates_per_scene`` maps a scene id to the list of template
        indexes of its questions and ``all_templates`` is the concatenation
        of those lists in scene iteration order.
    """
    questions = dataset.questions

    templates_per_scene = {
        scene_id: [questions[q_id]['template_index'] for q_id in scene_info['question_idx']]
        for scene_id, scene_info in dataset.scenes.items()
    }

    # Flatten the per-scene lists, keeping the scene iteration order
    all_templates = [
        template
        for scene_templates in templates_per_scene.values()
        for template in scene_templates
    ]

    return all_templates, templates_per_scene

# TODO: Show discrete histogram of all templates
# TODO: Show 2d matrix of templates_per_scene (Too much data ?)
# Template indexes of the training split: flat list + mapping scene id -> template indexes
all_templates, templates_per_scene = get_templates_infos(datasets['train'])

In [None]:
next(iter(datasets['train'].scenes.values()))

In [None]:
datasets['train'].get_game(0, decode_tokens=True)

## Scenes Analysis

In [None]:
from utils.notebook.dataset_analysis import scene_object_per_position, plot_attribute_per_position_matrix, plot_scene_distribution_per_attribute

In [None]:
# Largest number of sounds found in a single scene, reported per dataset split.
# Iterates over `datasets` explicitly so the cell does not depend on a `dataset`
# variable left over from another cell's loop (it would fail on a fresh kernel).
{
    set_type: max(len(s['definition']['objects']) for s in split.scenes.values())
    for set_type, split in datasets.items()
}

In [None]:
# Scene Position Analysis per attribute
# NOTE: `attributes` is also read by the "Scene distribution per attribute" cell,
# so this cell must be executed before it.
attributes = ['instrument', 'loudness', 'note', 'brightness', 'id']

for set_type, dataset in datasets.items():
    # One Markdown section title per dataset split
    display(Markdown(f"## [{set_type.capitalize()}] Scene Position Analysis"))
    for attribute in attributes:
        # One matrix plot per attribute, built from every scene of the split
        obj_per_position = scene_object_per_position(list(dataset.scenes.values()), attribute=attribute)
        plot_attribute_per_position_matrix(obj_per_position, attribute)

In [None]:
# Scene distribution per attribute
# NOTE: relies on the `attributes` list defined in the "Scene Position Analysis" cell.

for set_type, dataset in datasets.items():
    # One Markdown section title per dataset split
    display(Markdown(f"## [{set_type.capitalize()}] Scene distribution Analysis"))
    for attribute in attributes:
        # norm_hist=False is forwarded to the project helper (presumably raw counts rather than a normalised histogram -- TODO confirm)
        plot_scene_distribution_per_attribute(list(dataset.scenes.values()), attribute, norm_hist=False)

In [None]:
# Durations
from utils.notebook.dataset_analysis import plot_scene_duration_hist, plot_scene_total_silence_distribution, plot_scene_silence_by_position_distribution
import matplotlib.pyplot as plt

for set_type, dataset in datasets.items():
    # Capitalised split name, reused in the section title, plot titles and legends
    set_label = set_type.capitalize()

    display(Markdown(f"## [{set_label}] Scene durations Analysis"))

    # One figure per split: scene durations on the left axis, total silence on the right one
    fig, axs = plt.subplots(1, 2)

    plot_scene_duration_hist(dataset.scenes.values(), title=f"[{set_label}]Scene durations", legend_label=set_label, fig_ax=(fig, axs[0]), norm_hist=False)
    plot_scene_total_silence_distribution(dataset.scenes.values(), title=f"[{set_label}]Silence durations", legend_label=set_label, fig_ax=(fig, axs[1]), norm_hist=False)
    # No fig_ax is passed here -- presumably the helper draws on a figure of its own (TODO confirm)
    plot_scene_silence_by_position_distribution(dataset.scenes.values(), title=f"[{set_label}]Silence per position", legend_label=set_label, norm_hist=False)
    