In [None]:
# Magic functions -- Run Once
%load_ext autoreload
%autoreload 2
%matplotlib notebook

# Move up one folder to reach the repo root
%cd ..

from utils.notebook.generic import full_width_notebook
full_width_notebook()

from IPython.core.display import display, Markdown, HTML

# Remove huge horizontal bar in figures
display(HTML("<style>.ui-dialog-titlebar{display:none;}</style>"))

In [None]:
# Run the cogent elementary-sounds picker script through the shell, reading from
# `--good_sounds_folder` and writing to `--output_path`.
# NOTE(review): the interpreter, script, input and output paths are all absolute
# and specific to one machine -- adjust them before running this cell elsewhere.
!/home/j3romee/dev/venv/aqa-dataset-gen/bin/python /home/j3romee/dev/maitrise/aqa-dataset-gen/scripts/good_sounds_elementary_sounds_picker_cogent.py --good_sounds_folder /home/j3romee/dev/maitrise/good-sounds --output_path /home/j3romee/dev/maitrise/aqa-dataset-gen/test_elementary_sounds_cogent

In [None]:
import json
import pandas as pd
from utils.elementary_sounds import Elementary_Sounds

# Folder holding the elementary sounds and their 'elementary_sounds.json' definition
elementary_sounds_folder = "../aqa-dataset-gen/test_elementary_sounds_cogent"
#elementary_sounds_folder = "../aqa-dataset-gen/elementary_sounds"

# Subset of attributes kept in the condensed `sounds_df` view
columns = [
    'instrument', 'note', 'octave', 'duration',
    'loudness', 'raw_loudness',
    'brightness', 'raw_brightness',
]

# NOTE(review): `save_raw_values=True` presumably keeps the `raw_*` attributes
# on each sound -- confirm against utils.elementary_sounds.
elementary_sounds = Elementary_Sounds(
    elementary_sounds_folder,
    'elementary_sounds.json',
    save_raw_values=True,
)

# One record per sound; the column set is taken from the keys of the first sound
sound_records = [elementary_sounds.get(idx) for idx in range(elementary_sounds.nb_sounds)]
full_sounds_df = pd.DataFrame(sound_records, columns=elementary_sounds.get(0).keys())

sort_keys = ['instrument', 'octave', 'note']
sounds_df = full_sounds_df[columns].sort_values(sort_keys)

In [None]:
# [low, high] thresholds passed to `grouped_df_histogram` in the plotting cell.
brightness_threshold = [0.42, 0.47]

# Single-value loudness thresholds tried previously, kept for reference.
# (`-27` used to be assigned here and immediately overwritten by the list below.)
#loudness_threshold = -27
#loudness_threshold = -29
loudness_threshold = [0.57, 0.62]

# Styled table of every sound. `duration` is divided by 1000 and suffixed with
# " s" (presumably stored in milliseconds -- TODO confirm).
sounds_df.sort_values(['instrument', 'octave', 'note']).style.format({
    'duration': lambda x: f'{(x/1000):.2f} s'
})

In [None]:
import matplotlib.pyplot as plt
def grouped_df_histogram(df, group_by, col_to_plot, threshold=None, normalize=False):
    """Plot one histogram of `col_to_plot` per group of `df`.

    Every group gets its own 100-bin histogram; once all groups are drawn, the
    x limits of every axes are set to the widest range seen so the plots can
    be compared side by side.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing `group_by` and `col_to_plot`.
    group_by : str or list of str
        Column(s) forwarded to `df.groupby`.
    col_to_plot : str
        Numeric column to histogram.
    threshold : number, list/tuple of numbers, or None
        A vertical line is drawn at each value on every histogram. Thresholds
        are compared against the plotted values, i.e. the normalized values
        when `normalize` is True.
        * one value: displays "<rows strictly below> / <remaining rows>";
        * two values: displays "<rows strictly below the first> /
          <rows strictly above the second>";
        * any other count: only the lines are drawn.
        A threshold of 0 is a valid value (only `None` disables thresholds).
    normalize : bool
        When True, values are min-max scaled with the min/max of the whole
        column (not of each group) before plotting.

    Returns
    -------
    list
        The axes of each group's histogram, in group iteration order.

    Notes
    -----
    Relies on `plt`, `display` and `Markdown` being available in the notebook
    namespace.
    """
    max_value = df[col_to_plot].max()
    min_value = df[col_to_plot].min()

    # Normalize `threshold` to a list. Test against None rather than relying on
    # truthiness so that a threshold of 0 is not silently ignored, and accept
    # tuples as well as lists.
    if threshold is None:
        thresholds = []
    elif isinstance(threshold, (list, tuple)):
        thresholds = list(threshold)
    else:
        thresholds = [threshold]

    # The first color of the cycle is skipped, presumably so the threshold
    # lines do not reuse the default color of the histogram bars.
    line_colors = plt.rcParams['axes.prop_cycle'].by_key()['color'][1:]

    # Infinite sentinels so the tracking works whatever the data range is.
    min_x_lim = float('inf')
    max_x_lim = float('-inf')
    axs = []
    for group_name, group in df.groupby(group_by):
        group_to_plot = group[[col_to_plot]]

        if normalize:
            # Global (whole-column) min-max scaling keeps groups comparable
            group_to_plot = (group_to_plot - min_value) / (max_value - min_value)

        # DataFrame.hist returns a 2D array of axes; only one column is plotted
        ax = group_to_plot.hist(grid=False, bins=100)[0][0]
        ax.set_title(f"{group_by} - {str(group_name).capitalize()} -- {col_to_plot}")
        axs.append(ax)

        # Limits are read before the threshold lines are added
        bottom_lim, up_lim = ax.get_xlim()
        min_x_lim = min(min_x_lim, bottom_lim)
        max_x_lim = max(max_x_lim, up_lim)

        for idx, thresh in enumerate(thresholds):
            # Wrap around the color list instead of running out of colors
            color = line_colors[idx % len(line_colors)] if line_colors else None
            ax.axvline(x=thresh, color=color)

        values = group_to_plot[col_to_plot]
        if len(thresholds) == 1:
            under_thresh = (values < thresholds[0]).sum()
            over_thresh = len(values) - under_thresh
        elif len(thresholds) == 2:
            under_thresh = (values < thresholds[0]).sum()
            over_thresh = (values > thresholds[1]).sum()
        else:
            # No threshold, or more than two: nothing to count
            continue

        display(Markdown(f"### {under_thresh} / {over_thresh}"))

    # Share the widest x range across all the histograms
    for ax in axs:
        ax.set_xlim((min_x_lim, max_x_lim))

    return axs

def group_df_discrete_hist(df, group_by, col_to_plot, normalize=False):
    """Bar plot of the value counts of `col_to_plot` within each group of `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing `group_by` and `col_to_plot`.
    group_by : str or list of str
        Column(s) forwarded to `df.groupby`; one cluster of bars per group.
    col_to_plot : str
        Discrete column whose values are counted; one bar per distinct value.
    normalize : bool
        Forwarded to `value_counts`; when True the bars show proportions
        within each group instead of counts.

    Returns
    -------
    The axes returned by `DataFrame.plot.bar`.
    """
    # Use the `df` argument (not a notebook-level frame) and honour `normalize`
    counts = df.groupby(group_by)[col_to_plot].value_counts(normalize=normalize).unstack()
    ax = counts.plot.bar()
    ax.set_title(f'{col_to_plot.capitalize()} Count by {group_by}')

    return ax

def single_col_df_hist(df, col_to_plot, normalize=False):
    """Bar plot of the value counts of one column, drawn in a new figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing `col_to_plot`.
    col_to_plot : str
        Column whose distinct values are counted.
    normalize : bool
        Forwarded to `value_counts`; when True the bars show proportions
        instead of counts.

    Returns
    -------
    The axes returned by `Series.plot.bar`, like the other plot helpers of
    this notebook.
    """
    # New figure so the bars are not drawn on top of a previous plot
    plt.figure()
    ax = df[col_to_plot].value_counts(normalize=normalize).plot.bar()
    ax.set_title(f'{col_to_plot.capitalize()} Distribution')

    return ax

In [None]:
# Vertical spacer displayed before each section heading
empty_lines = HTML("<br><br><br><br>")

display(Markdown("# Attributes distribution"))
single_col_df_hist(sounds_df, 'loudness', normalize=False)
single_col_df_hist(sounds_df, 'brightness', normalize=False)
#single_col_df_hist(sounds_df, 'instrument', normalize=False)
#single_col_df_hist(sounds_df, 'note', normalize=False)
#single_col_df_hist(sounds_df, 'octave', normalize=False)

#sounds_df[['brightness','loudness']].value_counts(normalize=True).unstack().plot.bar()

display(empty_lines, Markdown("# Brightness(label) by Instrument"))
group_df_discrete_hist(sounds_df, 'instrument', 'brightness', normalize=False)

display(empty_lines, Markdown("# Loudness(label) by Instrument"))
group_df_discrete_hist(sounds_df, 'instrument', 'loudness', normalize=False)

#display(empty_lines, Markdown("# Octave by Instrument"))
#group_df_discrete_hist(sounds_df, 'instrument', 'octave', normalize=False)

#display(empty_lines, Markdown("# Loudness(label) by Note"))
#group_df_discrete_hist(sounds_df, 'note', 'loudness', normalize=False)

#display(empty_lines, Markdown("# Brightness(label) by Note"))
#group_df_discrete_hist(sounds_df, 'note', 'brightness', normalize=False)

#display(empty_lines, Markdown("# Duration by Instrument"))
#grouped_df_histogram(full_sounds_df, 'instrument', 'duration', normalize=False)

# Whether the raw loudness is min-max normalized before plotting.
# Set to False to plot the un-normalized values (the threshold is compared
# against whatever is plotted, so it has to be on the matching scale).
do_norm = True

# Keep the heading in sync with what is actually plotted
loudness_heading = "# Raw Loudness normalized by Instrument" if do_norm else "# Raw Loudness by Instrument"
display(empty_lines, Markdown(loudness_heading))
grouped_df_histogram(full_sounds_df, 'instrument', 'raw_loudness', threshold=loudness_threshold, normalize=do_norm)

display(empty_lines, Markdown("# Raw Brightness normalized by Instrument"))
grouped_df_histogram(full_sounds_df, 'instrument', 'raw_brightness', threshold=brightness_threshold, normalize=True)

#display(empty_lines, Markdown("# Raw Brightness normalized by Note"))
#grouped_df_histogram(full_sounds_df, 'note', 'raw_brightness', threshold=brightness_threshold, normalize=True)

#display(empty_lines, Markdown("# Raw Loudness by Note"))
#grouped_df_histogram(full_sounds_df, 'note', 'raw_loudness', threshold=loudness_threshold, normalize=False)

#display(empty_lines, Markdown("# Raw Loudness by Brightness (Label)"))
#grouped_df_histogram(full_sounds_df, 'brightness', 'raw_loudness', threshold=loudness_threshold)

#display(empty_lines, Markdown("# Raw Brightness normalized by Loudness (Label)"))
#grouped_df_histogram(full_sounds_df, 'loudness', 'raw_brightness', threshold=brightness_threshold, normalize=True)

#display(empty_lines, Markdown("# Raw Loudness by Octave"))
#grouped_df_histogram(full_sounds_df, 'octave', 'raw_loudness', threshold=loudness_threshold)

#display(empty_lines, Markdown("# Raw Brightness normalized by Octave"))
#grouped_df_histogram(full_sounds_df, 'octave', 'raw_brightness', threshold=loudness_threshold, normalize=True)

#display(empty_lines, Markdown("# Loudness(label) by Brightnes (label)"))
#group_df_discrete_hist(sounds_df, 'brightness', 'loudness', normalize=False)

#display(empty_lines, Markdown("# Loudness(label) by Octave"))
#group_df_discrete_hist(sounds_df, 'octave', 'loudness', normalize=False)

#display(empty_lines, Markdown("# Brightness(label) by Octave"))
#group_df_discrete_hist(sounds_df, 'octave', 'brightness', normalize=False)

In [None]:
# Print the `min_loudness` / `max_loudness` attributes of the loaded sounds.
# NOTE(review): presumably the raw loudness bounds over all sounds -- confirm
# against utils.elementary_sounds.
print(elementary_sounds.min_loudness, elementary_sounds.max_loudness)

In [None]:
# Table of loudness label counts: one row per instrument, one column per label
sounds_df.groupby(['instrument'])['loudness'].value_counts(normalize=False).unstack()

In [None]:
# Inspect the first entry of the sounds definition (the same object that is
# dumped to JSON in the next cell)
elementary_sounds.definition[0]

In [None]:
import json

# Serialize the sounds definition as JSON.
# NOTE(review): absolute, machine-specific output path.
definition_dump_path = "/home/j3romee/dev/maitrise/aqa-dataset-gen/new"
with open(definition_dump_path, 'w') as dump_file:
    json.dump(elementary_sounds.definition, dump_file)