In [None]:
import sys
import glob
import json
from collections import Counter

sys.path.append("..")
from melolib.midi import parse_midi
from melolib.notation import generate_score_from_parsed_midi, DurationClass, Rest, Duration

def clean_score(score, min_notes=30, threshold=0):
    """Drop unreliable parts from a score and flatten the remaining note data.

    A part is kept only when all of the following hold:

    * it has more than ``min_notes`` entries in ``part["data"]``;
    * ``part["key_signature"]`` is not ``None``;
    * ``part["key_signature"][0][1] / len(part["data"])`` is at most
      ``threshold``.

    NOTE(review): ``key_signature[0][1]`` is presumably the number of notes
    that do not fit the first key-signature candidate -- confirm against
    ``melolib.notation``.

    Args:
        score: Iterable of part dicts with at least the keys ``"data"`` and
            ``"key_signature"``. Each ``"data"`` entry must provide
            ``info["notation"]["keys"]`` and ``info["notation"]["duration"]``.
        min_notes: A part needs strictly more entries than this to be kept.
        threshold: Largest accepted ``key_signature[0][1]`` to entry-count ratio.

    Returns:
        A new list of new part dicts. Every key of the source part is carried
        over, except that ``"data"`` is replaced by a list of
        ``{"key": ..., "duration": ...}`` dicts holding the first key and the
        duration of each entry. The dicts passed in are left untouched.
    """
    cleaned = []
    for part in score:
        notes = part["data"]
        key_signature = part["key_signature"]

        # Length is checked first so the ratio below never divides by zero.
        if not notes or len(notes) <= min_notes:
            continue
        if key_signature is None:
            continue
        if key_signature[0][1] / len(notes) > threshold:
            continue

        # Build a fresh dict instead of overwriting part["data"], so the
        # caller's score stays usable after cleaning.
        cleaned.append({
            **part,
            "data": [
                {
                    "key": info["notation"]["keys"][0],
                    "duration": info["notation"]["duration"],
                }
                for info in notes
            ],
        })
    return cleaned



def gen_melody_json(extra_files=None):
    """Parse the MIDI dataset and write the cleaned scores to ``melody.json``.

    Every ``*.mid`` file below ``../../res/adl-piano-midi`` (searched
    recursively) plus the files in ``extra_files`` is parsed with
    ``parse_midi``, converted with ``generate_score_from_parsed_midi`` and
    filtered with ``clean_score``. The list of cleaned scores is dumped as
    JSON to ``../../out/melody.json``.

    Files that fail at any of these steps are skipped; the number of skipped
    files is printed once processing has finished.

    Args:
        extra_files: Optional iterable of additional MIDI paths. When omitted,
            a notebook-level ``extra_files`` variable is used if one exists,
            otherwise no extra files are added.
    """
    base_path = "../../res/adl-piano-midi"
    files = glob.glob(base_path + "/**/*.mid", recursive=True)

    if extra_files is None:
        # Earlier versions read a global `extra_files` that no cell defines,
        # which raises NameError on a fresh kernel. Honour the global when it
        # exists, and fall back to "no extras" otherwise.
        extra_files = globals().get("extra_files", [])
    files = files + list(extra_files)

    scores = []
    failed_files = []
    for i, file in enumerate(files):
        if i % 500 == 0:
            print(f"Progress {i/len(files):.2f}")
        try:
            midi = parse_midi(file)
            score = generate_score_from_parsed_midi(midi)
            scores.append(clean_score(score))
        except Exception as exc:
            # Best effort: one unreadable file must not abort the whole run.
            # Catching Exception (not a bare except) keeps Ctrl-C working.
            failed_files.append((file, exc))

    if failed_files:
        print(f"Skipped {len(failed_files)} of {len(files)} files that could not be processed")

    with open("../../out/melody.json", "w") as f:
        json.dump(scores, f)

# gen_melody_json()

In [None]:
"""
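Density: fraction of the total duration taken up by notes rather than rests
Ambiguity: number of candidate key signatures that fit the track, minus one (-1 when no key signature is available)
Dynamic Range: distance between the lowest and the highest note (plotted below in semitones)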
"""

from melolib.music import Note

# Lookup table from the textual duration code (without trailing dots) to the
# matching DurationClass member; used by parse_duration.
duration_repr_to_class = dict([
    ("w", DurationClass.Whole),
    ("h", DurationClass.Half),
    ("q", DurationClass.Quarter),
    ("8", DurationClass.Eighth),
    ("16", DurationClass.Sixteenth),
    ("32", DurationClass.ThirtySecond),
    ("64", DurationClass.SixtyFourth),
])

def parse_duration(duration):
    """Turn a duration code such as ``"q"`` or ``"8.."`` into a ``Duration``.

    The part of the string before any trailing ``"."`` characters is looked up
    in ``duration_repr_to_class``; the number of trailing dots is passed on as
    the second ``Duration`` argument.

    Raises:
        KeyError: if the code without its dots is not in
            ``duration_repr_to_class``.
    """
    base_code = duration.rstrip(".")
    return Duration(
        duration_repr_to_class[base_code],
        len(duration) - len(base_code),
    )

def parse_note(note):
    """Build a ``Note`` from a ``"<name>/<octave>"`` string.

    The text is split on ``"/"`` into exactly two pieces; the second piece is
    converted to ``int`` and both are handed to ``Note.from_name``.

    Raises:
        ValueError: if the string does not split into exactly two pieces or
            the octave piece is not an integer.
    """
    pieces = note.split("/")
    name, octave_text = pieces
    return Note.from_name(name, int(octave_text))

def get_density(track):
    """Return the share of a track's total duration that is played notes.

    Every entry of ``track["data"]`` contributes a numeric length derived from
    its ``"duration"`` code. Entries whose code contains ``"r"`` count as
    rests, all others as melody.

    Args:
        track: Dict with a ``"data"`` list; each entry needs a ``"duration"``
            string that ``parse_duration`` understands once ``"r"`` is removed.

    Returns:
        ``melody / (melody + rest)`` as a float, or ``0.0`` when the track has
        no duration at all (for example an empty ``"data"`` list).
    """
    rest_duration = 0
    melody_duration = 0
    for info in track["data"]:
        duration_code = info["duration"]
        is_rest = "r" in duration_code
        duration = parse_duration(duration_code.replace("r", ""))
        class_value = duration.duration_class.value

        # Base length plus one "dot unit" per dot.
        # NOTE(review): the lower clamp is DurationClass.SixtyFourth.value.
        # If the enum counts up from Whole, that clamp exceeds every
        # `5 - class_value`, so each dot adds the same fixed amount
        # regardless of note length. A clamp of 0 may have been intended --
        # confirm against the DurationClass definition. Left unchanged here.
        dot_unit = 1 << max(5 - class_value, DurationClass.SixtyFourth.value)
        duration_num = (1 << (6 - class_value)) + dot_unit * duration.dots

        if is_rest:
            rest_duration += duration_num
        else:
            melody_duration += duration_num

    total_duration = rest_duration + melody_duration
    if total_duration == 0:
        # Avoid ZeroDivisionError for a track without any entries.
        return 0.0
    return melody_duration / total_duration

def get_ambiguity(track):
    """Score how many key-signature candidates of a track have a falsy second field.

    Returns ``-1`` when ``track["key_signature"]`` is ``None``. Otherwise the
    value must unpack into exactly three candidates, and the result is the
    number of candidates whose element ``[1]`` is falsy, minus one.
    """
    candidates = track["key_signature"]
    if candidates is None:
        return -1
    first, second, third = candidates
    clean_count = sum(not candidate[1] for candidate in (first, second, third))
    return clean_count - 1

def get_dynamic_range(track):
    """Return the gap between the highest and the lowest ``note`` value of a track.

    Each ``info["key"]`` in ``track["data"]`` is parsed with ``parse_note`` and
    its ``.note`` attribute is compared. For a track without entries the
    result is ``float("-inf")``.
    """
    pitches = [parse_note(info["key"]).note for info in track["data"]]
    lowest = min(pitches, default=float("inf"))
    highest = max(pitches, default=float("-inf"))
    return highest - lowest

In [None]:
# Load the cleaned scores from disk: a list of songs, each a list of tracks.
with open("../../out/melody.json") as f:
    melodies = json.load(f)

# One value per track for each metric, flattened across all songs.
dynamic_ranges = [get_dynamic_range(track) for song in melodies for track in song]
densities = [get_density(track) for song in melodies for track in song]
ambiguities = [get_ambiguity(track) for song in melodies for track in song]

import matplotlib.pyplot as plt
import numpy as np

# Histogram of the note densities over ten equal-width bins covering [0, 1].
# Edges are built as i / 10 so they are the same floats as the "0.3"-style
# labels. np.histogram closes the last bin on the right, so a density of
# exactly 1.0 (a track without rests) lands in "0.9-1.0" instead of being
# dropped by a strict `val < 1.0` test.
ranges = np.arange(11) / 10
density_counts = np.histogram(densities, bins=ranges)[0]
range_values1 = {
    f'{low:.1f}-{high:.1f}': int(count)
    for low, high, count in zip(ranges[:-1], ranges[1:], density_counts)
}


# Histogram of the dynamic ranges in bins of width d, starting at s.
s, e = 0, max(dynamic_ranges)
d = 13
# Round the upper end up to the next multiple of d strictly above the maximum.
e = e + d - (e % d)
# The edge list has to include e itself. np.arange(s, e, d) stops one edge
# short, so the bin holding the largest values was missing, and the dict was
# empty whenever the maximum was below d.
ranges1 = np.arange(s, e + d, d)
range_counts = np.histogram(dynamic_ranges, bins=ranges1)[0]
range_values2 = {
    f'{low:.1f}-{low+d:.1f}': int(count)
    for low, count in zip(ranges1[:-1], range_counts)
}


# Count how many tracks fall on each ambiguity value -1, 0, 1 and 2.
ranges3 = np.arange(-1.0, 3.0, 1.0)
ambiguity_tally = Counter(ambiguities)
# A Counter reports 0 for values that never occur, so every key is present.
range_values3 = {key: ambiguity_tally[key] for key in ranges3}

def _draw_count_bars(ax, counts, title, xlabel, rotate_labels=False):
    """Draw one bar per entry of ``counts`` (label -> count) on ``ax``.

    The y-axis runs from 0 to one above the largest count. An empty ``counts``
    dict yields an empty panel instead of raising.
    """
    ax.bar(list(counts.keys()), list(counts.values()))
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel('Count')
    if rotate_labels:
        ax.tick_params(axis='x', rotation=45)
    # default=0 keeps max() from failing when there is nothing to plot.
    ax.set_ylim(0, max(counts.values(), default=0) + 1)


# 2x2 grid with three panels in use. The fourth axes gets a real name rather
# than `_`, which IPython uses for the last cell output.
fig, ((ax1, ax2), (ax3, unused_ax)) = plt.subplots(2, 2, figsize=(10, 8))

_draw_count_bars(ax1, range_values1, 'Density of notes', 'Density', rotate_labels=True)
_draw_count_bars(ax2, range_values2, 'Dynamic Range (semitones)', 'Dynamic Range', rotate_labels=True)
_draw_count_bars(ax3, range_values3, 'Ambiguity', 'Ambiguity')

# Hide the unused panel instead of showing an empty frame.
unused_ax.set_axis_off()

plt.tight_layout()
plt.show()