In [None]:
import json
import numpy as np
import matplotlib.pyplot as plt
import statistics

In [None]:
# Location of the chord-progression corpus, relative to the current working directory.
CHORD_PROGRESSIONS_JSON = "../out/chord_progressions.json"

# Songs with this many chords or fewer are dropped before any analysis.
SHORT_SONG_THRESHOLD = 8

# Pin the encoding so the platform-default codec (e.g. cp1252 on Windows)
# cannot mis-decode non-ASCII chord symbols; JSON interchange is UTF-8.
with open(CHORD_PROGRESSIONS_JSON, encoding="utf-8") as f:
    songs = json.load(f)
    # The rest of the notebook iterates over songs, so fail early on any other shape.
    assert isinstance(songs, list), "expected the top-level JSON value to be a list of songs"

# Keep only the progressions that are longer than the threshold.
songs = [song for song in songs if len(song) > SHORT_SONG_THRESHOLD]

In [None]:
from chord_parser import chord_parser_raw

def count_variants(songs):
    """Tally how often each chord variant occurs across all songs.

    Every chord of every song is passed to ``chord_parser_raw``; the second
    element of the pair it returns is used as the variant key.

    Args:
        songs: Iterable of songs, each an iterable of chord symbols.

    Returns:
        dict mapping variant -> number of occurrences, with keys in
        first-seen order.
    """
    tally = {}
    for progression in songs:
        for symbol in progression:
            # Only the variant matters for this tally; the root is discarded.
            _root, variant = chord_parser_raw(symbol)
            tally[variant] = tally.get(variant, 0) + 1
    return tally

# Corpus-wide tally of chord variants and the total number of chords seen.
chord_variants = count_variants(songs)
total_chords = sum(chord_variants.values())

# Turn each raw count into its share of all chords in the corpus.
chord_variant_probabilities = {
    variant: occurrences / total_chords
    for variant, occurrences in chord_variants.items()
}

# Last expression of the cell: display the resulting mapping.
chord_variant_probabilities

In [None]:
# Split the tally into parallel label / value lists for the bar chart.
chord_names = list(chord_variants)
chord_counts = [chord_variants[name] for name in chord_names]

# One row, two panels: absolute counts on the left, shares on the right.
fig, (ax1, ax2) = plt.subplots(
    nrows=1,
    ncols=2,
    figsize=(14, 6),
    constrained_layout=True,
)

# Left panel: horizontal bars on a log axis so rare variants stay visible.
ax1.barh(chord_names, chord_counts, color='skyblue')
ax1.set_xscale('log')
ax1.set_title('Count of Chord Types (Log)')
ax1.set_xlabel('Count')
ax1.set_ylabel('Chord Type')

# Right panel: each variant's share of all chords.
ax2.pie(
    chord_variant_probabilities.values(),
    labels=chord_variant_probabilities.keys(),
    autopct='%1.1f%%',
    startangle=140,
)
ax2.set_title('Probability of chord types')

plt.show()

In [None]:
def measure_complexity(song):
    """Summarise one chord progression with three statistics.

    Each chord is scored with the corpus-wide relative frequency of its
    variant, looked up in the module-level ``chord_variant_probabilities``.
    Note that this score is a probability: common variants score high,
    rare ones score low.

    Args:
        song: Iterable of chord symbols accepted by ``chord_parser_raw``.

    Returns:
        Tuple ``(variance, mean, entropy)`` where
        * variance is the sample variance of the per-chord scores,
        * mean is the arithmetic mean of the per-chord scores,
        * entropy is the Shannon entropy, in bits, of the distribution of
          root notes within the song.

    Raises:
        KeyError: if a chord's variant is missing from
            ``chord_variant_probabilities``.
        statistics.StatisticsError: if the song has fewer than two chords
            (a sample variance needs at least two values).
    """
    variant_scores = []
    root_tally = {}
    for symbol in song:
        root, variant = chord_parser_raw(symbol)
        variant_scores.append(chord_variant_probabilities[variant])
        root_tally[root] = root_tally.get(root, 0) + 1

    # Normalise root-note counts into a distribution, then take its entropy (base 2).
    root_counts = np.array(list(root_tally.values()))
    root_distribution = root_counts / np.sum(root_counts)
    entropy = -np.sum(root_distribution * np.log2(root_distribution))

    average_chord_complexity = statistics.mean(variant_scores)
    variance_of_chord_complexity = statistics.variance(variant_scores)
    return variance_of_chord_complexity, average_chord_complexity, entropy

# One (variance, mean, entropy) triple per song, in the same order as `songs`.
song_complexities = [measure_complexity(song) for song in songs]

In [None]:
def describe(label, data):
    """Print a block of summary statistics for ``data`` under a heading.

    The output is the heading, then one ``Name: value`` line each for mean,
    median, mode, sample standard deviation, sample variance, minimum and
    maximum, followed by a blank line.

    Args:
        label: Heading printed on the first line.
        data: Sequence of numbers; it is traversed several times.

    Returns:
        None. The statistics are written to standard output.
    """
    # Compute everything before printing so a failing statistic leaves no partial output.
    rows = [
        ("Mean:", statistics.mean(data)),
        ("Median:", statistics.median(data)),
        ("Mode:", statistics.mode(data)),
        ("Standard Deviation:", statistics.stdev(data)),
        ("Variance:", statistics.variance(data)),
        ("Minimum:", min(data)),
        ("Maximum:", max(data)),
    ]

    print(label)
    for caption, value in rows:
        print(caption, value)
    print()

# Transpose the per-song triples into one series per statistic.
variances, means, entropies = zip(*song_complexities)

# Print a summary block for each of the three series.
describe(label="Variance of chord complexity", data=variances)
describe(label="Mean of chord complexity", data=means)
describe(label="Entropy of root notes", data=entropies)

In [None]:
# Pair every song with its (variance, mean, entropy) triple, statistics first.
augmented_dataset = [
    (complexity, song) for complexity, song in zip(song_complexities, songs)
]

# Write the pairs out as JSON; each tuple is serialised as a JSON array.
with open("../out/chord_progressions_augmented.json", "w") as f:
    json.dump(augmented_dataset, f)