# GA Implementation

In [3]:
import random
from pedalboard import Pedalboard, Reverb, Chorus, Distortion, Delay, Phaser, Compressor, Gain, Clipping
from pedalboard.io import AudioFile
from collections import defaultdict
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from scipy.ndimage import maximum_filter
from scipy.spatial.distance import cdist
import time
from copy import deepcopy
from statistics import NormalDist
from scipy.ndimage import label as label_features
from scipy.ndimage import maximum_position as extract_region_maximums
import math
from scipy.optimize import curve_fit
from copy import deepcopy 
import pandas as pd
from basic_pitch import ICASSP_2022_MODEL_PATH, inference
from midi2audio import FluidSynth 
from pydub import AudioSegment




In [5]:
def create_individual(effects, effect_structure):
    n_effects_chosen = random.randint(1, len(effects) - 2)
    selected_effects = random.sample(effects, n_effects_chosen)
    
    individ = {}
    for effect in selected_effects:
        if effect in effect_structure:
            structure = effect_structure[effect]
            individ[effect] = {
                param: round(random.uniform(range_[0], range_[1]), 2) 
                for param, (_, range_) in structure.items()
            }
    return individ

In [6]:
n_effects = 6
effects = [i for i in range(n_effects)]
effect_structure = {
    0: { "rate_hz": ('float', (0.0, 100.0)), },# Chorus
    1: { "delay_seconds": ('float', (0.0, 10.0)), },# Delay
    2: { "drive_db": ('float', (0.0, 50.0)), },# Distortion
    3: { "gain_db": ('float', (-50.0, 50.0)) },# Gain
    4: { "depth": ('float', (0.0, 1.0)), },# Phaser
    5: { "wet_level": ('float', (0.0, 1.0)), },# Reverb
}
effects_map = {
    0: 'Chorus',
    1: 'Delay',
    2: 'Distortion',
    3: 'Gain',
    4: 'Phaser',
    5: 'Reverb',
}

Testing

In [101]:
create_individual(effects, effect_structure)

{2: {'drive_db': 47.19}, 1: {'delay_seconds': 0.45}, 0: {'rate_hz': 79.44}}

### Audio with effect creation

In [6]:
def create_effected_audio(board, file_path):
    with AudioFile(file_path) as f:
        output_file = file_path[:-4] + "_output.mp3"
        with AudioFile(output_file, 'w', f.samplerate, f.num_channels) as o:
            while f.tell() < f.frames:
                chunk = f.read(f.samplerate)
                effected = board(chunk, f.samplerate, reset=False)
                o.write(effected)
    return output_file

### Constellation map builder

#### Method 1

In [7]:
def max_window_peaks_calculation(file_path, threshold): 
    #step 1: Set parameters
    N_FFT = 2048  # FFT window size
    N_O = N_FFT // 4

    #step 2: Load audio file
    y, sr = librosa.load(file_path, sr=None)

    #step 3: Compute the spectrogram
    D = librosa.stft(y, n_fft=N_FFT, hop_length=N_O)
    S_db = librosa.amplitude_to_db(np.abs(D), ref=np.max)
    
    #step 4: Define window lengths for peak detection  
    d_t = 15
    d_f = 15

    #step 5: Select settings for peak detection
    #using maximum filter to detect local maxima
    local_max = maximum_filter(S_db, size=(d_t, d_f))

    #step 6: Create an identification matrix
    peaks = (S_db == local_max)  # Peaks are where the original value is equal to the local max

    #step 6.5: Apply an amplitude threshold to filter out insignificant peaks
    amplitude_threshold = threshold  # dB, adjust as needed
    peaks &= (S_db > amplitude_threshold)

    #step 7: Extract peaks (time, frequency, amplitude)
    times = librosa.frames_to_time(np.arange(S_db.shape[1]), sr=sr, hop_length=N_O)
    frequencies = librosa.fft_frequencies(sr=sr, n_fft=N_FFT)

    peak_indices = np.where(peaks)
    peak_times = times[peak_indices[1]]
    peak_freqs = frequencies[peak_indices[0]]
    peak_ampls = S_db[peak_indices]

    #save the peaks to a file (optional)
    peak_data = np.vstack((peak_times, peak_freqs, peak_ampls)).T
    peak_data_filtered = np.array([[round(x, 1), round(y, 1)] for x, y, _ in peak_data])
    
    return peak_data_filtered

#### Method 2

In [8]:
def z_score_peaks_calculation(file_path, threshold):
    y, _ = librosa.load(path=file_path, sr=None)

    #do transformations
    x = librosa.stft(y)
    x = librosa.amplitude_to_db(abs(x))
    
    #remove zero values
    flattened = np.matrix.flatten(x)
    filtered = flattened[flattened > np.min(flattened)]

    #create a normal distribution from frequency intensities
    #then map a zscore onto each intensity value
    ndist = NormalDist(np.mean(filtered), np.std(filtered))
    zscore = np.vectorize(lambda x: ndist.zscore(x))
    zscore_matrix = zscore(x)

    #create label matrix from frequency intensities that are
    #above threshold
    mask_matrix = zscore_matrix > threshold
    labelled_matrix, num_regions = label_features(mask_matrix)
    label_indices = np.arange(num_regions) + 1

    #for each isolated region in the mask, identify the maximum
    #value, then extract it position
    peak_positions = extract_region_maximums(zscore_matrix, labelled_matrix, label_indices)

    #finally, create list of peaks (time, frequency, intensity)
    peaks = [[x, y] for y, x in peak_positions]

    return peaks

### Dissimilarity calculation

#### Method 1

Not used

In [9]:
# Define the Euclidean distance function
def euclidean_distance(point1, point2):
    return np.sqrt((point1[0] - point2[0])**2 + (point1[1] - point2[1])**2)

# Define Gaussian neighborhood function
def neighborhood_function(i, j, n, m, A=2, sigma=5):
    return A * np.exp(-((i + 1 - (n - 1) * (j - 1) / (m - 1))**2) / (2 * sigma**2))

In [10]:
def add_epsilon_to_zeros(D, epsilon):
  D_copy = D.copy()  # Create a copy to avoid modifying the original array
  D_copy[D_copy == 0.0] = epsilon
  return D_copy

# Initialize membership matrix U based on initial distances and neighborhood function
def initialize_membership(X, Y, neighborhood_func, epsilon):
    n, m = len(X), len(Y)
    D = cdist(X, Y, metric='euclidean')
    D = add_epsilon_to_zeros(D, epsilon)
    U = np.zeros((n, m))
    for i in range(n):
        for j in range(m):
            U[i, j] = 1.0 / np.sum([(D[i, j] / (D[i, k])) ** 2 for k in range(m)])
            #U[i, j] *= neighborhood_func(i, j, n, m)
        #U[i, :] /= np.sum([U[i, k] * neighborhood_func(i, k, n, m) for k in range(m)])
    return U
  
# Update membership matrix using FOCM
def update_membership(U, D, gamma, n_clusters, neighborhood_func):
    n, m = U.shape
    for i in range(n):
        for j in range(m):
            denominator = np.sum([(D[i, j] / (D[i, k])) ** (2 / (gamma - 1)) for k in range(n_clusters)])
            U[i, j] = 1.0 / denominator
            #U[i, j] *= neighborhood_func(i, j, n, m) #da rivedere perchè togliere questo

        #U[i, :] /= np.sum([U[i, k] * neighborhood_func(i, k, n, m) for k in range(m)])
    return U
  
# Calculate new cluster centers
def calculate_cluster_centers(U, X, gamma):
  U_gamma = np.power(U.T, gamma)
  return (U_gamma @ X) / np.sum(U_gamma, axis=1, keepdims=True)

##### FOCM algorithm

In [11]:
# FOCM algorithm
def focm(X, Y, n_clusters, max_iter=50, epsilon=1e-4, gamma=5):
    U = initialize_membership(X, Y, neighborhood_function, epsilon)
    V = np.copy(Y)
    prev_U = np.zeros_like(U)

    for _ in range(max_iter):
        # Calculate distances
        D = cdist(X, V, metric='euclidean')
        D = add_epsilon_to_zeros(D, epsilon)

        # Update membership matrix
        U = update_membership(U, D, gamma, n_clusters, neighborhood_function)
        
        # Calculate new cluster centers
        V = calculate_cluster_centers(U, X, gamma)
        
        # Check for convergence
        if np.linalg.norm(U - prev_U) < epsilon:
            break
        prev_U = np.copy(U)
    return U

##### Core function

In [12]:
def dissimilarity_focm(file_path, output_file_path, constellation_map_alg, threshold):
    peaks_1 = constellation_map_alg(file_path, threshold)
    peaks_2 = constellation_map_alg(output_file_path, threshold)
    
    print(f"{file_path}, len peaks: {len(peaks_1)}")
    print(f"{output_file_path}, len peaks: {len(peaks_2)}")
    
    if len(peaks_1) > len(peaks_2):
        n_clusters = len(peaks_2)
        M_A = deepcopy(peaks_1)
        M_B = deepcopy(peaks_2)
    else:
        n_clusters = len(peaks_1)
        M_A = deepcopy(peaks_2)
        M_B = deepcopy(peaks_1) 
    
    n_clusters = len(M_B)
    U = focm(M_A, M_B, n_clusters)
    
    total_distance = 0

    for i in range(len(M_A)):
        for j in range(len(M_B)):
            total_distance += U[i, j] * euclidean_distance(M_A[i], M_B[j])

    return total_distance / (len(M_A) * len(M_B))

#### Method 2

In [13]:
def create_database(hashes):
    database = defaultdict(list)
    for hash_value, time_offset in hashes:
        database[hash_value].append(time_offset)
    return database

def generate_hashes(constellation_map, fan_out=10):
    hashes = []
    for anchor in constellation_map:
        for target in constellation_map:
            if target[0] > anchor[0]:  #ensure target is after anchor in time
                delta_t = target[0] - anchor[0]
                freq1, freq2 = anchor[1], target[1]
                hash_value = (freq1, freq2, delta_t)
                hashes.append((hash_value, anchor[0]))  # (hash_value, time_offset)
                
                #limit the fan-out to a certain number of target points
                if len(hashes) >= fan_out:
                    break
    return hashes

def search_database(database, sample_hashes):
    match_offsets = []
    for hash_value, sample_offset in sample_hashes:
        if hash_value in database:
            for track_offset in database[hash_value]:
                match_offsets.append((track_offset, sample_offset))
                database[hash_value] = database[hash_value][1:]
                break
    return match_offsets

##### Core Function

In [67]:
def dissimilarity_lines_difference(m1, q1, m2, q2, intersection_too_far):
    if m1 == m2 or intersection_too_far:
        if q1 == q2:
            return 0.0
        else:
            return abs(q2 - q1) / math.sqrt(1 + m1**2) #distance of the 2 parallel lines
    else: #lines intersect
        if m1 * m2 == -1:
            return 90.0
        else:
            tan_theta = abs((m1 - m2) / (1 + m1 * m2))
            theta_radians = math.atan(tan_theta)
            theta_degrees = math.degrees(theta_radians)
            return theta_degrees

### Fitness function

#### Method 1

In [15]:
def fitness_focm(individual, file_path, effected_file_path, constellation_map_alg, threshold):
    board = Pedalboard([])
    for effect_key, params in individual.items():
        effect_class = globals()[effects_map[effect_key]]
        board.append(effect_class(**params))
        
    output_file_path = create_effected_audio(board, file_path)
    diss = dissimilarity_focm(effected_file_path, output_file_path, constellation_map_alg, threshold)
    print(f"Individ: {individual} : diss: {diss}")
    return diss

Testing (modify file_path)

In [None]:
#Testing fitness function
individ = {5: {'wet_level': 0.8}}
file_path = "G4_guitar.mp3"
effected_file_path = "G4_guitar_Reverb_high.mp3"
fitness_focm(individ, file_path, effected_file_path, max_window_peaks_calculation, -20)

#### Method 2

In [17]:
def linear_func(x, a, b):
    return a * x + b

In [98]:
def fitness_lines_difference(individual, plain_audio_path, hash_table, m1, q1, original_or_times, constellation_map_alg, threshold, fan_out):
    board = Pedalboard([])
    for effect_key, params in individual.items():
        effect_class = globals()[effects_map[effect_key]]
        board.append(effect_class(**params))
        
    output_file_path = create_effected_audio(board, plain_audio_path)
    
    new_peaks = constellation_map_alg(output_file_path, threshold)
    new_hashes = generate_hashes(new_peaks, fan_out)
    copy_hash_table = deepcopy(hash_table)
    time_pairs = search_database(copy_hash_table, new_hashes)
    
    #print(len(original_or_times))
    #print(len(time_pairs))
    if len(time_pairs) >= 2:
        or_times, sample_times = zip(*time_pairs)
        if (len(or_times) / len(original_or_times)) >= 0.1: #there is at least some confidence
            #print(len(original_or_times) / len(or_times))
            popt1, _ = curve_fit(linear_func, or_times, sample_times)
            m2, q2 = popt1
            
            max_x = max(original_or_times)
            x_intersection =  (q2 - q1)/(m1 - m2)

            #print(m1, q1, m2, q2)
            diss = dissimilarity_lines_difference(m1, q1, m2, q2, x_intersection >= max_x * 0.9 or x_intersection <= max_x * 0.1) * (len(original_or_times) / len(or_times))
        else:
            diss = 10000.0
    else:
        diss = 10000.0
    #print(f"Individ: {individual} : diss: {diss}")
    return diss

### Mutation

In [4]:
def mutation(individual, p_pop_item, p_add_new_effect):
    if not individual: 
        effect = random.randint(0, n_effects - 1)
        structure = effect_structure[effect]
        new_gene = {
            effect: 
            {param: round(random.uniform(range_[0], range_[1]), 2) for param, (_, range_) in structure.items()}
        }
        return new_gene 
    
    offspring = deepcopy(individual)
    items = list(offspring.items())
    
    available_effects = set(effects) - set(offspring.keys())
    if not available_effects:
        if random.random() > p_pop_item:
            items.pop(random.randrange(len(items))) 
            return dict(items)
        else:
            return dict(items)
        
    effect = random.choice(list(available_effects))
    structure = effect_structure[effect]
    
    #randomly decide between replacing an existing effect or adding a new one
    if random.random() > p_add_new_effect:
        new_gene = (
            effect, 
            {param: round(random.uniform(range_[0], range_[1]), 2) 
             for param, (_, range_) in structure.items()}
        )
        items.append(new_gene)
    else:
        index = random.randint(0, len(items) - 1)
        new_gene = (
            effect, 
            {param: round(random.uniform(range_[0], range_[1]), 2) 
             for param, (_, range_) in structure.items()}
        )
        items[index] = new_gene
    
    return dict(items)

In [14]:
from math import ceil

def aggressive_mutation(individual, p_pop_item, p_add_new_effect, effects, effect_structure, p_mutation):
    k = ceil(p_mutation * len(individual))  #number of mutation operations
    print(k)
    offspring = deepcopy(individual)
    
    for _ in range(k):  
        if not offspring: 
            effect = random.randint(0, len(effects) - 1)
            structure = effect_structure[effect]
            new_gene = {
                effect: 
                {param: round(random.uniform(range_[0], range_[1]), 2) for param, (_, range_) in structure.items()}
            }
            offspring = new_gene
            continue

        items = list(offspring.items())
        
        if random.random() > p_pop_item and len(items) > 0:
            items.pop(random.randrange(len(items))) 
            offspring = dict(items)
            continue
        
        available_effects = set(effects) - set(offspring.keys())
        
        if not available_effects:  # No available effects to add, just remove if p_pop_item > random
            if random.random() > p_pop_item and len(items) > 0:
                items.pop(random.randrange(len(items))) 
                offspring = dict(items)
            continue
        
        effect = random.choice(list(available_effects))
        structure = effect_structure[effect]
        
        # Decide between replacing an existing effect or adding a new one
        if random.random() > p_add_new_effect:  # Add a new effect
            new_gene = (
                effect, 
                {param: round(random.uniform(range_[0], range_[1]), 2) 
                 for param, (_, range_) in structure.items()}
            )
            items.append(new_gene)
        else:  # Replace an existing effect
            index = random.randint(0, len(items) - 1)
            new_gene = (
                effect, 
                {param: round(random.uniform(range_[0], range_[1]), 2) 
                 for param, (_, range_) in structure.items()}
            )
            items[index] = new_gene
        
        offspring = dict(items)
    
    return offspring

In [8]:
def inner_mutation(individual):
    if not individual: 
        return individual
    offspring = deepcopy(individual)
    key = random.choice(list(offspring.keys()))
    structure = effect_structure[key]
    offspring[key] = {
        param: round(random.uniform(range_[0], range_[1]), 2) 
        for param, (_, range_) in structure.items()
    }
    return offspring

In [90]:
n_effects = 6
effects = [i for i in range(n_effects)]
effect_structure = {
    0: { "rate_hz": ('float', (0.0, 100.0)), },# Chorus
    1: { "delay_seconds": ('float', (0.0, 10.0)), },# Delay
    2: { "drive_db": ('float', (0.0, 50.0)), },# Distortion
    3: { "gain_db": ('float', (-50.0, 50.0)) },# Gain
    4: { "depth": ('float', (0.0, 1.0)), },# Phaser
    5: { "wet_level": ('float', (0.0, 1.0)), },# Reverb
}
effects_map = {
    0: 'Chorus',
    1: 'Delay',
    2: 'Distortion',
    3: 'Gain',
    4: 'Phaser',
    5: 'Reverb',
}

individ = {7: {'threshold_db': -31.18},
 4: {'depth': 0.11},
 5: {'wet_level': 0.04},
 0: {'rate_hz': 78.32},
 2: {'drive_db': 21.4}}

print(aggressive_mutation(individ, 0.5, 0.5, effects, effect_structure, 0.7))

4
{7: {'threshold_db': -31.18}, 1: {'delay_seconds': 1.08}, 0: {'rate_hz': 78.32}, 3: {'gain_db': -15.05}}


In [75]:
individ = {}
inner_mutation(individ)

NameError: name 'inner_mutation' is not defined

### Crossover

In [29]:
from copy import deepcopy
import random

def crossover(parent_1, parent_2):
    offspring_1 = deepcopy(parent_1)
    offspring_2 = deepcopy(parent_2)
    
    set1, set2 = set(parent_1.keys()), set(parent_2.keys())
    common_keys = list(set1 & set2) #find common and different keys
    different_keys = list(set1 ^ set2)

    #modify offspring based on common elements
    if common_keys:
        index_1 = len(common_keys) // 2
        for i in range(index_1):
            key = common_keys[i]
            offspring_1[key], offspring_2[key] = offspring_2[key], offspring_1[key]

    #modify offspring based on symmetric difference elements
    if different_keys:
        index_2 = len(different_keys) // 2
        for j in range(index_2):
            key = different_keys[j]
            if key in offspring_1:
                offspring_2[key] = offspring_1.pop(key)
            else:
                offspring_1[key] = offspring_2.pop(key)

    return offspring_1, offspring_2

In [30]:
indv1 = {4: {'depth': 0.8}, 2: {'drive_db': 24.83}, 5: {'wet_level': 0.53}, 3: {'gain_db': 13.94}, 0: {'rate_hz': 65.17}}
indv2 = {5: {'wet_level': 0.59}, 4: {'depth': 0.47}, 0: {'rate_hz': 78.04}}

o1, o2 = crossover(indv1, indv2)
print(indv1)
print(indv2)
print(o1)
print(o2)


1
0
{4: {'depth': 0.8}, 2: {'drive_db': 24.83}, 5: {'wet_level': 0.53}, 3: {'gain_db': 13.94}, 0: {'rate_hz': 65.17}}
{5: {'wet_level': 0.59}, 4: {'depth': 0.47}, 0: {'rate_hz': 78.04}}
{4: {'depth': 0.8}, 5: {'wet_level': 0.53}, 3: {'gain_db': 13.94}, 0: {'rate_hz': 78.04}}
{5: {'wet_level': 0.59}, 4: {'depth': 0.47}, 0: {'rate_hz': 65.17}, 2: {'drive_db': 24.83}}


### Selection Criteria

Variation 1

In [54]:
def tournament_selection_focm(file_path, effected_file_path, constellation_map_alg, threshold, fit, pop, t_size):
    tournament = random.choices(pop, k=t_size)
    return min(tournament, key=lambda ind: fit(ind, file_path, effected_file_path, constellation_map_alg, threshold))

Variation 2

In [55]:
def tournament_selection_lines_difference(plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out, fit, pop, t_size):
    tournament = random.choices(pop, k=t_size)
    return min(tournament, key=lambda ind: fit(ind, plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out))

### Population Initialization

In [57]:
def init_population(pop_size, effects, effect_structure):
    pop = []
    for _ in range(pop_size):
        pop.append(create_individual(effects, effect_structure))
    return pop

### GA with FOCM

In [95]:
def GA_focm(file_path, effected_file_path, constellation_map_alg, threshold, fit, pop_size, p_pop_item, p_add_new_effect, n_iter, t_size):
  pop = init_population(pop_size, effects, effect_structure)
  
  best = {}

  for i in range(0, n_iter):
    print(f"Iteration: {i}")
    selected = [tournament_selection_focm(file_path, effected_file_path, constellation_map_alg, threshold, fit, pop, t_size) for _ in range(0, pop_size)]
    pairs = [[selected[i], selected[i + 1]] for i in range(len(selected))]
    
    offsprings = []
    for x, y in pairs:
        of1, of2 = crossover(x, y)
        if random.choice([True, False]):
          offsprings.append(of1)
        else:
          offsprings.append(of2)
        
    pop = [mutation(inner_mutation(x), p_pop_item, p_add_new_effect) for x in offsprings]

    candidate_best = min(pop, key=lambda ind: fit(ind, file_path, effected_file_path, constellation_map_alg, threshold))
    print(f"Best candidate: {candidate_best}")
    print(f"\nCandidate fitness: {fit(candidate_best, file_path, effected_file_path, constellation_map_alg, threshold)} , best fitness: {fit(best, file_path, effected_file_path, constellation_map_alg, threshold)}")
    if fit(candidate_best, file_path, effected_file_path, constellation_map_alg, threshold) < fit(best, file_path, effected_file_path, constellation_map_alg, threshold):
      best = candidate_best
    print(f"Best fitness at generation {i}: {fit(best, file_path, effected_file_path, constellation_map_alg, threshold)}\n")
  return best

In [None]:
original_audio = "G4_guitar.mp3"
effected_audio = "G4_guitar_Delay_high.mp3"
constellation_map_alg = z_score_peaks_calculation
fitness = fitness_focm
threshold = 3
pop_size = 30
p_pop_item = 0.2
p_add_new_effect = 0.5
n_iter = 10
t_size = 5

GA_focm(original_audio, effected_audio, constellation_map_alg, threshold, fitness, pop_size, p_pop_item, p_add_new_effect, n_iter, t_size)

### GA with straight line comparison

In [117]:
def GA_lines_comp(plain_audio_path, desired_audio_path, constellation_map_alg, threshold, fan_out, fit, pop_size, p_pop_item, p_add_new_effect, n_iter, t_size):
  pop = init_population(pop_size, effects, effect_structure)
  
  peaks_original = constellation_map_alg(desired_audio_path, threshold)
  hashes0 = generate_hashes(peaks_original, fan_out)
  hash_table = create_database(hashes0)

  peaks_copy_original = constellation_map_alg(desired_audio_path, threshold)
  hashes1 = generate_hashes(peaks_copy_original, fan_out)
  copy_hash_table = deepcopy(hash_table)
  time_pairs = search_database(copy_hash_table, hashes1)
  or_times, sample_times = zip(*time_pairs)
  popt1, _ = curve_fit(linear_func, or_times, sample_times)
  m1, q1 = popt1
  
  best = {}

  for i in range(0, n_iter):
    time_start = time.time()
    print(f"Iteration: {i}")
    selected = [tournament_selection_lines_difference(plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out, fit, pop, t_size) for _ in range(0, pop_size)]
    pairs = [[selected[i], selected[(i + 1) % len(selected)]] for i in range(len(selected))]    
    
    offsprings = []
    for x, y in pairs:
        of1, of2 = crossover(x, y)
        if random.choice([True, False]):
          offsprings.append(of1)
        else:
          offsprings.append(of2)

    pop = [mutation(inner_mutation(x), p_pop_item, p_add_new_effect) for x in offsprings]
    print(len(pop))

    candidate_best = min(pop, key=lambda ind: fit(ind, plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out))
    print(f"Best candidate: {candidate_best}")
    print(f"\nCandidate fitness: {fit(candidate_best, plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out)} , best fitness: {fit(best, plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out)}")
    if fit(candidate_best, plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out) < fit(best, plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out):
      best = candidate_best
    print(f"Best fitness at generation {i}: {fit(best, plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out)}\n")
    print(f'Time for iteration {i}: {time.time() - time_start}')
    if fit(best, plain_audio_path, hash_table, m1, q1, or_times, constellation_map_alg, threshold, fan_out) < 0.5:
      break
  return best

In [None]:
plain_audio = "../audios/G4_guitar.mp3"
desired_audio = "../audios/G4_guitar_Delay_low.mp3"
constellation_map_alg = z_score_peaks_calculation
fitness = fitness_lines_difference
threshold = 1.5
fan_out = 20
pop_size = 100
p_pop_item = 0.8,
p_add_new_effect = 0.5
n_iter = 20
t_size = 5

GA_lines_comp(plain_audio, desired_audio, constellation_map_alg, threshold, fan_out, fitness, pop_size, p_pop_item, p_add_new_effect, n_iter, t_size)

### GA applied to dataset

In [63]:
def mp3_to_midi(audio_path, midi_path, note_segmentation, model_confidence, instrument_program):
    _, midi_data, __ = inference.predict(
        audio_path,    
        model_or_model_path = ICASSP_2022_MODEL_PATH, 
        onset_threshold = note_segmentation, # 0.6 note segmentation 1) how easily a note should be split into two. (split notes <- ..0.5.. -> merge notes)
        frame_threshold = model_confidence, # 0.6 model confidence threshold 2) the model confidence required to create a note. (more notes <- ..0.3.. -> fewer notes)
    )

    for instrument in midi_data.instruments:
        instrument.program = instrument_program #distortion guitar program 30

    midi_data.write(midi_path)

In [64]:
def midi_to_mp3(midi_file, audio_path, soundfont):
    #convert MIDI to WAV using FluidSynth
    fs = FluidSynth(soundfont)
    wav_file = midi_file.replace('.midi', '.wav').replace('.mid', '.wav')
    fs.midi_to_audio(midi_file, wav_file)

    #convert WAV to MP3 using pydub
    sound = AudioSegment.from_wav(wav_file)
    sound.export(audio_path, format="mp3")

    print(f"Conversion complete: {audio_path}")

In [113]:
def dataset_GA_execution(df, 
                         note_segmentation,
                         model_confidence,
                         instrument_program,
                         constellation_map_alg,
                         fitness,
                         threshold,
                         fan_out,
                         pop_size,
                         p_pop_item,
                         p_add_new_effect,
                         n_iter,
                         t_size,
                         soundfont,
                         clear_audio_path,
                         midi_path):
    temp_data = []

    for _, row in df.iterrows():
        desired_audio_path = '../dataset_creation/audios/' + row["audio_name"]
        mp3_to_midi(desired_audio_path, midi_path, note_segmentation, model_confidence, instrument_program)
        midi_to_mp3(midi_path, clear_audio_path, soundfont)
        
        best_invdivid = GA_lines_comp(clear_audio_path, desired_audio_path, constellation_map_alg, threshold, fan_out, fitness, pop_size, p_pop_item, p_add_new_effect, n_iter, t_size)    
        temp_data.append(best_invdivid)
        df['GA_effects'] = pd.Series(temp_data)
        df.to_csv('../dataset_creation/dataset_audios.csv', index=False)

In [118]:
df = pd.read_csv('../dataset_creation/dataset_audios.csv')

note_segmentation = 0.6
model_confidence = 0.6
instrument_program = 30
constellation_map_alg = z_score_peaks_calculation
fitness = fitness_lines_difference
threshold = 1.5
fan_out = 20
pop_size = 100
p_pop_item = 0.8
p_add_new_effect = 0.2
n_iter = 10
t_size = 5
soundfont = '../audio2midi2audio/FluidR3_GM.sf2'  # Path to your SoundFont file
clear_audio_path = 'clear_audio.mp3'
midi_path = 'clear_midi.mid'

dataset_GA_execution(df, 
                    note_segmentation,
                    model_confidence,
                    instrument_program,
                    constellation_map_alg,
                    fitness,
                    threshold,
                    fan_out,
                    pop_size,
                    p_pop_item,
                    p_add_new_effect,
                    n_iter,
                    t_size,
                    soundfont,
                    clear_audio_path,
                    midi_path)

Predicting MIDI for ../dataset_creation/audios/0.mp3...
Conversion complete: clear_audio.mp3
Iteration: 0
100
Best candidate: {1: {'delay_seconds': 3.89}, 5: {'wet_level': 0.98}, 2: {'drive_db': 43.72}, 4: {'depth': 0.43}}

Candidate fitness: 276.7267454365586 , best fitness: 10000.0
Best fitness at generation 0: 276.7267454365586

Time for iteration 0: 469.39876651763916
Iteration: 1
100
Best candidate: {3: {'gain_db': 35.84}, 1: {'delay_seconds': 3.41}, 2: {'drive_db': 15.78}}

Candidate fitness: 302.7279769649559 , best fitness: 276.7267454365586
Best fitness at generation 1: 276.7267454365586

Time for iteration 1: 460.27390336990356
Iteration: 2


KeyboardInterrupt: 

Tests

In [8]:
df = pd.read_csv('../dataset_creation/dataset_audios.csv')

data = [{6: {'threshold_db': 0.09}},]

# Assign the list to the new column
df['GA_effects'] = pd.Series(data)

df.to_csv('../dataset_creation/dataset_audios.csv', index=False)

In [2]:
individual = {}

for effect_key, params in individual.items():
    print(effect_key, params)

In [4]:
import pandas as pd

df = pd.read_csv('dataset_model_test.csv')

print(df.shape)


(500, 4)
