In [1]:
import librosa
import pickle
import numpy as np

# Load the pre-trained classifier that score_prob_of_being() queries.
# NOTE(review): pickle.load can execute arbitrary code while unpickling, so the
# model file must come from a trusted source.
with open("model/their_tuned_model.pkl", 'rb') as file:
    pickle_model = pickle.load(file)

# Load the feature scaler applied to the feature vector before classification.
# NOTE(review): presumably the scaler fitted together with the model - confirm.
from joblib import load
scaler = load('model/std_scaler.bin')

# Instrument labels. score_prob_of_being() uses a label's position in this list
# to pick one entry of the classifier's probability output.
# NOTE(review): assumes this order matches the classifier's class order - TODO confirm.
instr_list = ["flu", "gac", "org", "pia", "tru", "voi"]

def score_prob_of_being(y, sr=44100, being=None, sim=False, reference=None):
    """Score an audio signal with the pre-trained instrument classifier.

    The signal is summarised by the mean of its RMS energy, spectral centroid,
    spectral bandwidth, spectral roll-off and zero-crossing rate, followed by
    the mean of every MFCC row. That vector is standardised with the
    module-level ``scaler`` and passed to ``pickle_model.predict_proba``.

    Parameters
    ----------
    y : array of audio samples
        Signal to score.
    sr : int, optional
        Sampling rate handed to the librosa feature extractors.
    being : str or None, optional
        A label from ``instr_list``. When given, only the probability at that
        label's position is returned; otherwise the whole probability row is.
    sim : bool, optional
        Only used together with ``being``. When true, the root-mean-square
        difference between ``y`` and ``reference`` is subtracted from the
        probability, penalising signals that drift away from the reference.
    reference : array or None, optional
        Signal that ``y`` is compared against when ``sim`` is true. When
        omitted, the notebook-level ``orig_y`` is used, which keeps existing
        callers working.

    Returns
    -------
    The first row of ``predict_proba`` when ``being`` is falsy, otherwise the
    selected probability minus the similarity penalty (0 when ``sim`` is
    false).

    Raises
    ------
    ValueError
        If ``being`` is not in ``instr_list``.
    NameError
        If ``sim`` is true, no ``reference`` is passed and ``orig_y`` has not
        been defined yet.
    """
    frame_features = (
        librosa.feature.rms(y=y),
        librosa.feature.spectral_centroid(y=y, sr=sr),
        librosa.feature.spectral_bandwidth(y=y, sr=sr),
        librosa.feature.spectral_rolloff(y=y, sr=sr),
        librosa.feature.zero_crossing_rate(y),
    )
    mfcc = librosa.feature.mfcc(y=y, sr=sr)

    # Build the numeric feature vector directly instead of formatting the
    # means into a string and parsing them back.
    feature_vector = [float(np.mean(feature)) for feature in frame_features]
    feature_vector.extend(float(np.mean(coefficient)) for coefficient in mfcc)

    X = scaler.transform(np.array([feature_vector]))
    probabilities = pickle_model.predict_proba(X)[0]

    if not being:
        return probabilities

    penalty = 0
    if sim:
        if reference is None:
            # Backward-compatible fallback to the notebook-level reference
            # signal, which is created in a later cell than this function.
            try:
                reference = orig_y
            except NameError as exc:
                raise NameError(
                    "sim=True needs a reference signal: pass reference=... "
                    "or define orig_y before calling score_prob_of_being"
                ) from exc
        # Root-mean-square difference between the candidate and the reference.
        penalty = np.sqrt(np.mean(np.power(y - reference, 2)))

    return probabilities[instr_list.index(being)] - penalty

In [2]:
# Sanity check: score an unmodified flute clip and print the probability the
# classifier assigns to the "flu" label.
songname = "dataset/IRMAS-TrainingData/flu/008__[flu][nod][cla]0393__1.wav"
y, sr = librosa.load(songname, sr=44100)
flute_probability = score_prob_of_being(y, sr, being="flu")
print(flute_probability)

0.9959833725953245


In [7]:
import numpy as np
from deap import base, creator, tools, algorithms

# Single-objective maximisation problem; each individual is a numpy array
# holding the real part of a spectrogram.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)

# Clip to perturb and the label the classifier should be steered towards.
songname = "dataset/IRMAS-TrainingData/flu/008__[flu][nod][cla]0393__1.wav"
tricked_class = "tru"

# Reference waveform for the similarity penalty. It goes through the same
# "real part of the STFT, then inverse STFT" round trip that individuals go
# through in init_individual() and evaluate(), so it is compared like with like.
raw_y, sr = librosa.load(songname, sr=44100)
orig_spectrogram = np.real(librosa.stft(raw_y))
orig_y = np.real(librosa.istft(orig_spectrogram))

# Define the functions for initialization, mutation, crossover, and evaluation
def init_individual():
    """Return the starting genome: the real part of the source clip's STFT.

    The clip named by the module-level ``songname`` is loaded at 44100 Hz on
    every call; the sampling rate returned by the loader is not needed here.
    """
    waveform, _ = librosa.load(songname, sr=44100)
    spectrogram = librosa.stft(waveform)
    return np.real(spectrogram)

def mutate(individual, indpb):
    """Perturb ``individual`` in place with zero-mean Gaussian noise.

    ``indpb`` is used as the standard deviation of the noise added to every
    element (it is not a per-element probability). The same, modified object
    is returned wrapped in a one-element tuple.
    """
    noise = np.random.normal(loc=0, scale=indpb, size=individual.shape)
    individual += noise
    return (individual,)

def crossover(parent1, parent2):
    """Blend two parents into one child by element-wise averaging.

    Returns a one-element tuple holding the averaged array; neither parent is
    modified.

    NOTE(review): the toolbox in this notebook registers ``tools.cxTwoPoint``
    as the mate operator, so this helper appears to be unused. DEAP's
    variation routines presumably expect two children from a mate operator -
    confirm before registering this function.
    """
    child = (parent1 + parent2) / 2
    return (child,)

def evaluate(individual):
    """Fitness of one individual, as a one-element tuple.

    The individual is turned back into a waveform with an inverse STFT and
    scored as the classifier's probability for ``tricked_class``, with the
    similarity penalty of score_prob_of_being() switched on.
    """
    waveform = np.real(librosa.istft(individual))
    fitness = score_prob_of_being(waveform, 44100, tricked_class, True)
    return (fitness,)

# Set up the DEAP framework
toolbox = base.Toolbox()
# Every individual starts from the same array returned by init_individual().
toolbox.register("individual", tools.initIterate, creator.Individual, init_individual)
toolbox.register("population", tools.initRepeat, list, toolbox.individual)
# The crossover() helper defined in this notebook is NOT used; DEAP's built-in
# two-point crossover is registered instead.
# NOTE(review): DEAP's documentation on numpy-based individuals warns that
# slice-swapping crossovers work on array views - confirm cxTwoPoint behaves
# as intended for ndarray individuals.
toolbox.register("mate", tools.cxTwoPoint)
# indpb is passed to mutate(), which uses it as the standard deviation of the
# Gaussian noise rather than as a probability.
toolbox.register("mutate", mutate, indpb=0.05)
toolbox.register("select", tools.selTournament, tournsize=3)
toolbox.register("evaluate", evaluate)

# Create an initial population
population_size = 25
population = toolbox.population(n=population_size)

# Run the evolution
# NOTE(review): no random seed is set in the visible code, so runs are not
# reproducible - consider seeding `random` and `np.random` first.
generations = 50
# Track mean and maximum fitness per generation.
stats = tools.Statistics(key=lambda ind: ind.fitness.values)
stats.register("avg", np.mean)
stats.register("max", np.max)
population, logbook = algorithms.eaMuPlusLambda(population, toolbox, mu=population_size, lambda_=population_size*2,
                                                cxpb=0.7, mutpb=0.2, ngen=generations, stats=stats, halloffame=None, verbose=True)



gen	nevals	avg       	max       
0  	25    	0.00285597	0.00285597
1  	48    	0.0873035 	0.266852  
2  	48    	0.241089  	0.273939  
3  	44    	0.272128  	0.273939  
4  	45    	0.273181  	0.274128  
5  	44    	0.273691  	0.274128  
6  	47    	0.274137  	0.274337  
7  	47    	0.274141  	0.274394  
8  	48    	0.274303  	0.274398  
9  	45    	0.274305  	0.274398  
10 	47    	0.274378  	0.274398  
11 	46    	0.274393  	0.274399  
12 	43    	0.274399  	0.274407  
13 	45    	0.274402  	0.274407  
14 	48    	0.274404  	0.274407  
15 	46    	0.274405  	0.274407  
16 	45    	0.274406  	0.274407  
17 	44    	0.274406  	0.274407  
18 	43    	0.274407  	0.274407  
19 	41    	0.274407  	0.274407  
20 	46    	0.274305  	0.274407  
21 	38    	0.274407  	0.274407  
22 	45    	0.274407  	0.274407  
23 	49    	0.274407  	0.274407  
24 	48    	0.274407  	0.274407  
25 	45    	0.274407  	0.274407  
26 	47    	0.274407  	0.274407  
27 	47    	0.274407  	0.274407  
28 	41    	0.274407  	0.274407  
29 	46    

In [8]:
# Pick the fittest member of the final population and report its raw genome,
# the classifier's full probability row for its waveform, and its fitness.
(best_individual,) = tools.selBest(population, k=1)
best_fitness = best_individual.fitness.values[0]

print("Best Individual:", best_individual)
best_waveform = np.real(librosa.istft(best_individual))
print(score_prob_of_being(best_waveform, 44100))
print("Best Fitness:", best_fitness)

Best Individual: [[-4.49702173e-01 -1.86226800e-01  3.12794894e-02 ...  8.28555133e-03
  -2.14113310e-01 -1.13557053e+00]
 [ 5.04893243e-01 -7.72858933e-02  6.96726004e-03 ...  2.93891365e-03
  -1.97693199e-01  9.34747517e-01]
 [-5.09239912e-01  4.14133668e-01 -1.22201994e-01 ...  1.42421825e-02
   5.75620942e-02 -3.63676488e-01]
 ...
 [ 3.79510671e-02 -1.88799147e-02 -2.35791413e-05 ... -2.58379558e-04
  -2.47275253e-04  1.52828742e-03]
 [-3.79854143e-02 -1.72302491e-04 -1.51047148e-04 ...  2.66754651e-04
   5.87528048e-04 -3.88447708e-03]
 [ 3.80808227e-02  1.92405190e-02  2.64828297e-04 ... -2.14469546e-04
   1.02421083e-03  4.75622527e-03]]
[0.29720414 0.05132928 0.16849456 0.07361579 0.27486545 0.13449078]
Best Fitness: 0.27440746732911625


In [9]:
from IPython.lib.display import Audio
from IPython.display import display

# Listen to the untouched clip and the evolved one back to back: the original
# is reloaded from disk, the modified one is decoded from the best individual.
orig, sr = librosa.load(songname, sr=44100)
print(orig, sr)
modi = librosa.istft(best_individual)
for clip in (orig, modi):
    display(Audio(data=clip, rate=sr))

[ 0.0753479   0.07347107  0.06999207 ... -0.00190735  0.00346375
  0.00808716] 44100


KeyboardInterrupt: 