# This notebook is purely for groundtruth generation

### Remarques de Lionel:

`` L’ensemble de validation sert à monitorer les performances au cours de l’apprentissage. Il est distinct de l’ensemble de test. Il faudra que nous soyons bien d’accord sur ces points de méthodologie importants.``
1. Il est toujours possible de générer d’autres découpes avec la fonction train_test_split de sklearn.model_selection (scikit-learn) ou de faire des ensembles de cross-validation avec sklearn.model_selection.StratifiedKFold (bien vérifier dans ce cas que les distributions des labels dans les différents ensembles sont les mêmes, pour garder l’hypothèse i.i.d = indépendants et identiquement distribués).
2. De mémoire les fichiers audio sont à 44.1 kHz. Je les ai sous-échantillonnés à 32 kHz avec la fonction resample de la bibliothèque librosa https://librosa.github.io/librosa/
3. Par ailleurs, je vous invite à regarder l’implémentation d’un réseau convolutionnel à partir d’un spectrogramme : https://github.com/marl/openl3/blob/master/openl3/models.py (Cette information est importante )
4. Ce qui me paraît bizarre, c’est la couche de normalisation qui suit le spectrogramme et qui n’est pas la même dans les deux cas…

In [1]:
import pandas as pd
import numpy as np
import librosa

Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


## Generating the .npy of groundtruths for DCASE 2018 dataset:

#### Opening the .csv

In [7]:
file_path = "train.csv"
data_frame = pd.read_csv("train.csv")

# Showing 47 rows:
data_frame.iloc[:47]

Unnamed: 0,fname,label,manually_verified
0,00044347.wav,Hi-hat,0
1,001ca53d.wav,Saxophone,1
2,002d256b.wav,Trumpet,0
3,0033e230.wav,Glockenspiel,1
4,00353774.wav,Cello,1
5,003b91e8.wav,Cello,0
6,003da8e5.wav,Knock,1
7,0048fd00.wav,Gunshot_or_gunfire,1
8,004ad66f.wav,Clarinet,0
9,0063ab88.wav,Computer_keyboard,0


#### Converting classes to numbers:

In [9]:
# creating a dictionnary to convert the classes in number


labels_to_number = {}
number_of_labels = 41 # according to the kaggle competition


countLabels = 0


for index, row in data_frame.iterrows():
    fileName = row['fname']
    label    = row['label']
    
    if(label not in labels_to_number.keys()):
        labels_to_number[label] = countLabels
        countLabels += 1

In [12]:
# Shows the label values:

print(labels_to_number)
print("\n")

if(len(labels_to_number.keys()) == number_of_labels):
    print("The number of labels is equal to what Kaggle announced")
else:
    print("The number of labels is not equal to what Kaggle announced")

{'Hi-hat': 0, 'Saxophone': 1, 'Trumpet': 2, 'Glockenspiel': 3, 'Cello': 4, 'Knock': 5, 'Gunshot_or_gunfire': 6, 'Clarinet': 7, 'Computer_keyboard': 8, 'Keys_jangling': 9, 'Snare_drum': 10, 'Writing': 11, 'Laughter': 12, 'Tearing': 13, 'Fart': 14, 'Oboe': 15, 'Flute': 16, 'Cough': 17, 'Telephone': 18, 'Bark': 19, 'Chime': 20, 'Bass_drum': 21, 'Bus': 22, 'Squeak': 23, 'Scissors': 24, 'Harmonica': 25, 'Gong': 26, 'Microwave_oven': 27, 'Burping_or_eructation': 28, 'Double_bass': 29, 'Shatter': 30, 'Fireworks': 31, 'Tambourine': 32, 'Cowbell': 33, 'Electric_piano': 34, 'Meow': 35, 'Drawer_open_or_close': 36, 'Applause': 37, 'Acoustic_guitar': 38, 'Violin_or_fiddle': 39, 'Finger_snapping': 40}


The number of labels is equal to what Kaggle announced


In [13]:
# Creating the dictionary of groundtruths:

groundtruths = {}

for index, row in data_frame.iterrows():
    fileName = row['fname']
    label    = row['label']
    
    groundtruths[fileName] = labels_to_number[label]

groundtruths

{'00044347.wav': 0,
 '001ca53d.wav': 1,
 '002d256b.wav': 2,
 '0033e230.wav': 3,
 '00353774.wav': 4,
 '003b91e8.wav': 4,
 '003da8e5.wav': 5,
 '0048fd00.wav': 6,
 '004ad66f.wav': 7,
 '0063ab88.wav': 8,
 '006f2f32.wav': 0,
 '0075d39c.wav': 9,
 '00780200.wav': 10,
 '0079d310.wav': 11,
 '0091fc7f.wav': 4,
 '0097160c.wav': 12,
 '00ad7068.wav': 12,
 '00c5808a.wav': 13,
 '00c82919.wav': 14,
 '00c934d7.wav': 12,
 '00c9e799.wav': 15,
 '00cb787c.wav': 16,
 '00ce569f.wav': 0,
 '00d1fe46.wav': 17,
 '00d3bba3.wav': 18,
 '00d40fa2.wav': 10,
 '00d9fa61.wav': 16,
 '00e2b4cd.wav': 19,
 '00f88dc5.wav': 3,
 '00fbb28b.wav': 20,
 '00fcbab2.wav': 21,
 '010aa387.wav': 14,
 '011a2185.wav': 4,
 '0120d246.wav': 22,
 '01235a12.wav': 23,
 '01257aad.wav': 24,
 '01302128.wav': 25,
 '013264d3.wav': 23,
 '013c3135.wav': 7,
 '01506d76.wav': 24,
 '015cf474.wav': 7,
 '0160d55e.wav': 26,
 '01638f61.wav': 11,
 '0172a2a5.wav': 27,
 '017ea24e.wav': 28,
 '01811e48.wav': 25,
 '0184c390.wav': 4,
 '018863f5.wav': 29,
 '018a10bb.

#### Saving the dictionary to .npy file :

In [14]:
np.save("DCASE_train_labels.npy", groundtruths)

#### Testing the save:

In [18]:
test = np.load("DCASE_train_labels.npy").item()
len(test)

9473

#### Converting it to tensors:

In [2]:
lab_dict   = np.load("DCASE_train_labels.npy").item()
lab_dict_t = {}

for i, key in enumerate(lab_dict.keys()):
    new_key = key.split(".")[0] + ".pt"
    if(i<5):print(new_key)
    lab_dict_t[new_key] = lab_dict[key]

lab_dict_t

00044347.pt
001ca53d.pt
002d256b.pt
0033e230.pt
00353774.pt


{'00044347.pt': 0,
 '001ca53d.pt': 1,
 '002d256b.pt': 2,
 '0033e230.pt': 3,
 '00353774.pt': 4,
 '003b91e8.pt': 4,
 '003da8e5.pt': 5,
 '0048fd00.pt': 6,
 '004ad66f.pt': 7,
 '0063ab88.pt': 8,
 '006f2f32.pt': 0,
 '0075d39c.pt': 9,
 '00780200.pt': 10,
 '0079d310.pt': 11,
 '0091fc7f.pt': 4,
 '0097160c.pt': 12,
 '00ad7068.pt': 12,
 '00c5808a.pt': 13,
 '00c82919.pt': 14,
 '00c934d7.pt': 12,
 '00c9e799.pt': 15,
 '00cb787c.pt': 16,
 '00ce569f.pt': 0,
 '00d1fe46.pt': 17,
 '00d3bba3.pt': 18,
 '00d40fa2.pt': 10,
 '00d9fa61.pt': 16,
 '00e2b4cd.pt': 19,
 '00f88dc5.pt': 3,
 '00fbb28b.pt': 20,
 '00fcbab2.pt': 21,
 '010aa387.pt': 14,
 '011a2185.pt': 4,
 '0120d246.pt': 22,
 '01235a12.pt': 23,
 '01257aad.pt': 24,
 '01302128.pt': 25,
 '013264d3.pt': 23,
 '013c3135.pt': 7,
 '01506d76.pt': 24,
 '015cf474.pt': 7,
 '0160d55e.pt': 26,
 '01638f61.pt': 11,
 '0172a2a5.pt': 27,
 '017ea24e.pt': 28,
 '01811e48.pt': 25,
 '0184c390.pt': 4,
 '018863f5.pt': 29,
 '018a10bb.pt': 29,
 '018b1df6.pt': 30,
 '018d1dc4.pt': 0,


In [3]:
## Save the new dict of tensors:
np.save("DCASE_tensor_train_labels.npy", lab_dict_t)

In [6]:
test = np.load("DCASE_train_labels.npy").item()

count = [0] * 41

for value in test.values():
    count[value] += 1

arg_min_class = np.argmin(count)
eff_min_class = min(count)

print("The class {0} has the lowest number of elements in train class that is {1}".format(arg_min_class, eff_min_class))
sum(count)

The class 3 has the lowest number of elements in train class that is 94


9473

## Saving the dictionary of labels:

In [2]:
## Copy pasted the result of the dictionary above, I did not want to re-run the code 
## in fear of the label indexing changing, thus modifying the result of the training
dictOfLabels = {'Hi-hat': 0, 'Saxophone': 1, 'Trumpet': 2, 'Glockenspiel': 3, 'Cello': 4, 'Knock': 5, 'Gunshot_or_gunfire': 6, 'Clarinet': 7, 'Computer_keyboard': 8, 'Keys_jangling': 9, 'Snare_drum': 10, 'Writing': 11, 'Laughter': 12, 'Tearing': 13, 'Fart': 14, 'Oboe': 15, 'Flute': 16, 'Cough': 17, 'Telephone': 18, 'Bark': 19, 'Chime': 20, 'Bass_drum': 21, 'Bus': 22, 'Squeak': 23, 'Scissors': 24, 'Harmonica': 25, 'Gong': 26, 'Microwave_oven': 27, 'Burping_or_eructation': 28, 'Double_bass': 29, 'Shatter': 30, 'Fireworks': 31, 'Tambourine': 32, 'Cowbell': 33, 'Electric_piano': 34, 'Meow': 35, 'Drawer_open_or_close': 36, 'Applause': 37, 'Acoustic_guitar': 38, 'Violin_or_fiddle': 39, 'Finger_snapping': 40}

np.save("labelsToNumberDict.npy", dictOfLabels)

# Creating test labels file:

In [2]:
data_frame_test = pd.read_csv("test_post_competition_scoring_clips.csv")

# Showing 47 rows:
data_frame_test.iloc[:47]

Unnamed: 0,fname,label,usage,freesound_id,license
0,00326aa9.wav,Oboe,Private,355125,Attribution
1,0038a046.wav,Bass_drum,Private,90621,Creative Commons 0
2,007759c4.wav,Saxophone,Private,13406,Creative Commons 0
3,008afd93.wav,Saxophone,Private,358962,Attribution
4,00ae03f6.wav,Chime,Private,78203,Attribution
5,00eac343.wav,Electric_piano,Public,371494,Creative Commons 0
6,010a0b3a.wav,Shatter,Private,368342,Attribution
7,01a5a2a3.wav,Bark,Private,30344,Attribution
8,01bb344f.wav,Acoustic_guitar,Private,128810,Attribution
9,02107093.wav,Electric_piano,Private,65660,Attribution


### Loading the associated numbers to those labels:

In [6]:
labels_to_number = np.load("labelsToNumberDict.npy").item()

print(labels_to_number)

{'Hi-hat': 0, 'Saxophone': 1, 'Trumpet': 2, 'Glockenspiel': 3, 'Cello': 4, 'Knock': 5, 'Gunshot_or_gunfire': 6, 'Clarinet': 7, 'Computer_keyboard': 8, 'Keys_jangling': 9, 'Snare_drum': 10, 'Writing': 11, 'Laughter': 12, 'Tearing': 13, 'Fart': 14, 'Oboe': 15, 'Flute': 16, 'Cough': 17, 'Telephone': 18, 'Bark': 19, 'Chime': 20, 'Bass_drum': 21, 'Bus': 22, 'Squeak': 23, 'Scissors': 24, 'Harmonica': 25, 'Gong': 26, 'Microwave_oven': 27, 'Burping_or_eructation': 28, 'Double_bass': 29, 'Shatter': 30, 'Fireworks': 31, 'Tambourine': 32, 'Cowbell': 33, 'Electric_piano': 34, 'Meow': 35, 'Drawer_open_or_close': 36, 'Applause': 37, 'Acoustic_guitar': 38, 'Violin_or_fiddle': 39, 'Finger_snapping': 40}


### Creating the dictionaries of ground truths:

In [8]:
groundtruths         = {}
groundtruths_tensors = {}

for index, row in data_frame_test.iterrows():
    fileName = row['fname']
    label    = row['label']
    
    groundtruths[fileName]                               = labels_to_number[label]
    groundtruths_tensors[fileName.split(".")[0] + ".pt"] = labels_to_number[label]

print(groundtruths)
print(groundtruths_tensors)

{'00326aa9.wav': 15, '0038a046.wav': 21, '007759c4.wav': 1, '008afd93.wav': 1, '00ae03f6.wav': 20, '00eac343.wav': 34, '010a0b3a.wav': 30, '01a5a2a3.wav': 19, '01bb344f.wav': 38, '02107093.wav': 34, '02198549.wav': 24, '023eab1f.wav': 29, '028db587.wav': 29, '02960f07.wav': 24, '02fb6c5b.wav': 5, '030db750.wav': 18, '03319789.wav': 39, '0381efd3.wav': 29, '03c5bfbb.wav': 6, '0422b811.wav': 28, '0459ee65.wav': 7, '04605af5.wav': 30, '046486df.wav': 8, '047bf19c.wav': 16, '04ab46a9.wav': 4, '04ab4e22.wav': 32, '04b249bd.wav': 36, '04ecda67.wav': 21, '053e7bb5.wav': 10, '05723b3a.wav': 4, '057425e6.wav': 14, '0586f0e0.wav': 35, '058e63ea.wav': 32, '059d5420.wav': 1, '05ad10fb.wav': 2, '05c8453f.wav': 31, '05fc58ee.wav': 6, '06123abf.wav': 38, '0638da1a.wav': 10, '06775f3c.wav': 22, '06c535eb.wav': 18, '06e4c394.wav': 39, '07063bc9.wav': 9, '0716b51d.wav': 29, '0761d26b.wav': 37, '07682400.wav': 15, '079faac8.wav': 25, '07c8463e.wav': 17, '07c95625.wav': 35, '07cf422e.wav': 38, '07d5cca9.w

#### Saving the dictionary to .npy file :

In [9]:
np.save("DCASE_tensor_test_labels.npy", groundtruths_tensors)
np.save("DCASE_test_labels.npy", groundtruths)