In [3]:
import librosa
import numpy as np
import matplotlib as plt
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import json

In [4]:
import os

In [5]:
DATA_ROOT = "../data/openmic-2018/"

if not os.path.exists(DATA_ROOT):
    raise ValueError("Did you forget to set `DATA_ROOT`?")

From Notebook tutorial on [Official Github](https://github.com/cosmir/openmic-2018/blob/master/examples/modeling-baseline.ipynb):
> The openmic data is provided in a python-friendly format as `openmic-2018.npz`.

In [6]:
OPENMIC = np.load(os.path.join(DATA_ROOT, "openmic-2018.npz"), allow_pickle=True)

In [7]:
print(list(OPENMIC.keys()))

['X', 'Y_true', 'Y_mask', 'sample_key']


In [8]:
X, Y_true, Y_mask, sample_key = (
    OPENMIC["X"],
    OPENMIC["Y_true"],
    OPENMIC["Y_mask"],
    OPENMIC["sample_key"],
)

In [9]:
with open(os.path.join(DATA_ROOT, "class-map.json"), "r") as f:
    class_map = json.load(f)

**QUESTIONS** Why is there a "confidence" intervall?

In [10]:
# sandbox
test19000 = zip(Y_true[19000], Y_mask[19000], class_map)
test50 = zip(Y_true[50], Y_mask[50], class_map)
test40 = zip(Y_true[40], Y_mask[40], class_map)

f = lambda x: x[1] == True

print("Sample 19000: ", *filter(f, test19000))
print("\n")
print("Sample 50: ", *filter(f, test50))
print("\n")
print("Sample 40: ", *filter(f, test40))

Sample 19000:  (0.0, True, 'guitar') (1.0, True, 'violin')


Sample 50:  (1.0, True, 'flute')


Sample 40:  (0.15055000000000002, True, 'cymbals')


Import the 'individual responses' into a Panda's dataframe. The datset contains 20,000, 10-second audio examples from the Free Music Archieve.

In [11]:
# Let's split the data into the training and test set
# We use squeeze=True here to return a single array for each, rather than a full DataFrame

split_train = pd.read_csv(
    os.path.join(DATA_ROOT, "partitions/split01_train.csv"), header=None, squeeze=True
)
split_test = pd.read_csv(
    os.path.join(DATA_ROOT, "partitions/split01_test.csv"), header=None, squeeze=True
)

split_test.head()



  split_train = pd.read_csv(


  split_test = pd.read_csv(


0      000178_3840
1     000308_61440
2    000312_184320
3    000319_145920
4    000321_218880
Name: 0, dtype: object

In [12]:
# sandbox
# vill mappa split_train och split_test mot index i... ja vad

In [13]:
# How many train and test examples do we have?  About 75%/25%
print("# Train: {},  # Test: {}".format(len(split_train), len(split_test)))

# Train: 14915,  # Test: 5085


In [14]:
# convert df to set
train_set = set(split_train)
test_set = set(split_test)

In [15]:
# These loops go through all sample keys, and save their row numbers
# to either idx_train or idx_test
#
# This will be useful in the next step for slicing the array data
idx_train, idx_test = [], []

for idx, n in enumerate(sample_key):
    if n in train_set:
        idx_train.append(idx)
    elif n in test_set:
        idx_test.append(idx)
    else:
        # This should never happen, but better safe than sorry.
        raise RuntimeError("Unknown sample key={}! Abort!".format(sample_key[n]))

# Finally, cast the idx_* arrays to numpy structures
idx_train = np.asarray(idx_train)
idx_test = np.asarray(idx_test)

In [16]:
data_train = Y_mask[idx_train]
data_test = Y_mask[idx_train]
sample_key_train = sample_key[idx_train]  # numpy.ndarray
sample_key_test = sample_key[idx_test]

type(sample_key_train)

numpy.ndarray

In [17]:
def generate_dataframe_with_labels(indices, class_map, Y_mask):
    """
    A method for creating a pd.df with each label in Y_mask and instrument in class_map
    Requires sorting of indices first (idx_train and idx_test)
    """
    data = {}
    for i in indices:
        tmp_dict = {}
        for instr, pred in zip(class_map, Y_mask[i]):
            tmp_dict[instr] = pred
        data[sample_key[i]] = tmp_dict

    return pd.DataFrame.from_dict(data, orient="index")

In [18]:
# Finally, we use the split indices to partition the features, labels, and masks
X_train = X[idx_train]
X_test = X[idx_test]

Y_true_train = Y_true[idx_train]
Y_true_test = Y_true[idx_test]

Y_mask_train = Y_mask[idx_train]
Y_mask_test = Y_mask[idx_test]

# set up dataframes with sample key and correct label
df_train = generate_dataframe_with_labels(idx_train, class_map, Y_mask)
df_test = generate_dataframe_with_labels(idx_test, class_map, Y_mask)
df_test.head()

Unnamed: 0,accordion,banjo,bass,cello,clarinet,cymbals,drums,flute,guitar,mallet_percussion,mandolin,organ,piano,saxophone,synthesizer,trombone,trumpet,ukulele,violin,voice
000178_3840,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True
000308_61440,False,False,True,False,False,False,False,False,False,False,False,True,False,False,False,False,False,False,False,False
000312_184320,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False
000319_145920,False,False,True,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False,False
000321_218880,False,False,True,False,False,False,False,False,False,False,False,False,False,False,True,False,False,False,False,False


In [19]:
# Print out the sliced shapes as a sanity check
print(X_train.shape)
print(X_test.shape)
print(X_train[27][2])

(14915, 10, 128)
(5085, 10, 128)
[191  48 202  77 214  45  48  72  27 255 112 100  37 189 116  97 168 173
 192 230  98 255 143 153 160 255  21 227 126 205 138 208 184 255   2 175
 115  58 155 204  93 255   0 175 255 176  64  92 110  89  46   8  78 111
 194 197 199  46 133 141 137  87   0 190 110 113 225 131 216 180   0  53
 255  86 230 105 120   1 142 134 153 255 199  71   0   0 174 255   0 255
   0  81 255 147 174 163   0   0  94 225 255  66 207 211 141 255  97  55
 138 255   0 255 101 170 255   0 232 120   0   0 255 164   0 186 227 255
   0 255]


In [20]:
%%writefile magic_functions.py
import librosa

def get_audio(f):
    y, _ = librosa.load(f)
    return y

def process_frame(f):
    # changed your logic here as I couldn't repro it
    return f, get_audio

Overwriting magic_functions.py


In [21]:
DATA_ROOT = "../data/openmic-2018/"
path = DATA_ROOT + "audio"
# file_paths = [os.path.join(subdir, [file for file in files]) for subdir, dirs, files in os.walk(DATA_ROOT + 'audio')]
file_paths = [
    [None if file[0] == "." else os.path.join(subdir, file) for file in files]
    for subdir, dirs, files in os.walk(DATA_ROOT + "audio")
]

Flytta nedanstående tre rutor till data.ipynb och skriv fina texter som knyter an till den rapporten vi vill duplicera

In [22]:
import librosa as lr
import pandas as pd

file_paths = lr.util.find_files(DATA_ROOT + "audio", ext="ogg")
index = pd.DataFrame({"file_name": file_paths})
index

Unnamed: 0,file_name
0,/workspaces/instrument-detection/data/openmic-...
1,/workspaces/instrument-detection/data/openmic-...
2,/workspaces/instrument-detection/data/openmic-...
3,/workspaces/instrument-detection/data/openmic-...
4,/workspaces/instrument-detection/data/openmic-...
...,...
19995,/workspaces/instrument-detection/data/openmic-...
19996,/workspaces/instrument-detection/data/openmic-...
19997,/workspaces/instrument-detection/data/openmic-...
19998,/workspaces/instrument-detection/data/openmic-...


In [39]:
from turtle import ycor
import pedalboard as pb
import librosa as lr


def preprocess(file):
    features = {}
    with pb.io.AudioFile(file) as f:
        # TODO so some files have varying SR, which could be problematic
        #assert f.samplerate == 44100, f"Sample rate is not 44.1khz for {file}!"
        # TODO count _one_ spectogram and then use as input to all features
        y = f.read(f.frames)
        y = y.mean(axis=0)  # mono
        S = np.abs(librosa.stft(y, hop_length=512, win_length=1024))**2
        zcrs = librosa.feature.zero_crossing_rate(y=y)
        features["zcr_mean"] = zcrs.mean()
        features["zcr_std"] = zcrs.std()
        scs = librosa.feature.spectral_centroid(S=S)
        features["sc_mean"] = scs.mean()
        features["sc_std"] = scs.std()
        mfccs = librosa.feature.mfcc(S=librosa.power_to_db(S), n_mfcc=13)
        for i, mfcc in enumerate(mfccs):
            features['mfcc' + str(i)] = np.mean(mfcc)

    return features

In [40]:
from tqdm import tqdm
import numpy as np

from multiprocess import Pool

with Pool() as p:
    ys = list(tqdm(p.imap(preprocess, index["file_name"]), total=len(index)))

pd.DataFrame(ys)

100%|██████████| 20000/20000 [07:19<00:00, 45.55it/s]


Unnamed: 0,zcr_mean,zcr_std,sc_mean,sc_std,mfcc0,mfcc1,mfcc2,mfcc3,mfcc4,mfcc5,mfcc6,mfcc7,mfcc8,mfcc9,mfcc10,mfcc11,mfcc12
0,0.036367,0.021923,296.517136,133.109808,-978.598022,310.696777,108.373276,126.306877,103.131187,61.552929,50.078197,35.620724,36.752838,21.056427,16.246675,23.335825,14.897064
1,0.052411,0.013491,478.192757,141.199973,-413.563721,551.550598,12.113594,90.307190,68.209785,9.139759,51.813713,5.636692,6.231468,17.053007,-11.894317,5.294181,2.507157
2,0.081234,0.014925,604.505859,189.731508,-544.543701,582.932617,30.000204,-4.941914,96.540993,4.919924,-21.477848,39.302097,-0.072678,-26.966000,26.628426,17.684668,-17.259043
3,0.053718,0.013248,462.825122,153.244140,-934.030762,499.909790,244.189911,86.147011,35.783989,4.024544,-17.910740,-9.677771,9.737517,25.201883,35.146244,28.466101,9.925189
4,0.082449,0.034076,601.706717,329.074617,-561.763611,444.960205,-10.617265,84.151665,27.753359,4.536748,29.895000,-5.476596,27.277163,13.598028,14.319709,27.292217,0.721995
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
19995,0.050435,0.009352,508.286842,94.249039,-978.444946,393.793549,178.069458,80.083481,53.589272,23.759466,3.446425,-4.533399,-11.920152,-14.391589,-8.987197,-2.683615,-1.517069
19996,0.009752,0.004809,61.216410,42.815086,-1092.291382,234.526062,199.707275,163.494125,129.694733,99.337563,74.979210,55.700863,41.367481,33.264076,28.860458,24.709829,20.218401
19997,0.065545,0.022368,450.432458,192.751011,-625.574707,449.861298,-39.440456,75.393967,79.896980,-8.778455,49.855957,24.374577,1.175638,29.078180,-2.689997,0.299629,9.396278
19998,0.042145,0.015084,332.139261,146.760990,-786.657532,417.759430,105.764244,87.762772,79.656799,33.620518,21.303242,9.829395,35.033508,26.835651,-27.157253,-6.835197,17.680960


In [12]:
from tqdm import tqdm

from multiprocess import Pool
from magic_functions import process_frame
import librosa

file_paths = [
    [None if file[0] == "." else os.path.join(subdir, file) for file in files]
    for subdir, dirs, files in os.walk(DATA_ROOT + "audio")
]
test_paths = list(filter(lambda x: x is not None, file_paths[1]))

max_pool = 5

with Pool(max_pool) as p:
    pool_outputs = list(tqdm(p.imap(process_frame, test_paths), total=len(test_paths)))

##print(pool_outputs)
# new_dict = dict(pool_outputs)

# print("dict:", new_dict)

['../data/openmic-2018/audio/000/000046_3840.ogg', '../data/openmic-2018/audio/000/000135_483840.ogg', '../data/openmic-2018/audio/000/000139_119040.ogg', '../data/openmic-2018/audio/000/000141_153600.ogg', '../data/openmic-2018/audio/000/000144_30720.ogg', '../data/openmic-2018/audio/000/000145_172800.ogg', '../data/openmic-2018/audio/000/000154_288000.ogg', '../data/openmic-2018/audio/000/000178_3840.ogg', '../data/openmic-2018/audio/000/000182_145920.ogg', '../data/openmic-2018/audio/000/000189_207360.ogg', '../data/openmic-2018/audio/000/000190_126720.ogg', '../data/openmic-2018/audio/000/000195_280320.ogg', '../data/openmic-2018/audio/000/000201_168960.ogg', '../data/openmic-2018/audio/000/000202_142080.ogg', '../data/openmic-2018/audio/000/000203_7680.ogg', '../data/openmic-2018/audio/000/000205_61440.ogg', '../data/openmic-2018/audio/000/000208_195840.ogg', '../data/openmic-2018/audio/000/000211_49920.ogg', '../data/openmic-2018/audio/000/000212_211200.ogg', '../data/openmic-201

100%|██████████| 132/132 [00:48<00:00,  2.69it/s]


[(array([ 0.        ,  0.        ,  0.        , ..., -0.1573766 ,
       -0.15117157, -0.1723783 ], dtype=float32), 22050), (array([ 0.        ,  0.        ,  0.        , ...,  0.45528466,
        0.268466  , -0.0851104 ], dtype=float32), 22050), (array([ 0.        ,  0.        ,  0.        , ..., -0.2580941 ,
       -0.18306255, -0.16655417], dtype=float32), 22050), (array([ 0.        ,  0.        ,  0.        , ..., -0.1382622 ,
       -0.15191478, -0.1384398 ], dtype=float32), 22050), (array([0.        , 0.        , 0.        , ..., 0.29437143, 0.18351454,
       0.21160512], dtype=float32), 22050), (array([ 0.        ,  0.        ,  0.        , ..., -0.01810446,
       -0.06072648, -0.10858434], dtype=float32), 22050), (array([-4.8456505e-10, -5.9406466e-09, -1.4415963e-08, ...,
        1.3822870e-01,  1.3761765e-01,  1.4607213e-01], dtype=float32), 22050), (array([0.        , 0.        , 0.        , ..., 0.27550063, 0.23395097,
       0.20044863], dtype=float32), 22050), (array([ 

TypeError: unhashable type: 'numpy.ndarray'