In [27]:
import helpers
import pandas as pd
import pickle
import numpy as np
import math
import librosa.feature as lf

In [2]:
# CSV read below with pandas; its `label` column supplies the list of tags.
TRAIN_CSV = 'data/external/train.csv'
# Passed as `files_path` to helpers.find_paths_with_tags when locating clips for a tag.
TRAIN_FILES = 'data/external/audio_train'

In [3]:
# Distinct values of the `label` column, in order of first appearance in the CSV.
tags = pd.read_csv(TRAIN_CSV)['label'].unique().tolist()

Load the clips for each label separately (`duration=15` versions first) and write one pickle per label; the 1 second versions are produced further below

In [7]:
# Export the `duration=15` version of every tag: one pickle per tag under data/raw/train/.
# (`duration` is presumably in seconds — confirm against helpers.load_wav_files.)
for tag_num, tag_name in enumerate(tags):
    train_files = helpers.find_paths_with_tags(csv_path=TRAIN_CSV, files_path=TRAIN_FILES, filters=[tag_name])
    wav_data = helpers.load_wav_files(train_files, duration=15)
    with open('data/raw/train/wav-data-{}.pkl'.format(tag_name), 'wb') as f:
        pickle.dump(wav_data, f)
    # Release this tag's audio before loading the next one. Otherwise the old
    # list stays alive (via the `wav_data` name) while the next tag is being
    # loaded, so two tags' worth of audio are held at once; the recorded run
    # of this cell ended in a MemoryError.
    del wav_data
    print('Processed {}-{}'.format(tag_num, tag_name))

Processed 0-Hi-hat
Processed 1-Saxophone
Processed 2-Trumpet
Processed 3-Glockenspiel
Processed 4-Cello
Processed 5-Knock
Processed 6-Gunshot_or_gunfire
Processed 7-Clarinet
Processed 8-Computer_keyboard
Processed 9-Keys_jangling
Processed 10-Snare_drum
Processed 11-Writing
Processed 12-Laughter
Processed 13-Tearing
Processed 14-Fart
Processed 15-Oboe
Processed 16-Flute
Processed 17-Cough
Processed 18-Telephone
Processed 19-Bark
Processed 20-Chime
Processed 21-Bass_drum
Processed 22-Bus
Processed 23-Squeak
Processed 24-Scissors
Processed 25-Harmonica
Processed 26-Gong
Processed 27-Microwave_oven
Processed 28-Burping_or_eructation
Processed 29-Double_bass
Processed 30-Shatter
Processed 31-Fireworks
Processed 32-Tambourine
Processed 33-Cowbell
Processed 34-Electric_piano
Processed 35-Meow
Processed 36-Drawer_open_or_close


MemoryError: 

In [4]:
# Resume the `duration=15` export at position 37 of `tags`, i.e. with the tags
# that were not written when the earlier full run stopped with a MemoryError.
# `start=37` keeps the printed index equal to the tag's position in `tags`
# instead of restarting the count at 0.
for tag_num, tag_name in enumerate(tags[37:], start=37):
    train_files = helpers.find_paths_with_tags(csv_path=TRAIN_CSV, files_path=TRAIN_FILES, filters=[tag_name])
    wav_data = helpers.load_wav_files(train_files, duration=15)
    with open('data/raw/train/wav-data-{}.pkl'.format(tag_name), 'wb') as f:
        pickle.dump(wav_data, f)
    # Release this tag's audio before the next load so that two tags' worth of
    # audio are never held in memory at the same time.
    del wav_data
    print('Processed {}-{}'.format(tag_num, tag_name))

Processed 0-Applause
Processed 1-Acoustic_guitar
Processed 2-Violin_or_fiddle
Processed 3-Finger_snapping


#### 1 second versions

In [18]:
# Same export as the `duration=15` cells, but with `duration=1` and written to
# data/raw/train-1-sec/ (one pickle per tag).
for tag_num, tag_name in enumerate(tags):
    # Locate the training files carrying this tag.
    train_files = helpers.find_paths_with_tags(
        csv_path=TRAIN_CSV,
        files_path=TRAIN_FILES,
        filters=[tag_name],
    )
    wav_data = helpers.load_wav_files(train_files, duration=1)

    # Persist the loaded clips for this tag.
    with open('data/raw/train-1-sec/wav-data-{}.pkl'.format(tag_name), 'wb') as f:
        pickle.dump(wav_data, f)

    print('Processed {}-{}'.format(tag_num, tag_name))

Processed 0-Hi-hat
Processed 1-Saxophone
Processed 2-Trumpet
Processed 3-Glockenspiel
Processed 4-Cello
Processed 5-Knock
Processed 6-Gunshot_or_gunfire
Processed 7-Clarinet
Processed 8-Computer_keyboard
Processed 9-Keys_jangling
Processed 10-Snare_drum
Processed 11-Writing
Processed 12-Laughter
Processed 13-Tearing
Processed 14-Fart
Processed 15-Oboe
Processed 16-Flute
Processed 17-Cough
Processed 18-Telephone
Processed 19-Bark
Processed 20-Chime
Processed 21-Bass_drum
Processed 22-Bus
Processed 23-Squeak
Processed 24-Scissors
Processed 25-Harmonica
Processed 26-Gong
Processed 27-Microwave_oven
Processed 28-Burping_or_eructation
Processed 29-Double_bass
Processed 30-Shatter
Processed 31-Fireworks
Processed 32-Tambourine
Processed 33-Cowbell
Processed 34-Electric_piano
Processed 35-Meow
Processed 36-Drawer_open_or_close
Processed 37-Applause
Processed 38-Acoustic_guitar
Processed 39-Violin_or_fiddle
Processed 40-Finger_snapping


## Process features

In [76]:
def pad_audio(sound: np.ndarray, sample_rate=22050):
    """Return `sound` repeated end-to-end and cut to exactly `sample_rate` samples.

    Clips shorter than `sample_rate` are looped (tiled) until they are long
    enough; clips that are already long enough are simply truncated.

    Args:
        sound: Array of audio samples; its first dimension is taken as the
            clip length (assumed 1-D — TODO confirm against the loader).
        sample_rate: Number of samples in the returned array. Defaults to 22050.

    Returns:
        An array with `sample_rate` samples. An empty `sound` has nothing to
        repeat, so it yields all-zero samples of the same dtype.
    """
    n_samples = sound.shape[0]
    if n_samples == 0:
        # Previously this divided by zero; return silence of the target length instead.
        return np.zeros(sample_rate, dtype=sound.dtype)
    # Smallest number of whole repetitions that reaches `sample_rate` samples.
    repeats = math.ceil(sample_rate / n_samples)
    return np.tile(sound, repeats)[:sample_rate]

#### chroma_stft

In [69]:
%%time
# Compute chroma_stft features for every clip of every tag and write one
# DataFrame pickle per tag under data/interim/chroma_stft-1-sec/.
feature_name = 'chroma_stft'
for tag_name in tags:
    with open('data/raw/train-1-sec/wav-data-{}.pkl'.format(tag_name), 'rb') as f:
        wav_data = pickle.load(f)
    # The audio is passed as `y=`: librosa >= 0.10 makes the arguments of the
    # feature functions keyword-only, so a positional call raises TypeError
    # there, while the keyword form works on older releases as well.
    # `sample.wav[0]` is presumably the raw sample array — confirm against helpers.
    # NOTE(review): pad_audio and librosa both fall back to their default rate
    # of 22050 here; confirm the clips were loaded at that rate.
    wav_features = {
        sample.name: lf.chroma_stft(y=pad_audio(sample.wav[0])).flatten()
        for sample in wav_data
    }
    # One row per clip; reset_index() moves the clip name into the first column.
    df_features = pd.DataFrame.from_dict(wav_features, orient='index').reset_index()
    # First column becomes `name`; the rest are prefixed with the feature name
    # so tables of different features can be merged without column clashes.
    df_features.columns = ['name'] + [
        '{}_{}'.format(feature_name, column_name) for column_name in list(df_features.columns)[1:]
    ]
    df_features.to_pickle('data/interim/{}-1-sec/{}.pkl'.format(feature_name, tag_name))



Wall time: 1min 6s


#### mfcc

In [45]:
%%time
# Compute mfcc features for every clip of every tag and write one DataFrame
# pickle per tag under data/interim/mfcc-1-sec/.
feature_name = 'mfcc'
for tag_name in tags:
    with open('data/raw/train-1-sec/wav-data-{}.pkl'.format(tag_name), 'rb') as f:
        wav_data = pickle.load(f)
    # The audio is passed as `y=`: librosa >= 0.10 makes the arguments of the
    # feature functions keyword-only, so a positional call raises TypeError
    # there, while the keyword form works on older releases as well.
    # `sample.wav[0]` is presumably the raw sample array — confirm against helpers.
    # NOTE(review): pad_audio and librosa both fall back to their default rate
    # of 22050 here; confirm the clips were loaded at that rate.
    wav_features = {
        sample.name: lf.mfcc(y=pad_audio(sample.wav[0])).flatten()
        for sample in wav_data
    }
    # One row per clip; reset_index() moves the clip name into the first column.
    df_features = pd.DataFrame.from_dict(wav_features, orient='index').reset_index()
    # First column becomes `name`; the rest are prefixed with the feature name
    # so tables of different features can be merged without column clashes.
    df_features.columns = ['name'] + [
        '{}_{}'.format(feature_name, column_name) for column_name in list(df_features.columns)[1:]
    ]
    df_features.to_pickle('data/interim/{}-1-sec/{}.pkl'.format(feature_name, tag_name))

Wall time: 1min 14s


In [77]:
# Spot-check: reload the chroma_stft feature table for the tag at position 7 of `tags`.
a = pd.read_pickle('data/interim/{}-1-sec/{}.pkl'.format('chroma_stft', tags[7]))

In [78]:
# Spot-check: reload the mfcc feature table for the tag at position 7 of `tags`.
b = pd.read_pickle('data/interim/{}-1-sec/{}.pkl'.format('mfcc', tags[7]))

In [79]:
# Join the two feature tables on the clip name (default inner join).
c = pd.merge(a, b, on=['name'])

In [80]:
# (rows, columns) of the merged table: columns are `name` plus the feature columns of both tables.
c.shape

(300, 1409)

#### Make a model for each feature - one for chroma_stft and one for mfcc

In [82]:
from sklearn import linear_model

Use the multiclass version

In [83]:
# Logistic regression with all-default settings.
# NOTE(review): no multi-class option is set explicitly, so the multi-class
# strategy is whatever the installed scikit-learn defaults to — confirm it is
# the intended "multiclass version".
model = linear_model.LogisticRegression()

chroma_stft