In [11]:
import numpy as np
import os
import librosa
import pandas as pd

#in conda environment:
#*. Open appropriate anaconda prompt(according to environment) and try following commands inorder,

#First, conda install -c numba numba

#Then, conda install -c conda-forge librosa

In [12]:
#list the files
filelist_train = os.listdir('Train\Sound') 
filelist_test = os.listdir('Test\Sound') 

#read them into pandas
train_df = pd.DataFrame(filelist_train)
test_df = pd.DataFrame(filelist_test)

In [13]:
# Renaming the column name to file
train_df = train_df.rename(columns={0:'file'})
test_df = test_df.rename(columns={0:'file'})

# Code in case we have to drop the '.DS_Store' and reset the index
train_df[train_df['file']=='.DS_Store']
train_df.drop(16, inplace=True)
train_df = train_df.sample(frac=1).reset_index(drop=True)

In [14]:
# We create an empty list where we will append all the speakers ids for each row of our dataframe by slicing the file name since we know the id is the first number before the hash
speaker = []
for i in range(0, len(train_df)):
    speaker.append(train_df['file'][i].split('-')[0])
# We now assign the speaker to a new column 
train_df['speaker'] = speaker

speaker = []
for i in range(0, len(test_df)):
    speaker.append(test_df['file'][i].split('-')[0])
# We now assign the speaker to a new column 
test_df['speaker'] = speaker

In [15]:
print(train_df.head())
print(test_df.head())


                    file speaker
0  7601-101619-0000.flac    7601
1  3663-172005-0000.flac    3663
2  3663-172005-0003.flac    3663
3   7641-96252-0003.flac    7641
4  7601-101619-0003.flac    7601
                    file speaker
0  1688-142285-0000.flac    1688
1  1688-142285-0001.flac    1688
2  1688-142285-0002.flac    1688
3  1688-142285-0003.flac    1688
4   1998-15444-0000.flac    1998


In [27]:
def extract_features(files):
    # Sets the name to be the path to where the file is in my computer
    file_name = os.path.join(os.path.abspath('Train\Sound')+'/'+str(files.file))
    # Loads the audio file as a floating point time series and assigns the default sample rate
    # Sample rate is set to 22050 by default
    X, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
    # Generate Mel-frequency cepstral coefficients (MFCCs) from a time series 
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T,axis=0)
    # Generates a Short-time Fourier transform (STFT) to use in the chroma_stft
    stft = np.abs(librosa.stft(X))
        # Computes a chromagram from a waveform or power spectrogram.
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)
    # Computes a mel-scaled spectrogram.
    mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
    # Computes spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T,axis=0)
    # Computes the tonal centroid features (tonnetz)
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X),
    sr=sample_rate).T,axis=0)
    return (mfccs, chroma, mel, contrast, tonnetz)

In [28]:
train_features = train_df.apply(extract_features, axis=1)
print(train_features)

  0.        ] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
 -3.4134242e-05  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
  0.00034426] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
 -8.4543273e-05  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
 -9.3067378e-05  0.0000000e+00] as keyword args. From version 0.10 passing these as positional arguments will result in an error
  mel = np.mean(librosa.feature.melspectrogram(X, sr=sample_rate).T,axis=0)
  0.       

0      ([-348.44696, 104.391655, -14.983462, 52.5148,...
1      ([-392.33408, 112.08962, -9.961716, 24.077326,...
2      ([-392.66724, 91.69414, -20.508627, 42.084164,...
3      ([-338.7318, 130.6695, -6.7340627, 52.223732, ...
4      ([-364.89853, 109.29126, -11.507843, 55.564728...
                             ...                        
126    ([-325.17865, 151.09589, 2.45279, 19.552773, -...
127    ([-381.25192, 94.73258, -31.878931, 40.05473, ...
128    ([-375.27408, 146.40617, -8.72412, 35.207653, ...
129    ([-250.84361, 93.31642, -28.628273, 70.19386, ...
130    ([-352.0547, 114.63603, -38.703056, 42.782986,...
Length: 131, dtype: object


In [26]:
features_train = []
for i in range(0, len(train_features)):
    features_train.append(np.concatenate((
        train_features[i][0],
        train_features[i][1], 
        train_features[i][2], 
        train_features[i][3],
        train_features[i][4]), axis=0))

X_train = np.array(features_train)

IndexError: tuple index out of range

In [None]:
y_train = np.array(train_df['speaker'])
