In [1]:
!pip install -q kaggle --quiet

In [4]:
!cp kaggle.json ~/.kaggle/

In [6]:
!kaggle datasets download -d sripaadsrinivasan/audio-mnist

audio-mnist.zip: Skipping, found more recently modified local copy (use --force to force download)


In [None]:
!unzip audio-mnist.zip

In [8]:
import IPython.display as ipd 
import librosa
import pandas as pd
import os
import numpy as np
from tqdm import tqdm


from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation , Dropout

In [9]:
# Set up working data directory
data_dir='/content/data/'
text_file= data_dir+"audioMNIST_meta.txt"
# open the text file 
f = open(text_file, "r")

# read the file data
data=f.read()
print(data)

{
    "01": {
        "accent": "german", 
        "age": 30, 
        "gender": "male", 
        "native speaker": "no", 
        "origin": "Europe, Germany, Wuerzburg", 
        "recordingdate": "17-06-22-11-04-28", 
        "recordingroom": "Kino"
    }, 
    "02": {
        "accent": "German", 
        "age": "25", 
        "gender": "male", 
        "native speaker": "no", 
        "origin": "Europe, Germany, Hamburg", 
        "recordingdate": "17-06-26-17-57-29", 
        "recordingroom": "Kino"
    }, 
    "03": {
        "accent": "German", 
        "age": "31", 
        "gender": "male", 
        "native speaker": "no", 
        "origin": "Europe, Germany, Bremen", 
        "recordingdate": "17-06-30-17-34-51", 
        "recordingroom": "Kino"
    }, 
    "04": {
        "accent": "German", 
        "age": "23", 
        "gender": "male", 
        "native speaker": "no", 
        "origin": "Europe, Germany, Helmstedt", 
        "recordingdate": "17-06-30-18-09-14", 
        "

In [10]:
# list of all the folder id
folder_id=list(range(1,60))

# list to store all the gender
gender_list=[]

# looping for each data in the text
for i in range(len(data)-6):
    
    # finding the word gender in the data
    if data[i:i+6]=="gender":
        
        # extracting the affiliated gender 
        txt=data[i+10:i+16]
        
        # since male and female have different count of alphabets
        # preprocessing the data to get the actual text 
        if txt[-2]=='"':
            txt=txt[:len(txt)-2]
        
        # appending the gender into the list
        gender_list.append(txt.split(","))

# creating a dictonary that keeps track of folder id and associated gender
folder_dict=dict(zip(folder_id,gender_list))
print(folder_dict)

{1: ['male'], 2: ['male'], 3: ['male'], 4: ['male'], 5: ['male'], 6: ['male'], 7: ['male'], 8: ['male'], 9: ['male'], 10: ['male'], 11: ['male'], 12: ['female'], 13: ['male'], 14: ['male'], 15: ['male'], 16: ['male'], 17: ['male'], 18: ['male'], 19: ['male'], 20: ['male'], 21: ['male'], 22: ['male'], 23: ['male'], 24: ['male'], 25: ['male'], 26: ['female'], 27: ['male'], 28: ['female'], 29: ['male'], 30: ['male'], 31: ['male'], 32: ['male'], 33: ['male'], 34: ['male'], 35: ['male'], 36: ['female'], 37: ['male'], 38: ['male'], 39: ['male'], 40: ['male'], 41: ['male'], 42: ['male'], 43: ['female'], 44: ['male'], 45: ['male'], 46: ['male'], 47: ['female'], 48: ['male'], 49: ['male'], 50: ['male'], 51: ['male'], 52: ['female'], 53: ['male'], 54: ['male'], 55: ['male'], 56: ['female'], 57: ['female'], 58: ['female'], 59: ['female']}


In [11]:
# save file names
file_names=[]

# sabe all gender of file names
gender=[]

# save all file id and gender
for dirname, _, filenames in os.walk('/content/data'):
    for filename in filenames:
        file_origin=filename[2:4]
        if file_origin=='di':
            pass
        elif int(file_origin) in folder_dict:
            file_names.append(filename)
            gender.append(folder_dict[int(file_origin)])
        else:
            pass

In [12]:
import pandas as pd
# create na new dataframe
meta_data = pd.DataFrame(gender,columns=['class'])
meta_data["file_name"]=file_names

In [13]:
meta_data.head()

Unnamed: 0,class,file_name
0,male,1_03_6.wav
1,male,3_03_44.wav
2,male,3_03_13.wav
3,male,4_03_15.wav
4,male,1_03_39.wav


In [14]:
!pip install resampy --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m [32m3.1/3.1 MB[0m [31m145.9 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m66.2 MB/s[0m eta [36m0:00:00[0m
[?25h

In [19]:
from tqdm import tqdm
import librosa
# do feature extraction using librosa
def features_extract(file):
    # load the audio file
    audio,sample_rate = librosa.load(file_name,res_type='kaiser_fast')
    
    # extract the features
    feature = librosa.feature.mfcc(y=audio,sr=sample_rate,n_mfcc=50)
    
    # feature scaling
    scaled_feature = np.mean(feature.T,axis=0)
    
    # return the scaled features
    return scaled_feature

# list containg all the features
extracted = []


# for each row in the csv
for index_num,row in tqdm(meta_data.iterrows()):
    
    # get the file 
    file_name = os.path.join(os.path.abspath(data_dir),row['file_name'][2:4]+'/',str(row['file_name']))

    # get file label
    final_class_labels = row['class']
    
    # extract feature
    data= features_extract(file_name)
    
    # store it in a list
    extracted.append([data,final_class_labels])

29500it [10:02, 48.98it/s]


In [20]:
# create na new dataframe
extracted_df = pd.DataFrame(extracted,columns=['feature','class'])
# display first fivve rows of the dataframe
extracted_df.head()

Unnamed: 0,feature,class
0,"[-683.48285, 148.5554, 9.697626, 39.098167, 20...",male
1,"[-671.5128, 119.278564, 11.740626, 52.78524, 3...",male
2,"[-686.6968, 128.21233, 13.634573, 52.420574, 3...",male
3,"[-662.9957, 112.28084, 17.904793, 28.873457, -...",male
4,"[-707.7623, 142.9215, 20.297323, 39.672703, 16...",male


In [21]:
x = np.array(extracted_df['feature'].tolist())
y = np.array(extracted_df['class'].tolist())

In [22]:
le = LabelEncoder()
Y = to_categorical(le.fit_transform(y))

In [23]:
# split the data to train and test set
x_train, x_test, y_train, y_test = train_test_split(x, Y, test_size=0.2, random_state = 42)

# print the details
print("Number of training samples = ", x_train.shape[0])
print("Number of testing samples = ",x_test.shape[0])

Number of training samples =  23600
Number of testing samples =  5900


In [34]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
num_labels = Y.shape[1]

# Define the model
model = Sequential()

# Add convolutional layers
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(40, 40, 1)))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))

# Flatten the feature maps
model.add(Flatten())

# Add a dense layer with the number of output labels
num_labels = Y.shape[1]
model.add(Dense(num_labels))



# Print model summary
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 38, 38, 32)        320       
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 19, 19, 32)       0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 17, 17, 64)        18496     
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 8, 8, 64)         0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 6, 6, 128)         73856     
                                                                 
 max_pooling2d_11 (MaxPoolin  (None, 3, 3, 128)       

In [35]:
# Construct model 
num_labels = Y.shape[1]
model2 = Sequential()

model2.add(Dense(256, input_shape=(50,)))
model2.add(Activation('relu'))
model2.add(Dropout(0.5))

model2.add(Dense(256))
model2.add(Activation('relu'))
model2.add(Dropout(0.5))
model2.add(Dense(256))
model2.add(Activation('relu'))
model2.add(Dropout(0.5))
model2.add(Dense(128))

model2.add(Dense(num_labels))
model2.add(Activation('softmax'))
model2.summary()

Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 256)               13056     
                                                                 
 activation_1 (Activation)   (None, 256)               0         
                                                                 
 dropout_6 (Dropout)         (None, 256)               0         
                                                                 
 dense_11 (Dense)            (None, 256)               65792     
                                                                 
 activation_2 (Activation)   (None, 256)               0         
                                                                 
 dropout_7 (Dropout)         (None, 256)               0         
                                                                 
 dense_12 (Dense)            (None, 256)              

In [36]:
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)

In [37]:
model2.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)


In [38]:
num_epochs = 15
num_batch_size = 32

model.fit(
          x_train, 
          y_train, 
          batch_size=num_batch_size, 
          epochs=num_epochs,
          validation_data=(x_test, y_test),
         )


Epoch 1/15


ValueError: ignored