In [1]:
!pip install -q kaggle --quiet

In [4]:
!cp kaggle.json ~/.kaggle/

In [5]:
!kaggle datasets download -d sripaadsrinivasan/audio-mnist

Downloading audio-mnist.zip to /content
 99% 935M/948M [00:08<00:00, 140MB/s]
100% 948M/948M [00:08<00:00, 115MB/s]


In [None]:
!unzip audio-mnist.zip

In [7]:
import IPython.display as ipd 
import librosa
import pandas as pd
import os
import numpy as np
from tqdm import tqdm


from sklearn.model_selection import train_test_split 
from sklearn.preprocessing import LabelEncoder

import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense , Activation , Dropout

In [None]:
# Set up working data directory
data_dir='/content/data/'
text_file= data_dir+"audioMNIST_meta.txt"
# open the text file 
f = open(text_file, "r")

# read the file data
data=f.read()
print(data)

In [9]:
# list of all the folder id
folder_id=list(range(1,60))

# list to store all the gender
gender_list=[]

# looping for each data in the text
for i in range(len(data)-6):
    
    # finding the word gender in the data
    if data[i:i+6]=="gender":
        
        # extracting the affiliated gender 
        txt=data[i+10:i+16]
        
        # since male and female have different count of alphabets
        # preprocessing the data to get the actual text 
        if txt[-2]=='"':
            txt=txt[:len(txt)-2]
        
        # appending the gender into the list
        gender_list.append(txt.split(","))

# creating a dictonary that keeps track of folder id and associated gender
folder_dict=dict(zip(folder_id,gender_list))
print(folder_dict)

{1: ['male'], 2: ['male'], 3: ['male'], 4: ['male'], 5: ['male'], 6: ['male'], 7: ['male'], 8: ['male'], 9: ['male'], 10: ['male'], 11: ['male'], 12: ['female'], 13: ['male'], 14: ['male'], 15: ['male'], 16: ['male'], 17: ['male'], 18: ['male'], 19: ['male'], 20: ['male'], 21: ['male'], 22: ['male'], 23: ['male'], 24: ['male'], 25: ['male'], 26: ['female'], 27: ['male'], 28: ['female'], 29: ['male'], 30: ['male'], 31: ['male'], 32: ['male'], 33: ['male'], 34: ['male'], 35: ['male'], 36: ['female'], 37: ['male'], 38: ['male'], 39: ['male'], 40: ['male'], 41: ['male'], 42: ['male'], 43: ['female'], 44: ['male'], 45: ['male'], 46: ['male'], 47: ['female'], 48: ['male'], 49: ['male'], 50: ['male'], 51: ['male'], 52: ['female'], 53: ['male'], 54: ['male'], 55: ['male'], 56: ['female'], 57: ['female'], 58: ['female'], 59: ['female']}


In [10]:
# save file names
file_names=[]

# sabe all gender of file names
gender=[]

# save all file id and gender
for dirname, _, filenames in os.walk('/content/data'):
    for filename in filenames:
        file_origin=filename[2:4]
        if file_origin=='di':
            pass
        elif int(file_origin) in folder_dict:
            file_names.append(filename)
            gender.append(folder_dict[int(file_origin)])
        else:
            pass

In [11]:
import pandas as pd
# create na new dataframe
meta_data = pd.DataFrame(gender,columns=['class'])
meta_data["file_name"]=file_names

In [12]:
meta_data.head()

Unnamed: 0,class,file_name
0,male,6_41_44.wav
1,male,7_41_23.wav
2,male,0_41_30.wav
3,male,8_41_49.wav
4,male,1_41_4.wav


In [13]:
!pip install resampy --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/3.1 MB[0m [31m?[0m eta [36m-:--:--[0m[2K     [91m━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.3/3.1 MB[0m [31m8.4 MB/s[0m eta [36m0:00:01[0m[2K     [91m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m[91m╸[0m[90m━━━[0m [32m2.8/3.1 MB[0m [31m40.5 MB/s[0m eta [36m0:00:01[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m31.1 MB/s[0m eta [36m0:00:00[0m
[?25h

In [14]:
from tqdm import tqdm
import librosa
# do feature extraction using librosa
def features_extract(file):
    # load the audio file
    audio,sample_rate = librosa.load(file_name,res_type='kaiser_fast')
    
    # extract the features
    feature = librosa.feature.mfcc(y=audio,sr=sample_rate,n_mfcc=50)
    
    # feature scaling
    scaled_feature = np.mean(feature.T,axis=0)
    
    # return the scaled features
    return scaled_feature

# list containg all the features
extracted = []


# for each row in the csv
for index_num,row in tqdm(meta_data.iterrows()):
    
    # get the file 
    file_name = os.path.join(os.path.abspath(data_dir),row['file_name'][2:4]+'/',str(row['file_name']))

    # get file label
    final_class_labels = row['class']
    
    # extract feature
    data= features_extract(file_name)
    
    # store it in a list
    extracted.append([data,final_class_labels])

29500it [15:12, 32.33it/s]


In [15]:
# create na new dataframe
extracted_df = pd.DataFrame(extracted,columns=['feature','class'])
# display first fivve rows of the dataframe
extracted_df.head()

Unnamed: 0,feature,class
0,"[-543.0836, -3.4463952, 24.653305, 19.151833, ...",male
1,"[-515.62177, 98.82885, 2.563472, 27.277157, 19...",male
2,"[-507.0467, 104.04916, 21.317314, 31.931906, 3...",male
3,"[-548.31104, 52.18395, 0.03327163, 69.82653, 3...",male
4,"[-555.70734, 155.17976, 16.231964, 34.267853, ...",male


In [29]:
extracted_df['class'].value_counts()

male      24000
female     5500
Name: class, dtype: int64

In [16]:
x = np.array(extracted_df['feature'].tolist())
y = np.array(extracted_df['class'].tolist())

In [17]:
le = LabelEncoder()
Y = to_categorical(le.fit_transform(y))

In [30]:
# split the data to train and test set
x_train, x_test, y_train, y_test = train_test_split(x, Y, test_size=0.2, random_state = 42,stratify=y)

# print the details
print("Number of training samples = ", x_train.shape[0])
print("Number of testing samples = ",x_test.shape[0])

Number of training samples =  23600
Number of testing samples =  5900


In [48]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout


In [32]:
# Construct model 
num_labels = Y.shape[1]
model2 = Sequential()

model2.add(Dense(256, input_shape=(50,)))
model2.add(Activation('relu'))
model2.add(Dropout(0.5))

model2.add(Dense(256))
model2.add(Activation('relu'))
model2.add(Dropout(0.5))
model2.add(Dense(256))
model2.add(Activation('relu'))
model2.add(Dropout(0.5))
model2.add(Dense(128))

model2.add(Dense(num_labels))
model2.add(Activation('softmax'))
model2.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_7 (Dense)             (None, 256)               13056     
                                                                 
 activation_4 (Activation)   (None, 256)               0         
                                                                 
 dropout_3 (Dropout)         (None, 256)               0         
                                                                 
 dense_8 (Dense)             (None, 256)               65792     
                                                                 
 activation_5 (Activation)   (None, 256)               0         
                                                                 
 dropout_4 (Dropout)         (None, 256)               0         
                                                                 
 dense_9 (Dense)             (None, 256)              

In [35]:
model2.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['accuracy'],
)


In [36]:
num_epochs =30
num_batch_size = 32

model2.fit(
          x_train, 
          y_train, 
          batch_size=num_batch_size, 
          epochs=num_epochs,
          validation_data=(x_test, y_test),
         )


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x7f17aefa42e0>

In [38]:
from sklearn.metrics import (
    f1_score,
    accuracy_score,
    recall_score,
    precision_score,
    confusion_matrix,
    roc_auc_score,
    
    classification_report,
    precision_recall_curve
)
import seaborn as sns
import matplotlib.pyplot as plt
def metrics_score(actual, predicted):
    print(classification_report(actual, predicted))
    cm = confusion_matrix(actual, predicted)
    plt.figure(figsize=(8,5))
    sns.heatmap(cm, annot=True,  fmt='.2f', xticklabels=['Not Attrite', 'Attrite'], yticklabels=['Not Attrite', 'Attrite'])
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.show()

In [40]:
y_pred_test = model2.predict(x_test)

metrics_score(y_test, y_pred_test)



ValueError: ignored

In [56]:
# function to extract features from the audion file
def extract_feature(file_name):
    # load the audio file
    audio_data, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
    
    # get the feature 
    feature = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=50)
    
    # scale the features
    feature_scaled = np.mean(feature.T,axis=0)
    
    # return the array of features
    return np.array([feature_scaled])

In [51]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import numpy as np
# Get predictions from your model
y_pred = model2.predict(x_test)

y_pred = np.round(y_pred).astype(int)
# Calculate evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print('Accuracy:', accuracy)
print('Precision:', precision)
print('Recall:', recall)
print('F1-score:', f1)



ValueError: ignored

In [52]:
# function to predict the feature
def print_prediction(file_name):
    
    # extract feature from the function defined above
    prediction_feature = extract_feature(file_name) 
    
    # get the id of label using argmax
    predicted_vector = np.argmax(model.predict(prediction_feature), axis=-1)
    
    # get the class label from class id
    predicted_class = le.inverse_transform(predicted_vector)
    
    # display the result
    print("The predicted class is:", predicted_class[0], '\n') 

In [57]:

file_name = data_dir+'01'+"/0_01_0.wav"

# get the output
print_prediction(file_name)

# play the file
ipd.Audio(file_name)

ValueError: ignored