<a href="https://colab.research.google.com/github/RohanMathur17/Scream_Detection/blob/main/Scream_Detection_Librosa.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [2]:
!pip install -q pydub

In [51]:
import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow.keras.layers import Input, Lambda, Conv2D, BatchNormalization
from tensorflow.keras.layers import Activation, MaxPool2D, Flatten, Dropout, Dense
from tensorflow.keras.models import Sequential
from sklearn.metrics import classification_report
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from sklearn.preprocessing import LabelBinarizer
from tensorflow.keras.optimizers import Adam
from sklearn import metrics
from IPython.display import Audio
from matplotlib import pyplot as plt
from tqdm import tqdm
import os
import numpy as np
from scipy.io import wavfile 
import librosa
import pydub
import pandas as pd

# Getting Data 

In [20]:
def features_extractor(file_name):
    audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') #loading audio files, #sample rate- numerical int value of each music file
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40) #spectogram? feature engineering for audio
    mfccs_scaled_features = np.mean(mfccs_features.T,axis=0) #scaling, mean , standardization?
    
    return mfccs_scaled_features

In [21]:
def absolute_file_paths(directory): #setting path
    path = os.path.abspath(directory)
    return [entry.path for entry in os.scandir(path) if entry.is_file()]

scream_paths = absolute_file_paths('/content/gdrive/MyDrive/Scream Data/positive')
non_scream_paths = absolute_file_paths('/content/gdrive/MyDrive/Scream Data/negative')
atmospheric_paths = absolute_file_paths('/content/gdrive/MyDrive/Scream Data/atmospheric_sounds')

In [22]:
extracted_features = [] #setting labels

for scream in scream_paths:
    data = features_extractor(scream)
    label = 'scream'
    extracted_features.append([data,label])

for non_scream in non_scream_paths:
    data = features_extractor(non_scream)
    label = 'non_scream'
    extracted_features.append([data,label])

for non_scream in atmospheric_paths:
    data = features_extractor(non_scream)
    label = 'atmospheric'
    extracted_features.append([data,label])

In [24]:
extracted_features_array = np.asarray(extracted_features)

  return array(a, dtype, copy=False, order=order)


In [35]:
extracted_features_audio = [i[0] for i in extracted_features_array]
extracted_features_audio = np.asarray(extracted_features_audio) 

extracted_features_labels = [i[1] for i in extracted_features_array]
extracted_features_labels = np.asarray(extracted_features_labels)

In [39]:
lb = LabelBinarizer()
labels = lb.fit_transform(extracted_features_labels)

In [41]:
### Train Test Split
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(extracted_features_audio,labels,test_size=0.3,random_state=0)

In [42]:
y_train.shape

(140, 3)

In [43]:
num_labels = y.shape[1]

In [44]:
model=Sequential() 
###first layer
model.add(Dense(100,input_shape=(40,)))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###second layer
model.add(Dense(200))
model.add(Activation('relu'))
model.add(Dropout(0.5))
###third layer
model.add(Dense(100))
model.add(Activation('relu'))
model.add(Dropout(0.5))

###final layer
model.add(Dense(num_labels))
model.add(Activation('softmax'))

In [45]:
model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer='adam')

In [46]:
## Trianing my model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

num_epochs = 100
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification.hdf5', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer],
          verbose=1, shuffle = True)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/100
1/5 [=====>........................] - ETA: 4s - loss: 62.2703 - accuracy: 0.3438
Epoch 00001: val_loss improved from inf to 13.42165, saving model to saved_models/audio_classification.hdf5
Epoch 2/100
1/5 [=====>........................] - ETA: 0s - loss: 44.4929 - accuracy: 0.2188
Epoch 00002: val_loss did not improve from 13.42165
Epoch 3/100
1/5 [=====>........................] - ETA: 0s - loss: 30.4269 - accuracy: 0.4062
Epoch 00003: val_loss improved from 13.42165 to 10.25115, saving model to saved_models/audio_classification.hdf5
Epoch 4/100
1/5 [=====>........................] - ETA: 0s - loss: 20.4712 - accuracy: 0.5312
Epoch 00004: val_loss improved from 10.25115 to 7.92399, saving model to saved_models/audio_classification.hdf5
Epoch 5/100
1/5 [=====>........................] - ETA: 0s - loss: 21.6101 - accuracy: 0.4688
Epoch 00005: val_loss improved from 7.92399 to 7.88339, saving model to saved_models/audio_classification.hdf5
Epoch 6/100
1/5 [=====>...........

In [47]:
test_accuracy=model.evaluate(X_test,y_test,verbose=0)
print(test_accuracy[1])

0.9344262480735779


In [58]:
predictions = model.predict(x=X_test)
print(classification_report(y_test.argmax(axis=1),
	predictions.argmax(axis=1), target_names=lb.classes_))


              precision    recall  f1-score   support

 atmospheric       0.95      0.95      0.95        19
  non_scream       0.90      1.00      0.95        19
      scream       0.95      0.87      0.91        23

    accuracy                           0.93        61
   macro avg       0.93      0.94      0.94        61
weighted avg       0.94      0.93      0.93        61



In [73]:
filename = '/content/scream.wav' #librosa python package
audio, sample_rate = librosa.load(filename, res_type='kaiser_fast') 
mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)

#print(mfccs_scaled_features)
mfccs_scaled_features=mfccs_scaled_features.reshape(1,-1)
#print(mfccs_scaled_features)
#print(mfccs_scaled_features.shape)

labels = ['atmospheric', 'non_scream', 'scream']
labels.sort()
predict_x=model.predict(mfccs_scaled_features) 
classes_x=np.argmax(predict_x,axis=1)
print(labels[classes_x[0]])




scream
