<a href="https://colab.research.google.com/github/Am5678/multimodal-Crime-video-analysis/blob/main/Untitled12.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import librosa
import IPython.display as ipd
import librosa.display
import os
from scipy.io import wavfile
%matplotlib inline

In [None]:
!pip install mutagen


In [None]:
import mutagen
import mutagen.wave

In [None]:
# Load the clip-level metadata table; later cells read its 'class', 'fold',
# 'slice_file_name' and 'start' columns.
metadata = pd.read_csv('data.csv')


In [None]:
# Preview the first rows; display() is required because only the LAST expression
# of a cell is rendered automatically.
ipd.display(metadata.head())

# Number of clips per class label
class_distr = metadata['class'].value_counts()
print(class_distr)


In [None]:
# Bar chart of how many clips each class contributes
class_distr = metadata['class'].value_counts()
fig, ax = plt.subplots(figsize=(8, 6))
ax.bar(class_distr.index, class_distr.values, alpha=0.8)
ax.set_title('Class Distribution')
ax.set_ylabel('Number of Occurrences', fontsize=12)
ax.set_xlabel('Class Name', fontsize=12)
# Class names are long, so print them vertically
ax.tick_params(axis='x', labelrotation=90)
plt.show()

In [None]:
# Count the null entries in every metadata column
missing_values = metadata.isna().sum()

# Report the per-column counts
print(missing_values)

In [None]:
def create_dataset_df(csv_file):
    """Read the metadata CSV and add a 'filepath' column for every clip.

    Each path is built as audio/fold<fold>/<slice_file_name> from the row's
    'fold' and 'slice_file_name' columns.

    Parameters
    ----------
    csv_file : path of the metadata CSV to read.

    Returns
    -------
    pandas.DataFrame with all CSV columns plus 'filepath'.
    """
    dataset_df = pd.read_csv(csv_file)
    dataset_df['filepath'] = [
        os.path.join('audio', 'fold' + str(fold), clip_name)
        for fold, clip_name in zip(dataset_df['fold'], dataset_df['slice_file_name'])
    ]
    return dataset_df

In [None]:
# Metadata table extended with a 'filepath' column pointing at each clip
metadata_df = create_dataset_df('data.csv')


In [None]:
def get_audio_metadata_mutagen(filepath):
    """Read the stream properties of a WAV file with mutagen.

    Parameters
    ----------
    filepath : path of the WAV file to inspect.

    Returns
    -------
    dict with the keys 'length', 'bitrate', 'channels', 'sample_rate' and
    'bits_per_sample', each copied from the same-named attribute of the
    file's mutagen ``info`` object.
    """
    stream_info = mutagen.wave.WAVE(filepath).info
    wanted_fields = ('length', 'bitrate', 'channels', 'sample_rate', 'bits_per_sample')
    return {field: getattr(stream_info, field) for field in wanted_fields}


def compute_audio_statistics(dataset_df):
    """Attach per-file audio properties to the dataset table.

    Calls get_audio_metadata_mutagen on every entry of the 'filepath' column
    and stores the results in new columns 'length', 'bitrate', 'channels',
    'sample_rate' and 'bits_per_sample'.

    Note: the columns are added to ``dataset_df`` itself (in place); the same
    object is returned for convenience.
    """
    stat_columns = ('length', 'bitrate', 'channels', 'sample_rate', 'bits_per_sample')
    # One metadata dict per file, in row order
    per_file = [get_audio_metadata_mutagen(path) for path in dataset_df['filepath']]
    # Turn the list of dicts into one column per property
    for column in stat_columns:
        dataset_df[column] = [file_stats[column] for file_stats in per_file]

    return dataset_df

In [None]:
# Drop the bookkeeping columns that are not needed for the audio statistics.
# errors='ignore' keeps the cell re-runnable: on a second run the columns are
# already gone and a plain drop() would raise KeyError.
metadata_df = metadata_df.drop(columns=['fold', 'slice_file_name', 'fsID', 'start', 'end'], errors='ignore')
# Adds length / bitrate / channels / sample_rate / bits_per_sample columns (in place)
audio_statistics_df = compute_audio_statistics(metadata_df)
audio_statistics_df.describe()


In [None]:
# How many clips use each sample rate / bit depth. display() is needed because
# a bare expression is only rendered when it is the last line of the cell.
ipd.display(audio_statistics_df['sample_rate'].value_counts(), audio_statistics_df['bits_per_sample'].value_counts())
# Per-class summary statistics of the numeric columns
ipd.display(audio_statistics_df.groupby('class').describe())
# Check for outliers using boxplot
plt.figure(figsize=(10,6))
sns.boxplot(x='class', y='start', data=metadata)
plt.xticks(rotation=90)
plt.show()

In [None]:
# Histogram of clip durations, measured by decoding every file at its native sample rate
fig, ax = plt.subplots()
durations = []
for i, (fold, clip_name) in enumerate(zip(metadata['fold'], metadata['slice_file_name'])):
    file_path = os.path.join('audio', 'fold' + str(fold), clip_name)
    signal, sr = librosa.load(file_path, sr=None, mono=True)
    # duration in seconds = number of samples / samples per second
    durations.append(signal.shape[0] / sr)
ax.hist(durations, bins=20)
ax.set_title('Audio File Duration Distribution')
ax.set_xlabel('Duration (s)')
ax.set_ylabel('Count')
plt.show()

In [None]:
# Listen to a few audio files
for i in range(5):
    file_path = os.path.join('audio', 'fold'+str(metadata.loc[i, 'fold']), metadata.loc[i, 'slice_file_name'])
    signal, sr = librosa.load(file_path, sr=None, mono=True)
    print('Playing audio file:', metadata.loc[i, 'slice_file_name'])
    # Embed an audio player for the clip; inside a loop the widget must be
    # passed to display() explicitly or nothing is rendered.
    ipd.display(ipd.Audio(signal, rate=sr))
    # Show the waveform under the player
    librosa.display.waveshow(signal, sr=sr)
    plt.show()

In [None]:
# Raw waveform of one example clip, read with scipy (samples keep the file's native integer scale)
file_path = 'audio/fold1/7383-3-0-0.wav'
fs, signal = wavfile.read(file_path)
fig, ax = plt.subplots()
ax.plot(signal)
ax.set_title('Waveform')
ax.set_xlabel('Time (samples)')
ax.set_ylabel('Amplitude')
plt.show()

In [None]:
#MFCC
from tqdm import tqdm
# Extracting MFCC's For every audio file
def features_extractor(file_name):
    """Summarise one audio file as a 40-value MFCC vector.

    The file is loaded with librosa (default sample rate, 'kaiser_fast'
    resampling), 40 MFCCs are computed per frame, and each coefficient is
    averaged over all frames.

    Parameters
    ----------
    file_name : path of the audio file.

    Returns
    -------
    numpy.ndarray of shape (40,) holding the time-averaged MFCCs.
    """
    waveform, rate = librosa.load(file_name, res_type='kaiser_fast')
    # mfcc() returns (n_mfcc, frames); transpose so axis 0 is time, then average it away
    mfcc_frames = librosa.feature.mfcc(y=waveform, sr=rate, n_mfcc=40).T
    return np.mean(mfcc_frames, axis=0)

In [None]:
# Root folder that holds the fold1 ... foldN sub-directories
audio_dataset_path = 'audio'
# One [mfcc_vector, class_label] pair per clip
extracted_features=[]
# iterrows() is a generator without a length, so pass total= to get a real progress bar with ETA
for index_num,row in tqdm(metadata.iterrows(), total=len(metadata)):
    file_name = os.path.join(os.path.abspath(audio_dataset_path),'fold'+str(row["fold"])+'/',str(row["slice_file_name"]))
    final_class_labels=row["class"]
    data=features_extractor(file_name)
    extracted_features.append([data,final_class_labels])

In [None]:
# Converting extracted_features to Pandas dataframe
# Each row holds one clip: 'feature' is the MFCC vector, 'class' its label.
extracted_features_df=pd.DataFrame(extracted_features,columns=['feature','class'])
extracted_features_df.head(10)

In [None]:
# Persist the feature table.
# NOTE(review): the 'feature' column holds array objects, so the CSV presumably stores
# their text representation rather than numeric columns — confirm before reloading from it.
extracted_features_df.to_csv("extracted_features.csv")
# Data Splitting
# Stack the per-clip feature vectors into one 2-D array and collect the labels alongside
X=np.array(extracted_features_df['feature'].tolist())
y=np.array(extracted_features_df['class'].tolist())

In [None]:
# Sanity check: shape of the stacked feature array
X.shape


In [None]:
# Inspect the class labels before encoding
y


In [None]:
# Label Encoding: class names -> integer ids -> one-hot rows
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
# Integer id per sample; the fitted encoder is kept to map predictions back to names
class_ids = labelencoder.fit_transform(y)
y = to_categorical(class_ids)

In [None]:
# Inspect the one-hot encoded labels
y


In [None]:
# Training Testing Sets
# 80/20 random split with a fixed seed so the partition is reproducible.
# NOTE(review): the held-out 20% is also passed as validation_data during training
# further down, so it is not an untouched test set.
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

In [None]:
# Record the TensorFlow version used for this run
import tensorflow as tf
print(tf.__version__)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation
from sklearn import metrics

In [None]:
# Number of output classes = width of the one-hot label matrix
num_labels=y.shape[1]
print(num_labels)

In [None]:
#ANN
# Fully connected classifier on the 40-value MFCC vector:
# three Dense -> ReLU -> Dropout(0.5) stages (1600, 800, 400 units), then a softmax output.
model = Sequential()
hidden_units = (1600, 800, 400)
for position, units in enumerate(hidden_units):
    if position == 0:
        # Only the first layer declares the input shape
        model.add(Dense(units, input_shape=(40,)))
    else:
        model.add(Dense(units))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

#final layer: one probability per class
model.add(Dense(num_labels))
model.add(Activation('softmax'))

In [None]:
model.summary()
model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer='adam')
# Model training
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime
num_epochs = 100
num_batch_size = 128

# Make sure the checkpoint folder exists before the callback tries to write into it
os.makedirs('saved_models', exist_ok=True)
# Keep only the weights with the best validation loss seen so far
checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification.h5', verbose=1, save_best_only=True)
start = datetime.now()

# Train exactly once and keep the History object. (Calling fit() twice would train for
# 2 * num_epochs and leave `history` covering only the second half.)
history = model.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

In [None]:
# Plot the training and validation curves: first loss, then accuracy
curve_specs = (
    ('loss', 'Model Loss', 'Loss', 'upper right'),
    ('accuracy', 'Model Accuracy', 'Accuracy', 'lower right'),
)
for metric, title, ylabel, legend_loc in curve_specs:
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.xlabel('Epoch')
    plt.ylabel(ylabel)
    plt.legend(['train', 'val'], loc=legend_loc)
    plt.show()

In [None]:
def predict_class_label(filename, model_name, feature_extractor=None):
    """Predict the class name of one audio file.

    Parameters
    ----------
    filename : path of the audio file to classify.
    model_name : trained Keras model; its ``predict`` is called on a (1, n_features) array.
    feature_extractor : optional callable ``path -> 1-D feature vector``. Defaults to
        ``features_extractor`` (time-averaged 40 MFCCs). Pass the extractor the model
        was trained with when it differs from the default.

    Returns
    -------
    The predicted class name, decoded with the notebook-level ``labelencoder``.
    """
    if feature_extractor is None:
        feature_extractor = features_extractor
    # The model expects a batch dimension: (1, n_features)
    features = feature_extractor(filename).reshape(1, -1)
    predicted_label = np.argmax(model_name.predict(features), axis=-1)
    print('Predicted Label:',predicted_label)
    # Map the integer id back to the original class name
    return labelencoder.inverse_transform(predicted_label)[0]

In [None]:
# Classify a few hand-picked clips. Each result is printed explicitly, because a
# notebook only echoes the value of the last expression in a cell.
for sample_clip in ("dog_barking.wav", "gun_1.wav", "siren.wav", "car.wav"):
    print(sample_clip, '->', predict_class_label(sample_clip, model))


In [None]:
import librosa.filters
def features_extractor_spectral_substraction(file_name):
    """Extract a normalised 40-value MFCC vector from a pre-emphasised signal.

    Steps: load as mono at 22050 Hz, apply a pre-emphasis filter (coef=0.95),
    compute 40 MFCCs with n_fft=1024, average every coefficient over time, then
    standardise the 40 values to zero mean / unit standard deviation.

    Note: despite the name, no spectral subtraction step is performed here.

    Parameters
    ----------
    file_name : path of the audio file.

    Returns
    -------
    numpy.ndarray of shape (40,).
    """
    # Load and resample to a fixed rate
    waveform, rate = librosa.load(file_name, mono=True, sr=22050)

    # Pre-emphasis filter
    emphasised = librosa.effects.preemphasis(waveform, coef=0.95)

    # MFCCs of the filtered signal, averaged over the time axis
    mfcc_frames = librosa.feature.mfcc(y=emphasised, sr=rate, n_mfcc=40, n_fft=1024).T
    mfcc_means = np.mean(mfcc_frames, axis=0)

    # Standardise across the 40 coefficients of this clip
    centred = mfcc_means - np.mean(mfcc_means)
    return centred / np.std(mfcc_means)

In [None]:
# Features extraction from all audio files (pre-emphasised, normalised MFCC)
extracted_features_subs=[]
# iterrows() has no length, so pass total= to get a real progress bar with ETA
for index_num,row in tqdm(metadata.iterrows(), total=len(metadata)):
    file_name = os.path.join(os.path.abspath(audio_dataset_path),'fold'+str(row["fold"])+'/',str(row["slice_file_name"]))
    final_class_labels=row["class"]
    data=features_extractor_spectral_substraction(file_name)
    extracted_features_subs.append([data,final_class_labels])

In [None]:
# Converting extracted_features to Pandas dataframe
# NOTE: this rebinds extracted_features_df, replacing the plain-MFCC table built earlier
# with the features from features_extractor_spectral_substraction.
extracted_features_df=pd.DataFrame(extracted_features_subs,columns=['feature','class'])
extracted_features_df.head(10)

In [None]:
# Data Frame Saving
# Written to a separate file so the plain-MFCC export is not overwritten
extracted_features_df.to_csv("extracted_features_processed.csv")

In [None]:
# Data Splitting
# Rebuild X / y from the processed feature table (overwrites the earlier X and y)
X=np.array(extracted_features_df['feature'].tolist())
y=np.array(extracted_features_df['class'].tolist())

In [None]:
# Inspect the class labels before encoding
y


In [None]:
# Label Encoding: refit the encoder on the processed labels and one-hot encode them
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder
labelencoder = LabelEncoder()
integer_labels = labelencoder.fit_transform(y)
y = to_categorical(integer_labels)

In [None]:
# Inspect the one-hot encoded labels
y


In [None]:
# Training Testing Sets
# Same 80/20 split and seed as for the first model, now on the processed features
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=0)

In [None]:
# Second classifier: same architecture as the first ANN, trained on the processed MFCCs
model_2 = Sequential([
    # first layer
    Dense(1600, input_shape=(40,)),
    Activation('relu'),
    Dropout(0.5),
    # second layer
    Dense(800),
    Activation('relu'),
    Dropout(0.5),
    # third layer
    Dense(400),
    Activation('relu'),
    Dropout(0.5),
    # final layer: one probability per class
    Dense(num_labels),
    Activation('softmax'),
])

In [None]:
model_2.summary()
!pip install statsmodels


In [None]:
from statsmodels.tsa.api import SimpleExpSmoothing
# Import from tensorflow.keras like every other Keras import in this notebook;
# mixing the standalone `keras` package with tf.keras objects can break across versions.
from tensorflow.keras.callbacks import EarlyStopping

# Compile the model with categorical crossentropy loss, accuracy metric, and Adam optimizer
model_2.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')

# Define the number of epochs and batch size
num_epochs = 100
num_batch_size = 128

# Define callbacks for saving the best model and early stopping
os.makedirs('saved_models', exist_ok=True)  # the checkpoint folder must exist
checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification_high.h5', verbose=1, save_best_only=True)
# restore_best_weights=True: without it model_2 keeps the weights of the last epoch
# (`patience` epochs after the best one), and those are what the evaluation cells would use.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, verbose=1, restore_best_weights=True)

# Start the timer
start = datetime.now()

# Fit the model with training data and validation data, using callbacks for saving the best model and early stopping
history = model_2.fit(X_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(X_test, y_test), callbacks=[checkpointer, early_stopping], verbose=1)

# Calculate the duration of the training
duration = datetime.now() - start
print("Training completed in time: ", duration)

# Calculate moving averages of loss and accuracy
# (the first window_size - 1 entries are NaN; all of them are if training stopped earlier than that)
window_size = 10
loss_rolling = pd.Series(history.history['loss']).rolling(window_size).mean().tolist()
accuracy_rolling = pd.Series(history.history['accuracy']).rolling(window_size).mean().tolist()

# Apply exponential smoothing to loss and accuracy
alpha = 0.3
loss_smoothed = SimpleExpSmoothing(history.history['loss']).fit(smoothing_level=alpha).fittedvalues.tolist()
accuracy_smoothed = SimpleExpSmoothing(history.history['accuracy']).fit(smoothing_level=alpha).fittedvalues.tolist()

In [None]:
# Plot the training and validation curves of model_2: first loss, then accuracy
panel_specs = (
    ('loss', 'Model loss', 'Loss', 'upper right'),
    ('accuracy', 'Model accuracy', 'Accuracy', 'lower right'),
)
for metric, title, ylabel, legend_loc in panel_specs:
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc=legend_loc)
    plt.show()

In [None]:
from sklearn.metrics import confusion_matrix, classification_report, roc_auc_score
import numpy as np

# Predict the class probabilities for the test data
y_prob = model_2.predict(X_test)

# Convert the probabilities to class labels
y_pred = np.argmax(y_prob, axis=1)
y_true = np.argmax(y_test, axis=1)

# Calculate the confusion matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(y_true, y_pred)

# Calculate the sensitivity and specificity PER CLASS (one-vs-rest).
# The binary formulas on cm[0:2, 0:2] are not valid for a multi-class matrix.
true_pos = np.diag(cm)
false_neg = cm.sum(axis=1) - true_pos   # rest of the row: class missed
false_pos = cm.sum(axis=0) - true_pos   # rest of the column: class wrongly predicted
true_neg = cm.sum() - (true_pos + false_neg + false_pos)
sensitivity = true_pos / (true_pos + false_neg)   # recall of each class
specificity = true_neg / (true_neg + false_pos)

# Calculate the classification report
class_report = classification_report(y_true, y_pred)

# Calculate the loss and accuracy on the test split
loss, accuracy = model_2.evaluate(X_test, y_test)

# Calculate the AUC (one-vs-rest, macro-averaged by default)
auc = roc_auc_score(y_true, y_prob, multi_class='ovr')

# Print the results
print("Confusion Matrix:")
print(cm)
print("Sensitivity:", sensitivity)
print("Specificity:", specificity)
print("Classification Report:")
print(class_report)
print("Loss:", loss)
print("Accuracy:", accuracy)
print("AUC:", auc)

In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Define the class names (alphabetical, i.e. the order in which LabelEncoder numbers them)
class_names = ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music']

# Plot the confusion matrix
sns.set(font_scale=1.4)
# A 10x10 grid with size-16 annotations needs more room than the default figure
plt.figure(figsize=(12, 10))
# fmt='d' prints the counts as integers instead of scientific notation
sns.heatmap(cm, annot=True, fmt='d', annot_kws={"size": 16}, cmap='Purples', xticklabels=class_names, yticklabels=class_names)
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()

In [None]:
# model_2 was trained on the pre-emphasised, normalised MFCCs produced by
# features_extractor_spectral_substraction, so the clip must go through that same
# extractor (predict_class_label computes plain MFCC means instead).
gun_features = features_extractor_spectral_substraction("gun_1.wav").reshape(1, -1)
gun_label_index = np.argmax(model_2.predict(gun_features), axis=-1)
print('Predicted Label:', gun_label_index)
labelencoder.inverse_transform(gun_label_index)[0]


In [None]:
#resnet
import os
import pandas as pd
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
from tqdm import tqdm
import shutil

In [None]:
# set the path to the UrbanSound8K dataset
data_dir = 'UrbanSound8K/audio'

# read the metadata CSV file into a Pandas DataFrame
metadata = pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')

# Write the images to "spectrograms", the folder the following cells read from,
# and create it up front so savefig() does not fail on a fresh checkout.
output_dir = 'spectrograms'
os.makedirs(output_dir, exist_ok=True)

# create a dictionary to map class names to integer labels
label_dict = {class_name: i for i, class_name in enumerate(metadata['class'].unique())}

# split the metadata into 10 subsets (by row position; avoids calling
# np.array_split directly on a DataFrame, which is deprecated in recent pandas)
metadata_splits = [metadata.iloc[positions] for positions in np.array_split(np.arange(len(metadata)), 10)]

# loop through all audio files in each subset with a progress bar
for split_index, metadata_split in enumerate(metadata_splits):
    for index, row in tqdm(metadata_split.iterrows(), total=len(metadata_split), desc=f"Processing subset {split_index+1}/10"):
        file_name = row['slice_file_name']
        class_name = row['class']
        path = os.path.join(data_dir, f'fold{row["fold"]}', file_name)

        # load audio file and extract features
        y, sr = librosa.load(path, sr=None, duration=4, mono=True, res_type='kaiser_fast') # load audio file with no resampling and duration of 4 seconds
        y = librosa.effects.trim(y, top_db=20)[0] # remove silent portions
        y = librosa.util.normalize(y) # normalize audio

        ft = np.abs(librosa.stft(y)) # calculate Fourier Transform (FT) magnitude
        # NOTE(review): librosa documents the `S` argument of mfcc() as a log-power MEL
        # spectrogram; here it receives the dB-scaled linear-frequency STFT magnitude.
        # Left unchanged because the inference helper computes its images the same way.
        mfccs = librosa.feature.mfcc(S=librosa.power_to_db(ft), n_mfcc=20)

        # plot MFCC spectrogram and save as image named "<class>_<clip>.png"
        fig, ax = plt.subplots()
        img = librosa.display.specshow(mfccs, x_axis='time', ax=ax)
        fig.colorbar(img, ax=ax)
        ax.set(title='MFCC Spectrogram', xlabel='Time', ylabel='MFCC')
        fig.savefig(os.path.join(output_dir, f'{class_name}_{file_name[:-4]}.png'), bbox_inches='tight', pad_inches=0)
        # close the figure so thousands of plots do not accumulate in memory
        plt.close(fig)

print(f"Number of images saved: {len(os.listdir(output_dir))}")

In [None]:
dataset_path = "spectrograms"

# Loop through all image files in the dataset folder
for file_name in os.listdir(dataset_path):
    # Define the path to the current file
    file_path = os.path.join(dataset_path, file_name)
    # Skip the class folders created by an earlier run of this cell
    if not os.path.isfile(file_path):
        continue
    # Images are named "<class>_<clip>.png". Class names themselves contain underscores
    # (e.g. "gun_shot"), so split on the LAST underscore only; split("_")[0] would file
    # "gun_shot" under "gun". Assumes the clip part has no underscore (e.g. "7383-3-0-0").
    class_name = file_name.rsplit("_", 1)[0]
    # Define the new folder path for this class and create it if needed
    class_folder = os.path.join(dataset_path, class_name)
    os.makedirs(class_folder, exist_ok=True)
    # Define the path to the new file location in the class folder
    new_file_path = os.path.join(class_folder, file_name)
    # Move the file to the new folder
    shutil.move(file_path, new_file_path)

In [None]:
import random


# Define the path to your dataset
dataset_path = "spectrograms"

# Define the path to the directory to store the training, validation, and test sets
train_path = "train"
valid_path = "valid"
test_path = "test"

# Define the train/validation/test split ratios
train_ratio = 0.7
valid_ratio = 0.2
test_ratio = 0.1

# Fixed seed + sorted listings make the split reproducible. Without them a re-run
# copies a different selection on top of the existing folders, so the same image
# can end up in more than one split.
SPLIT_SEED = 42
split_rng = random.Random(SPLIT_SEED)

# Class folders (ignore hidden entries and stray files)
class_folders = sorted(
    entry for entry in os.listdir(dataset_path)
    if not entry.startswith('.') and os.path.isdir(os.path.join(dataset_path, entry))
)

# Create the subdirectories in the training, validation, and test directories
for split_path in [train_path, valid_path, test_path]:
    for class_name in class_folders:
        os.makedirs(os.path.join(split_path, class_name), exist_ok=True)

# Loop through each class folder and split the images into train/validation/test sets
for class_name in class_folders:
    class_path = os.path.join(dataset_path, class_name)
    images = sorted(image for image in os.listdir(class_path) if not image.startswith('.'))
    num_images = len(images)

    # Shuffle the images
    split_rng.shuffle(images)

    # Split the images into train/validation/test sets; the test set takes the remainder
    num_train = int(num_images * train_ratio)
    num_valid = int(num_images * valid_ratio)
    images_by_split = {
        train_path: images[:num_train],
        valid_path: images[num_train:num_train+num_valid],
        test_path: images[num_train+num_valid:],
    }

    # Copy the images into their respective split folders
    for split_path, split_images in images_by_split.items():
        for image in split_images:
            src_path = os.path.join(class_path, image)
            dst_path = os.path.join(split_path, class_name, image)
            shutil.copy(src_path, dst_path)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the paths to the training, validation, and test directories
train_path = 'train'
valid_path = 'valid'
test_path = 'test'

# Define the image size and batch size
img_size = (224, 224)
batch_size = 32

# Define the pre-trained model and freeze its layers
pretrained_model = ResNet50(weights='imagenet', include_top=False, input_shape=img_size + (3,))
for layer in pretrained_model.layers:
    layer.trainable = False

# Define the model architecture: frozen ResNet50 backbone + small dense head
model = Sequential()
model.add(pretrained_model)
model.add(Flatten())
model.add(Dense(1024, activation='relu'))
model.add(Dense(10, activation='softmax'))

# Compile the model
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Define the data generators for training, validation, and test sets
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(train_path, target_size=img_size, batch_size=batch_size, class_mode='categorical')

valid_datagen = ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_directory(valid_path, target_size=img_size, batch_size=batch_size, class_mode='categorical')

# shuffle=False: the evaluation cells compare model.predict(test_generator) with
# test_generator.classes, which is in directory order. With the default shuffle=True
# the predictions come back in a random order and every metric is meaningless.
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(test_path, target_size=img_size, batch_size=batch_size, class_mode='categorical', shuffle=False)

# Train the model
history = model.fit(train_generator, epochs=280, validation_data=valid_generator)

# Evaluate the model on the test set
model.evaluate(test_generator)
# NOTE(review): the file name says 450 epochs but training above runs for 280 — confirm which is intended.
model.save("ResNet50_450epochs.h5")

In [None]:
# Plot the training and validation curves over epochs: first accuracy, then loss
for metric, title, ylabel in (('accuracy', 'Model Accuracy', 'Accuracy'), ('loss', 'Model Loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [None]:
from sklearn.metrics import classification_report, confusion_matrix, roc_auc_score, matthews_corrcoef

# Make predictions on the test set. Predict ONCE and derive both the hard labels and
# the probabilities from the same pass, so they always refer to the same sample order.
y_pred_proba = model.predict(test_generator)
y_pred = np.argmax(y_pred_proba, axis=1)
# NOTE: test_generator.classes is in directory order; predictions only line up with it
# when the generator was created with shuffle=False.
y_true = test_generator.classes

# Calculate classification report and confusion matrix
print('Classification Report:')
print(classification_report(y_true, y_pred))
print('Confusion Matrix:')
print(confusion_matrix(y_true, y_pred))

# Calculate AUC score (one-vs-rest)
print('AUC Score:', roc_auc_score(y_true, y_pred_proba, multi_class='ovr'))

# Calculate correlation matrix
print('Correlation Matrix:')
print(np.corrcoef(y_true, y_pred))
print('Matthews Correlation Coefficient:', matthews_corrcoef(y_true, y_pred))

In [None]:
from sklearn.metrics import confusion_matrix


# Class names used as tick labels
class_names = ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music']

# Predicted and true class indices for the test set
y_pred = np.argmax(model.predict(test_generator), axis=1)
y_true = test_generator.classes

# Confusion matrix (rows = true class, columns = predicted class)
cm = confusion_matrix(y_true, y_pred)

# Draw the matrix as a colour-coded grid
fig, ax = plt.subplots()
matrix_image = ax.imshow(cm, interpolation='nearest', cmap=plt.cm.Blues)
ax.set_title('Confusion Matrix')
fig.colorbar(matrix_image, ax=ax)
tick_marks = np.arange(len(class_names))
ax.set_xticks(tick_marks)
ax.set_xticklabels(class_names, rotation=45)
ax.set_yticks(tick_marks)
ax.set_yticklabels(class_names)
ax.set_xlabel('Predicted Label')
ax.set_ylabel('True Label')
plt.show()

In [None]:
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping
from tensorflow.keras.layers import Dropout, Flatten, Dense
from tensorflow.keras import regularizers
from tensorflow.keras.applications import ResNet50V2
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Define the data generators for training, validation, and test sets
train_datagen = ImageDataGenerator(rescale=1./255)
train_generator = train_datagen.flow_from_directory(train_path, target_size=img_size, batch_size=batch_size, class_mode='categorical')

valid_datagen = ImageDataGenerator(rescale=1./255)
valid_generator = valid_datagen.flow_from_directory(valid_path, target_size=img_size, batch_size=batch_size, class_mode='categorical')

# shuffle=False keeps predictions aligned with test_generator.classes
test_datagen = ImageDataGenerator(rescale=1./255)
test_generator = test_datagen.flow_from_directory(test_path, target_size=img_size, batch_size=batch_size, class_mode='categorical', shuffle=False)

# Load the pre-trained model
pretrained_model = ResNet50V2(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Fine-tune only the last few layers of the pre-trained model.
# A freshly loaded model has every layer trainable, so the earlier layers must be
# frozen explicitly; merely setting trainable=True on the last ten changes nothing.
for layer in pretrained_model.layers[:-10]:
    layer.trainable = False
for layer in pretrained_model.layers[-10:]:
    layer.trainable = True

# Define the model architecture: backbone + regularised dense head
model2 = Sequential()
model2.add(pretrained_model)
model2.add(Flatten())
model2.add(Dense(1024, activation='relu'))
model2.add(Dropout(0.5))
model2.add(Dense(1024, activation='relu', kernel_regularizer=regularizers.l2(0.001)))
model2.add(Dense(10, activation='softmax'))

# Define the learning rate schedule
def lr_schedule(epoch):
    """Step-wise learning-rate schedule for LearningRateScheduler.

    Parameters
    ----------
    epoch : epoch index passed in by the callback.

    Returns
    -------
    0.001 for epochs below 500, 0.0001 for epochs below 1000, 0.00001 afterwards.
    """
    # (exclusive upper epoch bound, learning rate) in ascending order
    steps = ((500, 0.001), (1000, 0.0001))
    for upper_bound, rate in steps:
        if epoch < upper_bound:
            return rate
    return 0.00001

# Define the callbacks
lr_scheduler = LearningRateScheduler(lr_schedule)
# restore_best_weights=True: otherwise the model that gets evaluated and saved below
# carries the weights from `patience` epochs AFTER the best validation loss.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Compile the model
model2.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# Train the model with the callbacks (1500 is an upper bound; early stopping usually ends the run sooner)
history = model2.fit(train_generator, epochs=1500, validation_data=valid_generator, callbacks=[lr_scheduler, early_stopping])

# Evaluate the model on the test set
model2.evaluate(test_generator)

model2.save("ResNet50_erl.h5")

In [None]:
# Plot the training and validation curves of the fine-tuned model: accuracy, then loss
history_panels = (
    ('accuracy', 'Model Accuracy', 'Accuracy'),
    ('loss', 'Model Loss', 'Loss'),
)
for metric, title, ylabel in history_panels:
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel('Epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [None]:
def get_mfcc_spectrogram(audio_path, output_dir):
    """Render the MFCC image of one audio file and save it as a PNG.

    The clip is loaded at its native sample rate (at most 4 seconds, mono), trimmed
    of leading/trailing silence (top_db=20), peak-normalised, converted to 20
    MFCC-style coefficients and plotted with a colour bar, title and axis labels.

    Parameters
    ----------
    audio_path : path of the audio file.
    output_dir : folder for the image; created if it does not exist.

    Returns
    -------
    Path of the written image: ``<output_dir>/<audio file name without extension>.png``.
    """
    y, sr = librosa.load(audio_path, sr=None, duration=4, mono=True, res_type='kaiser_fast')
    y = librosa.effects.trim(y, top_db=20)[0]
    y = librosa.util.normalize(y)
    ft = np.abs(librosa.stft(y))
    # NOTE(review): librosa documents `S` as a log-power MEL spectrogram; this passes the
    # dB-scaled STFT magnitude instead. Kept as is so the images match the training set.
    mfccs = librosa.feature.mfcc(S=librosa.power_to_db(ft), n_mfcc=20)

    fig, ax = plt.subplots()
    img = librosa.display.specshow(mfccs, x_axis='time', ax=ax)
    fig.colorbar(img, ax=ax)
    ax.set(title='MFCC Spectrogram', xlabel='Time', ylabel='MFCC')

    # Make sure the target folder exists, and strip the extension whatever its length
    os.makedirs(output_dir, exist_ok=True)
    image_stem = os.path.splitext(os.path.basename(audio_path))[0]
    image_path = os.path.join(output_dir, image_stem + '.png')
    fig.savefig(image_path, bbox_inches='tight', pad_inches=0)
    plt.close(fig)

    return image_path

In [None]:
audio_path = 'car.wav'
# Relative folder instead of a machine-specific absolute path, so the notebook
# also runs on Colab or another machine; create it before writing into it.
output_dir = 'inference_spectrograms'
os.makedirs(output_dir, exist_ok=True)
image_path = get_mfcc_spectrogram(audio_path, output_dir)

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array


# Read the spectrogram at the network input size and convert it to a float array
image = img_to_array(load_img(image_path, target_size=(224, 224)))

# Same preprocessing as the generators: scale to [0, 1], then add the batch axis
image /= 255.0
image = np.expand_dims(image, axis=0)

# Class probabilities from the frozen-backbone ResNet50 model
prediction = model.predict(image)

# Index of the most probable class
predicted_class = np.argmax(prediction)

# Translate the index into a class name via the training generator's class_indices keys
class_names = list(train_generator.class_indices)
predicted_class_name = class_names[predicted_class]

print(predicted_class_name)

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array


# Read the spectrogram at the network input size
pil_image = load_img(image_path, target_size=(224, 224))
image = img_to_array(pil_image)

# Scale pixels to [0, 1] and add the batch axis expected by predict()
image /= 255.0
image = image[np.newaxis, ...]

# Class probabilities from the fine-tuned ResNet50V2 model
prediction = model2.predict(image)

# Index of the most probable class
predicted_class = np.argmax(prediction)

# Translate the index into a class name via the training generator's class_indices keys
class_names = [name for name in train_generator.class_indices.keys()]
predicted_class_name = class_names[predicted_class]

print(predicted_class_name)

In [None]:
audio_path = 'gun_1.wav'
# Relative folder instead of a machine-specific absolute path, so the notebook
# also runs on Colab or another machine; create it before writing into it.
output_dir = 'inference_spectrograms'
os.makedirs(output_dir, exist_ok=True)
image_path = get_mfcc_spectrogram(audio_path, output_dir)

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array


# Load the gun-shot spectrogram at 224x224 and turn it into a float array
image = img_to_array(load_img(image_path, target_size=(224, 224)))

# Scale to [0, 1] as during training, then wrap it into a batch of one
image /= 255.0
image = np.expand_dims(image, axis=0)

# Run the frozen-backbone ResNet50 model
prediction = model.predict(image)

# Position of the highest probability
predicted_class = np.argmax(prediction)

# Look up the class name in the order defined by the training generator
class_names = list(train_generator.class_indices)
predicted_class_name = class_names[predicted_class]

print(predicted_class_name)

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.image import load_img, img_to_array


# Load the gun-shot spectrogram at 224x224
pil_image = load_img(image_path, target_size=(224, 224))
image = img_to_array(pil_image)

# Scale to [0, 1] as during training, then wrap it into a batch of one
image /= 255.0
image = image[np.newaxis, ...]

# Run the fine-tuned ResNet50V2 model
prediction = model2.predict(image)

# Position of the highest probability
predicted_class = np.argmax(prediction)

# Look up the class name in the order defined by the training generator
class_names = [name for name in train_generator.class_indices.keys()]
predicted_class_name = class_names[predicted_class]

print(predicted_class_name)