In [None]:
# Importing all the required libraries

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
import os
import wave
import pylab
from pathlib import Path
from scipy import signal
from scipy.io import wavfile
from sklearn.metrics import confusion_matrix
import itertools
import random
import keras
import io

In [None]:
# Set paths to input and output data

# Folder that is listed for the training WAV recordings
INPUT_DIR = r'F:\00 PS-1\Project\imp\Training audio samples'
# Folder under which the 'audio-images' spectrogram directory is created
OUTPUT_DIR = r'F:\00 PS-1\Project\imp'
# OUTPUT_DIR --> Where you want to save the spectrogram images.
# NOTE(review): both paths are absolute and machine-specific — adjust before running elsewhere.

In [None]:
# Print names of 5 WAV files from the input path

parent_list = os.listdir(INPUT_DIR)
# Slice instead of indexing so a folder with fewer than five entries
# prints what it has rather than raising IndexError.
for entry_name in parent_list[:5]:
    print(entry_name)

In [None]:
# Plot first 3 WAV files as a waveform and a frequency spectrum

# Slicing (instead of indexing 0..2) tolerates folders holding fewer than three entries.
for wav_name in parent_list[:3]:
    # The context manager closes the file handle even if reading fails.
    with wave.open(os.path.join(INPUT_DIR, wav_name), 'r') as signal_wave:
        # Take the rate from the WAV header so the spectrogram's frequency axis
        # stays correct for recordings that are not sampled at 16 kHz.
        sample_rate = signal_wave.getframerate()
        # Reading `sample_rate` frames yields (at most) the first second of audio.
        # NOTE(review): int16 decoding assumes 16-bit mono PCM — confirm for this dataset.
        sig = np.frombuffer(signal_wave.readframes(sample_rate), dtype=np.int16)

    plt.figure(figsize=(12,12))

    # Top panel: raw waveform
    plot_a = plt.subplot(211)
    plot_a.set_title(wav_name)
    plot_a.plot(sig)
    plot_a.set_xlabel('sample rate * time')
    plot_a.set_ylabel('energy')

    # Bottom panel: spectrogram of the same samples
    plot_b = plt.subplot(212)
    plot_b.specgram(sig, NFFT=1024, Fs=sample_rate, noverlap=900)
    plot_b.set_xlabel('Time')
    plot_b.set_ylabel('Frequency')

plt.show()

In [None]:
# To plot spectrograms for each audio sample and save them.

# Utility function to get sound and frame rate info
def get_wav_info(wav_file):
    """Read every frame of a WAV file.

    Args:
        wav_file: Path (or file-like object) accepted by ``wave.open``.

    Returns:
        tuple: ``(sound_info, frame_rate)`` where ``sound_info`` is the raw
        frame data viewed as a 1-D ``int16`` NumPy array and ``frame_rate``
        is the sampling rate stored in the file header.
    """
    # The context manager closes the file even when reading raises.
    with wave.open(wav_file, 'r') as wav:
        frames = wav.readframes(-1)
        frame_rate = wav.getframerate()
    # Call NumPy directly rather than through pylab's re-export of the same function.
    sound_info = np.frombuffer(frames, dtype=np.int16)
    return sound_info, frame_rate

# For every recording, make a spectrogram and save it as
# <OUTPUT_DIR>/audio-images/class_<first character of the file name>/<file name>.png
images_root = os.path.join(OUTPUT_DIR, 'audio-images')
# makedirs(exist_ok=True) replaces the separate exists()/mkdir() pair.
os.makedirs(images_root, exist_ok=True)

for filename in os.listdir(INPUT_DIR):
    # Match the extension explicitly; a substring test would also accept
    # names such as "wave_notes.txt" and then fail inside wave.open.
    if not filename.lower().endswith('.wav'):
        continue

    file_path = os.path.join(INPUT_DIR, filename)
    file_stem = Path(file_path).stem
    # The first character of the file name selects the class folder.
    target_dir = f'class_{file_stem[0]}'
    dist_dir = os.path.join(images_root, target_dir)
    file_dist_path = os.path.join(dist_dir, file_stem)

    # Skip recordings whose spectrogram image already exists.
    if os.path.exists(file_dist_path + '.png'):
        continue

    os.makedirs(dist_dir, exist_ok=True)
    sound_info, frame_rate = get_wav_info(file_path)
    pylab.specgram(sound_info, Fs=frame_rate)
    pylab.savefig(f'{file_dist_path}.png')
    # Close the figure so figures do not accumulate across iterations.
    pylab.close()

# Print the class folders in our dataset (at most four)
path_list = os.listdir(os.path.join(OUTPUT_DIR, 'audio-images'))
print("Classes: \n")
# Slicing avoids IndexError when fewer folders exist than expected.
for class_name in path_list[:4]:
    print(class_name)

# File names for class 1 (at most ten)
# Path components are passed separately instead of embedding a '/' separator.
path_list = os.listdir(os.path.join(OUTPUT_DIR, 'audio-images', 'class_1'))
print("\nA few example files: \n")
for example_name in path_list[:10]:
    print(example_name)

In [None]:
# Declare constants
IMAGE_HEIGHT = 256
IMAGE_WIDTH = 256
BATCH_SIZE = 32
N_CHANNELS = 3
N_CLASSES = 4

# Make a dataset containing the training spectrograms
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
                                             batch_size=BATCH_SIZE,
                                             validation_split=0.2,
                                             directory=os.path.join(OUTPUT_DIR, 'audio-images'),
                                             shuffle=True,
                                             color_mode='rgb',
                                             image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                                             subset="training",
                                             seed=0)

# Make a dataset containing the validation spectrogram
valid_dataset = tf.keras.preprocessing.image_dataset_from_directory(
                                             batch_size=BATCH_SIZE,
                                             validation_split=0.2,
                                             directory=os.path.join(OUTPUT_DIR, 'audio-images'),
                                             shuffle=True,
                                             color_mode='rgb',
                                             image_size=(IMAGE_HEIGHT, IMAGE_WIDTH),
                                             subset="validation",
                                             seed=0)

In [None]:
# Preview up to nine spectrograms from the first training batch,
# each titled with its integer class label.
plt.figure(figsize=(12, 12))
for images, labels in train_dataset.take(1):
    # A batch can hold fewer than nine images (small dataset / last batch).
    preview_count = min(9, int(images.shape[0]))
    for i in range(preview_count):
        ax = plt.subplot(3, 3, i + 1)
        ax.imshow(images[i].numpy().astype("uint8"))
        ax.set_title(int(labels[i]))
        ax.axis("off")
plt.show()

In [None]:
# Function to prepare our datasets for modelling
def prepare(ds, augment=False):
    """Map pixel rescaling (and optionally augmentation) over a dataset.

    Args:
        ds: Dataset yielding ``(images, labels)`` pairs; it must provide ``map``.
        augment: When true, random flips and rotations are applied after rescaling.

    Returns:
        The mapped dataset with images multiplied by 1/255.
    """
    preprocessing = tf.keras.layers.experimental.preprocessing

    # Two small pipelines: one for scaling, one for augmentation.
    rescale = tf.keras.Sequential([preprocessing.Rescaling(1./255)])
    flip_and_rotate = tf.keras.Sequential([
        preprocessing.RandomFlip("horizontal_and_vertical"),
        preprocessing.RandomRotation(0.2),
    ])

    def _rescale_pair(x, y):
        return rescale(x, training=True), y

    def _augment_pair(x, y):
        return flip_and_rotate(x, training=True), y

    # Rescaling is always applied; augmentation only on request.
    ds = ds.map(_rescale_pair)
    if not augment:
        return ds
    return ds.map(_augment_pair)

# Only rescaling is applied here: augment=False for both splits.
# NOTE: these assignments overwrite the datasets in place, so re-running this
# cell maps the 1/255 rescaling over data that has already been rescaled.
train_dataset = prepare(train_dataset, augment=False)
valid_dataset = prepare(valid_dataset, augment=False)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from keras import callbacks
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import GridSearchCV
from keras import regularizers

In [None]:
# Create CNN model
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, N_CHANNELS)))
#model.add(tf.keras.layers.Conv2D(32, 3, strides=1, padding='same', activation='relu'))
#model.add(tf.keras.layers.BatchNormalization())
#model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
#model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(64, 3, strides=2, padding='same', activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Conv2D(128, 3, padding='same', activation='relu'))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.MaxPooling2D(pool_size=(2, 2)))
model.add(tf.keras.layers.BatchNormalization())
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(256, activation='relu'))
model.add(tf.keras.layers.BatchNormalization())

# REGULARIZATION
# Add Dropout layer
model.add(tf.keras.layers.Dropout(0.2))
# Add L2 regularization to the dense layers
model.add(tf.keras.layers.Dense(256, activation='relu', kernel_regularizer=regularizers.l2(0.01)))

model.add(tf.keras.layers.Dense(N_CLASSES, activation='softmax'))

# Compile model
model.compile(loss='sparse_categorical_crossentropy', optimizer=tf.keras.optimizers.RMSprop(), metrics=['accuracy'])

# Train for up to 100 epochs and capture the history. Training stops early once
# val_loss has not improved for 5 epochs, and the best weights are restored.
# tf.keras.callbacks is used so the callback comes from the same Keras
# implementation as the tf.keras model it is attached to.
earlystopping = tf.keras.callbacks.EarlyStopping(monitor="val_loss", mode="min", patience=5, restore_best_weights=True)
# The callback only takes effect when it is handed to fit().
history = model.fit(train_dataset, epochs=100, validation_data=valid_dataset, callbacks=[earlystopping])

In [None]:
# Plot the loss curves for training and validation.
# Per-epoch metrics recorded by model.fit
history_dict = history.history
loss_values, val_loss_values = history_dict['loss'], history_dict['val_loss']
epochs = range(1, len(loss_values)+1)

# Dots for training, a solid line for validation
fig, ax = plt.subplots(figsize=(8,6))
ax.plot(epochs, loss_values, 'bo', label='Training loss')
ax.plot(epochs, val_loss_values, 'b', label='Validation loss')
ax.set_title('Training and validation loss')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')
ax.legend()
plt.show()

In [None]:
# Plot the accuracy curves for training and validation.
# history_dict comes from the loss-plot cell
acc_values, val_acc_values = history_dict['accuracy'], history_dict['val_accuracy']
epochs = range(1, len(acc_values)+1)

# Dots for training, a solid line for validation
fig, ax = plt.subplots(figsize=(8,6))
ax.plot(epochs, acc_values, 'bo', label='Training accuracy')
ax.plot(epochs, val_acc_values, 'b', label='Validation accuracy')
ax.set_title('Training and validation accuracy')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
ax.legend()
plt.show()

In [None]:
# Compute the final loss and accuracy
# Evaluate on the validation split; the result unpacks into loss and accuracy
evaluation = model.evaluate(valid_dataset, verbose=0)
final_loss, final_acc = evaluation
print("Final loss: {0:.3f}, final accuracy: {1:.3f}".format(*evaluation))

In [None]:
# Load the test dataset
# Folder holding the test WAV recordings
TST_DIR = r'F:\00 PS-1\Project\imp\Test Audio'
# Folder under which the 'Test Dataset' spectrogram directory is created
TES_DIR = r'F:\00 PS-1\Project\imp'

# Silence NumPy divide-by-zero warnings for the rest of the session
np.seterr(divide='ignore')
def get_wav_info(wav_file):
    """Read every frame of a WAV file.

    Args:
        wav_file: Path (or file-like object) accepted by ``wave.open``.

    Returns:
        tuple: ``(sound_info, frame_rate)`` where ``sound_info`` is the raw
        frame data viewed as a 1-D ``int16`` NumPy array and ``frame_rate``
        is the sampling rate stored in the file header.
    """
    # The context manager closes the file even when reading raises.
    with wave.open(wav_file, 'r') as wav:
        frames = wav.readframes(-1)
        frame_rate = wav.getframerate()
    # Call NumPy directly rather than through pylab's re-export of the same function.
    sound_info = np.frombuffer(frames, dtype=np.int16)
    return sound_info, frame_rate

# For every test recording, make a spectrogram and save it as
# <TES_DIR>/Test Dataset/class_<first character of the file name>/<file name>.png
test_images_root = os.path.join(TES_DIR, 'Test Dataset')
# makedirs(exist_ok=True) replaces the separate exists()/mkdir() pair.
os.makedirs(test_images_root, exist_ok=True)

for filename in os.listdir(TST_DIR):
    # Match the extension explicitly; a substring test would also accept
    # names such as "wave_notes.txt" and then fail inside wave.open.
    if not filename.lower().endswith('.wav'):
        continue

    file_path = os.path.join(TST_DIR, filename)
    file_stem = Path(file_path).stem
    # The first character of the file name selects the class folder.
    target_dir = f'class_{file_stem[0]}'
    dist_dir = os.path.join(test_images_root, target_dir)
    file_dist_path = os.path.join(dist_dir, file_stem)

    # Skip recordings whose spectrogram image already exists.
    if os.path.exists(file_dist_path + '.png'):
        continue

    os.makedirs(dist_dir, exist_ok=True)
    sound_info, frame_rate = get_wav_info(file_path)
    pylab.specgram(sound_info, Fs=frame_rate)
    pylab.savefig(f'{file_dist_path}.png')
    # Close the figure so figures do not accumulate across iterations.
    pylab.close()

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Scale pixels by 1/255, the same factor prepare() applies to the training data
test_datagen = ImageDataGenerator(rescale=1./255)
TEST_DIR = (r'F:\00 PS-1\Project\imp\Test Dataset')
image_height = IMAGE_HEIGHT
image_width = IMAGE_WIDTH
test_dataset = test_datagen.flow_from_directory(
    directory=TEST_DIR,
    # Keras expects target_size as (height, width).
    target_size=(image_height, image_width),
    # image_dataset_from_directory resizes bilinearly by default, whereas this
    # generator defaults to nearest-neighbour; match the training-time resize.
    interpolation='bilinear',
    batch_size=BATCH_SIZE,
    class_mode=None,   # yield images only, no labels
    shuffle=False      # keep a stable order for the printed results
)

# Make predictions
predictions = model.predict(test_dataset)

# Convert the per-class scores to the index of the most likely class
predicted_labels = np.argmax(predictions, axis=1)

# Map class indices back to the folder names found in TEST_DIR
class_labels = test_dataset.class_indices
inv_class_labels = {index: name for name, index in class_labels.items()}

# Convert predicted labels to class names
predicted_classes = [inv_class_labels[int(label)] for label in predicted_labels]

# Print the predictions
for image_number, class_name in enumerate(predicted_classes, start=1):
    print(f"Image {image_number}: {class_name}")

In [None]:
import tkinter as tk
from tkinter import filedialog

# Start from a known value so later code can tell that nothing was selected
# instead of failing with NameError on an undefined name.
sample_dir = ""

def browse_file():
    """Open a file-picker dialog and remember the chosen path in ``sample_dir``.

    A cancelled dialog returns an empty result; in that case any previous
    selection is left untouched.
    """
    global sample_dir  # The selection is published through this module-level name
    selected_path = filedialog.askopenfilename()
    if selected_path:
        sample_dir = selected_path
        print("Selected File:", sample_dir)

def create_gui():
    """Show a small window with a file-picker button and a close button.

    Blocks in the tkinter main loop until the window is destroyed.
    """
    window = tk.Tk()
    window.title("File Explorer GUI")
    window.geometry("300x100")

    # (caption, callback) for each button, in display order:
    # the first opens the file explorer, the second closes the popup.
    button_specs = (
        ("Open File Explorer", browse_file),
        ("Close", window.destroy),
    )
    for caption, handler in button_specs:
        tk.Button(window, text=caption, command=handler).pack(pady=10)

    window.mainloop()

# Launch the file picker.
# NOTE(review): presumably this guard is always true when run as a notebook cell — confirm.
if __name__ == "__main__":
    create_gui()

# sample_dir is assigned inside browse_file() through its `global` declaration
print('sample_dir = ',sample_dir)      

In [None]:
# Testing for one particular sample

from tensorflow.keras.preprocessing.image import ImageDataGenerator

# The generator is built only to recover the class-index -> folder-name mapping.
test_datagen = ImageDataGenerator(rescale=1./255)
TEST_DIR = (r'F:\00 PS-1\Project\imp\Test Dataset')
image_height = IMAGE_HEIGHT
image_width = IMAGE_WIDTH
test_dataset = test_datagen.flow_from_directory(
    directory=TEST_DIR,
    # Keras expects target_size as (height, width).
    target_size=(image_height, image_width),
    batch_size=BATCH_SIZE,
    class_mode=None,
    shuffle=False
)

# Path to that sample (...)
#sample_image_path = (r'F:\00 PS-1\Project\imp\Test Dataset\class_2\LV 100.png')

# Load the sample image chosen in the file-picker cell
sample_image_path = sample_dir
# Fail with a clear message when the dialog was cancelled or the file is gone.
if not sample_image_path or not os.path.isfile(sample_image_path):
    raise FileNotFoundError(f"No sample image selected or file not found: {sample_image_path!r}")

sample_image = tf.keras.preprocessing.image.load_img(
    sample_image_path,
    target_size=(IMAGE_HEIGHT, IMAGE_WIDTH),  # (height, width)
    # Match the bilinear resize image_dataset_from_directory used for training.
    interpolation='bilinear',
)
sample_image_array = tf.keras.preprocessing.image.img_to_array(sample_image)
# Add the leading batch axis expected by model.predict
sample_image_array = np.expand_dims(sample_image_array, axis=0)
sample_image_array = sample_image_array / 255.0  # Normalize the image

# Make prediction for the sample image
prediction = model.predict(sample_image_array)
# Plain int so the dictionary lookup below does not depend on NumPy scalar hashing
predicted_label = int(np.argmax(prediction))

# Map class indices back to the folder names found in TEST_DIR
class_labels = test_dataset.class_indices
inv_class_labels = {index: name for name, index in class_labels.items()}

# Convert predicted label to class name
predicted_class = inv_class_labels[predicted_label]

# Print the prediction for the sample image
print(f"Sample Image: {sample_image_path}")
print(f"Prediction: {predicted_class}")

In [None]:
from matplotlib import style
import tkinter as tk
from tkinter import ttk
from tkinter import *
from PIL import Image, ImageTk

LARGE_FONT= ("Helvetica", 20)
NORM_FONT= ("Helvetica", 14)
SMALL_FONT= ("Helvetica", 10)

# Caption and illustration for every class name the popup knows about.
# NOTE(review): class_2 is captioned "Heavy Vehicle" but uses the "3_..." image,
# and class_3 is captioned "Light Vehicle" with the "2_..." image — confirm
# that the class-to-caption pairing is the intended one.
CLASS_INFO = {
    "class_0": ("Single Man or Crawling Men", r'F:\00 PS-1\Project\imp\popup images\0_Single Man.gif'),
    "class_1": ("Group of Men", r'F:\00 PS-1\Project\imp\popup images\1_Group of Men.gif'),
    "class_2": ("Heavy Vehicle", r'F:\00 PS-1\Project\imp\popup images\3_Heavy Vehicle.gif'),
    "class_3": ("Light Vehicle", r'F:\00 PS-1\Project\imp\popup images\2_Light Vehicle.gif'),
}

# Fail before any window is created instead of hitting a NameError on
# `output` when the predicted class has no entry.
if predicted_class not in CLASS_INFO:
    raise ValueError(f"No popup defined for predicted class {predicted_class!r}")
output, image_path = CLASS_INFO[predicted_class]

popup = tk.Tk()
popup.title("Target Info")
popup.geometry("400x400")

output_label = tk.Label(popup, text=output, font=NORM_FONT)
output_label.pack()

# Create a photoimage object of the image in the path
image1 = Image.open(image_path)
image1 = image1.resize((350, 350), Image.LANCZOS)
test = ImageTk.PhotoImage(image1)
label1 = tk.Label(popup, image=test)
# Keep a reference on the widget so the image is not garbage-collected
label1.image = test
label1.pack(pady=10)

popup.mainloop()

In [None]:
# wait

In [None]:
# Clean the output dir (To be used when the training dataset needs to be changed)
import shutil

# Set to True only when the training spectrograms must be regenerated from
# scratch; leaving it False stops "Run All" from deleting the dataset.
CLEAN_OUTPUT = False

# Derive the folder from OUTPUT_DIR (where the spectrograms are written)
# instead of a hard-coded Kaggle path that does not exist on this machine.
audio_images_dir = os.path.join(OUTPUT_DIR, 'audio-images')
if CLEAN_OUTPUT and os.path.isdir(audio_images_dir):
    shutil.rmtree(audio_images_dir)
    print(f"Removed {audio_images_dir}")

In [None]:
# Some extra code snippets we used: (Some extra ideas)

In [None]:
# Root folder under which the following cell creates its own 'audio-images' spectrogram tree
OUTPUT_DIR2 = r'F:\00 PS-1\Project\imp\2nd order'
# NOTE(review): not referenced by any other visible cell — confirm whether it is still needed
OUTPUT_DIR3 = r'F:\00 PS-1\Project\imp\audio-images'

In [None]:
# Utility function to get sound and frame rate info
def get_wav_info(wav_file):
    """Read every frame of a WAV file.

    Args:
        wav_file: Path (or file-like object) accepted by ``wave.open``.

    Returns:
        tuple: ``(sound_info, frame_rate)`` where ``sound_info`` is the raw
        frame data viewed as a 1-D ``int16`` NumPy array and ``frame_rate``
        is the sampling rate stored in the file header.
    """
    # The context manager closes the file even when reading raises.
    with wave.open(wav_file, 'r') as wav:
        frames = wav.readframes(-1)
        frame_rate = wav.getframerate()
    # Call NumPy directly rather than through pylab's re-export of the same function.
    sound_info = np.frombuffer(frames, dtype=np.int16)
    return sound_info, frame_rate

# For every recording, make a spectrogram and save it as
# <OUTPUT_DIR2>/audio-images/class_<first three characters of the file name>/<file name>.png
images_root2 = os.path.join(OUTPUT_DIR2, 'audio-images')
# makedirs(exist_ok=True) replaces the separate exists()/mkdir() pair.
os.makedirs(images_root2, exist_ok=True)

for filename in os.listdir(INPUT_DIR):
    # Match the extension explicitly; a substring test would also accept
    # names such as "wave_notes.txt" and then fail inside wave.open.
    if not filename.lower().endswith('.wav'):
        continue

    file_path = os.path.join(INPUT_DIR, filename)
    file_stem = Path(file_path).stem
    # The first three characters of the file name select the class folder.
    target_dir = f'class_{file_stem[:3]}'
    dist_dir = os.path.join(images_root2, target_dir)
    file_dist_path = os.path.join(dist_dir, file_stem)

    # Skip recordings whose spectrogram image already exists.
    if os.path.exists(file_dist_path + '.png'):
        continue

    os.makedirs(dist_dir, exist_ok=True)
    sound_info, frame_rate = get_wav_info(file_path)
    pylab.specgram(sound_info, Fs=frame_rate)
    pylab.savefig(f'{file_dist_path}.png')
    # Close the figure so figures do not accumulate across iterations.
    pylab.close()

# Print the class folders in this dataset (at most sixteen)
path_list = os.listdir(os.path.join(OUTPUT_DIR2, 'audio-images'))
print("Classes: \n")
# Slicing avoids IndexError when fewer folders exist than expected.
for class_name in path_list[:16]:
    print(class_name)

# File names for class 1_1 (at most ten)
# Path components are passed separately instead of embedding a '/' separator.
path_list = os.listdir(os.path.join(OUTPUT_DIR2, 'audio-images', 'class_1_1'))
print("\nA few example files: \n")
for example_name in path_list[:10]:
    print(example_name)