In [1]:
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
from scipy import signal
from scipy.fft import fftshift
from matplotlib.pyplot import figure, show
import time

In [11]:
def getaudiodict(directory):
    """Build stacked per-channel spectrograms for every usable recording.

    Each file in the ``Recording`` sub-folder of ``directory`` is loaded with
    ``np.load`` and reshaped to ``(-1, 6)``, i.e. it is treated as six
    interleaved channels.  For every channel a fixed-length window that starts
    shortly after the channel's maximum sample is cut out and converted to a
    spectrogram (``scipy.signal.spectrogram`` with ``nperseg=256``).

    A recording is skipped when:
      * it contains no samples,
      * the maximum of channel 1 is below 1200, or
      * any of its six spectrograms does not have shape ``(129, 6)``
        (for example because the peak is too close to the end of the file).

    Parameters
    ----------
    directory : str
        Root folder that contains the ``Recording`` sub-folder.  A trailing
        path separator is optional.

    Returns
    -------
    dict
        Maps the file name prefix (text before the first ``'.'``) to an array
        of shape ``(6, 129, 6)`` holding the six channel spectrograms.
    """
    fs = 48000              # Sampling rate
    n_channels = 6
    min_peak = 1200         # Recordings whose channel-1 peak is below this are ignored
    expected_shape = (129, 6)

    # os.path.join works with or without a trailing separator on `directory`.
    audio_directory = os.path.join(directory, 'Recording')
    audio_dictionary = {}

    # Sorted so the resulting dictionary order does not depend on the file system.
    for filename in sorted(os.listdir(audio_directory)):
        filename_prefix = filename.split('.')[0]
        recorded_sample = np.load(os.path.join(audio_directory, filename))

        # An empty recording has no peak at all; np.max would raise on it.
        if recorded_sample.size == 0:
            continue

        # One column per channel (samples are interleaved in the flat array).
        channels = recorded_sample.reshape([-1, n_channels])
        if np.max(channels[:, 0]) < min_peak:
            continue

        n_samples = channels.shape[0]
        duration = n_samples / fs
        # Equivalent to 0.0015 * fs and 0.03 * fs samples; kept in this exact
        # form so floating-point rounding matches previously generated features.
        offset = int(n_samples * 0.0015 / duration)
        window = int(n_samples * 0.03 / duration)

        original_features = []
        for channel_index in range(n_channels):
            channel = channels[:, channel_index]
            # Window starts `offset` samples after the first occurrence of the peak.
            start = int(np.argmax(channel)) + offset
            sliced_data = channel[start:start + window]

            _, _, Sxx = signal.spectrogram(sliced_data, fs, nperseg=256)
            original_features.append(Sxx)

        # Only keep recordings for which every channel produced a full-size spectrogram.
        if all(Sxx.shape == expected_shape for Sxx in original_features):
            audio_dictionary[filename_prefix] = np.stack(original_features, axis=0)

    return audio_dictionary
    
def getlabeldict(directory, num_objects=1, min_distance=15):
    """Derive normalised object coordinates from the images of a dataset.

    Every file in the ``Images`` sub-folder of ``directory`` is read with
    ``cv2.imread``, cropped to rows 162:304 and columns 231:383, converted to
    grayscale and passed to ``cv2.goodFeaturesToTrack``.  When exactly
    ``num_objects`` corners are found, their integer pixel positions are
    divided by the crop width / height, rounded to three decimals and stored
    as the label of that image.

    Parameters
    ----------
    directory : str
        Root folder that contains the ``Images`` sub-folder.  A trailing path
        separator is optional.
    num_objects : int, optional
        Number of corners requested from the detector and required for an
        image to be labelled.  Defaults to 1.
    min_distance : int or float, optional
        Minimum distance between detected corners, forwarded to
        ``cv2.goodFeaturesToTrack``.  Defaults to 15; the two-object variant
        of this notebook used ``num_objects=2, min_distance=5``.

    Returns
    -------
    dict
        Maps the file name prefix (text before the first ``'.'``) to a flat
        list ``[x_1, y_1, ..., x_n, y_n]`` of coordinates relative to the crop.
        Images that cannot be read, are too small for the crop, or do not
        yield exactly ``num_objects`` corners are left out; a one-line summary
        of how many were skipped is printed.
    """
    # os.path.join works with or without a trailing separator on `directory`.
    images_directory = os.path.join(directory, 'Images')
    label_dictionary = {}
    error_files = []  # prefixes of the images that could not be labelled

    for filename in sorted(os.listdir(images_directory)):
        filename_prefix = filename.split('.')[0]

        img = cv2.imread(os.path.join(images_directory, filename))
        if img is None:
            # cv2.imread returns None for unreadable or non-image files.
            error_files.append(filename_prefix)
            continue

        img_cropped = img[162:304, 231:383]
        if img_cropped.size == 0:
            # Image is smaller than the crop region; nothing left to analyse.
            error_files.append(filename_prefix)
            continue

        gray = np.float32(cv2.cvtColor(img_cropped, cv2.COLOR_BGR2GRAY))

        corners = cv2.goodFeaturesToTrack(gray, num_objects, 0.01, min_distance)
        if corners is None:
            # The detector returns None when it finds no corner at all.
            error_files.append(filename_prefix)
            continue

        # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
        corners = corners.astype(np.intp).reshape(-1, 2)

        if len(corners) != num_objects:
            error_files.append(filename_prefix)
            continue

        crop_height, crop_width = img_cropped.shape[:2]
        coordinates = []
        for x, y in corners:
            coordinates.append(round(x / crop_width, 3))
            coordinates.append(round(y / crop_height, 3))

        label_dictionary[filename_prefix] = coordinates

    if error_files:
        print("getlabeldict: skipped {0} image(s) without a usable label".format(len(error_files)))

    return label_dictionary

def getXandY(audio_dict, label_dict):
    """Align audio features with their labels and stack both into arrays.

    Only keys present in both dictionaries are used, in the iteration order
    of ``audio_dict``.

    Parameters
    ----------
    audio_dict : dict
        Maps a sample name to its feature array.
    label_dict : dict
        Maps a sample name to its label (array-like).

    Returns
    -------
    tuple
        ``(X, Y)`` where ``X`` stacks the matching features and ``Y`` stacks
        the matching labels along a new first axis.

    Raises
    ------
    ValueError
        Raised by ``np.stack`` when the two dictionaries share no key.
    """
    shared_names = [name for name in audio_dict if name in label_dict]
    features = [audio_dict[name] for name in shared_names]
    targets = [label_dict[name] for name in shared_names]
    return np.stack(features, axis=0), np.stack(targets, axis=0)

In [12]:
# Build the X (features) and Y (labels) matrices for the model.
# getaudiodict uses nperseg=256 and keeps only (129, 6) spectrograms for each
# of the 6 channels, so X has shape (n_samples, 6, 129, 6); Y has one row of
# coordinates per sample.

start = time.time()
directory_name = r'Enter your directory name'  # root folder holding the 'Recording' and 'Images' sub-folders
audio_d = getaudiodict(directory_name)
label_d = getlabeldict(directory_name)
X, Y = getXandY(audio_d, label_d)
assert X.shape[0] == Y.shape[0], "X and Y must contain the same number of samples"

# Report how long the feature / label extraction took.
elapsed_minutes = (time.time() - start) / 60
print("time taken for creating X and Y matrices is ", elapsed_minutes, "minutes")

time taken for creating X and Y matrices is  2.8427886525789896 minutes


In [14]:
# Persist the feature matrix X and the label matrix Y as .npy files
# (np.save appends ".npy" to the name when it is not already there).

np.save(file='Enter X Matrix Name', arr=X)
np.save(file='Enter Y Matrix Name', arr=Y)

(1497, 6, 129, 6) (1497, 2)
