In [7]:
# Imports
import librosa, librosa.display
import matplotlib.pyplot as plt
import os
import numpy as np
import math
import json
import cv2

In [8]:
# Paths
DATASET_AUDIO_TRAIN = "EmotionDataset/Train/Audio"
DATASET_IMAGE_TRAIN = "EmotionDataset/Train/Image"
JSON_TRAIN = "json_storage/data.json"

DATASET_AUDIO_TRAIN = "EmotionDataset/Test/Audio"
DATASET_IMAGE_TRAIN = "EmotionDataset/Test/Image"
JSON_TRAIN = "json_storage/test.json"

# Audio Var
SAMPLE_RATE = 22050
DURATION = 2
SAMPLES_PER_TRACK = SAMPLE_RATE * DURATION

# Image Var
IMG_SIZE = 48

In [9]:
# Process audio and image data - store in data.json file
def save_data(audio_path, image_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=1):
    data = {
        "mapping": [],
        "mfcc": [],
        "image": [],
        "labels": []
    }
    
    # how many samples for each audio input
    num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments) 
    # number of mfccs if samples are made
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length) 
    
    # walk through audio directories
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(audio_path)):
        if dirpath is not audio_path:
            dirpath_components = dirpath.split("/")
            semantic_label = dirpath_components[-1]
            # store the directories opened
            data["mapping"].append(semantic_label)
            
            print("\nProcessing {}".format(semantic_label))
            
            for f in filenames:
                try:
                    file_path = os.path.join(dirpath, f)
                    signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
                except Exception as e:                                                    
                    print('Audio failed to process: ' + e)
                
                for s in range(num_segments):
                    # Process audio
                    start_sample = num_samples_per_segment * s
                    finish_sample = start_sample + num_samples_per_segment
                        
                    mfcc = librosa.feature.mfcc(signal[start_sample:finish_sample],
                                               sr=sr,
                                               n_fft=n_fft,
                                               n_mfcc=n_mfcc,
                                               hop_length=hop_length)

                    mfcc = mfcc.T
                    
                    if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                        # store mfcc data
                        data["mfcc"].append(mfcc.tolist())
                        # store audio type
                        data["labels"].append(i-1)
                        print("{}, segment:{}".format(file_path, s+1))
    
    # walk through image directories
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(image_path)):
        if dirpath is not image_path:
            dirpath_components = dirpath.split("/")
            semantic_label = dirpath_components[-1]
            # store the directories opened
            data["mapping"].append(semantic_label)
            
            print("\nProcessing {}".format(semantic_label))
            
            for f in filenames:
                file_path = os.path.join(dirpath, f)
                try:
                    # process image
                    img_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) 
                    sized_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
                    # store image data
                    data["image"].append(sized_array.tolist())
                    print("{}".format(file_path))
                except Exception as e:                                                    
                    print('Image failed to process: ' + e)
    
    # dump stored data into json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        

save_data(DATASET_AUDIO_TRAIN, DATASET_IMAGE_TRAIN, JSON_TRAIN, num_segments=1)


Processing Audio\Happy
EmotionDataset/Test/Audio\Happy\fate.wav, segment:1
EmotionDataset/Test/Audio\Happy\gap.wav, segment:1
EmotionDataset/Test/Audio\Happy\great.wav, segment:1
EmotionDataset/Test/Audio\Happy\half.wav, segment:1
EmotionDataset/Test/Audio\Happy\happy.wav, segment:1
EmotionDataset/Test/Audio\Happy\hug.wav, segment:1
EmotionDataset/Test/Audio\Happy\jail.wav, segment:1
EmotionDataset/Test/Audio\Happy\keep.wav, segment:1
EmotionDataset/Test/Audio\Happy\kick.wav, segment:1
EmotionDataset/Test/Audio\Happy\lamp.wav, segment:1
EmotionDataset/Test/Audio\Happy\lean.wav, segment:1
EmotionDataset/Test/Audio\Happy\make.wav, segment:1
EmotionDataset/Test/Audio\Happy\mend.wav, segment:1
EmotionDataset/Test/Audio\Happy\neutral.wav, segment:1
EmotionDataset/Test/Audio\Happy\pain.wav, segment:1
EmotionDataset/Test/Audio\Happy\pen.wav, segment:1
EmotionDataset/Test/Audio\Happy\rain.wav, segment:1
EmotionDataset/Test/Audio\Happy\sad.wav, segment:1
EmotionDataset/Test/Audio\Happy\sheep.w

In [10]:
def save_test_data(audio_path, image_path, json_path, n_mfcc=13, n_fft=2048, hop_length=512, num_segments=1):
    data = {
        "mapping": [],
        "mfcc": [],
        "image": [],
        "labels": []
    }
    
    # how many samples for each audio input
    num_samples_per_segment = int(SAMPLES_PER_TRACK / num_segments) 
    # number of mfccs if samples are made
    expected_num_mfcc_vectors_per_segment = math.ceil(num_samples_per_segment / hop_length) 
    
    # walk through audio directories
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(audio_path)):
        if dirpath is not audio_path:
            dirpath_components = dirpath.split("/")
            semantic_label = dirpath_components[-1]
            # store the directories opened
            data["mapping"].append(semantic_label)
            
            print("\nProcessing {}".format(semantic_label))
            
            for f in filenames:
                try:
                    file_path = os.path.join(dirpath, f)
                    signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)
                except Exception as e:                                                    
                    print('Audio failed to process: ' + e)
                
                for s in range(num_segments):
                    # Process audio
                    start_sample = num_samples_per_segment * s
                    finish_sample = start_sample + num_samples_per_segment
                        
                    mfcc = librosa.feature.mfcc(signal[start_sample:finish_sample],
                                               sr=sr,
                                               n_fft=n_fft,
                                               n_mfcc=n_mfcc,
                                               hop_length=hop_length)

                    mfcc = mfcc.T
                    
                    if len(mfcc) == expected_num_mfcc_vectors_per_segment:
                        # store mfcc data
                        data["mfcc"].append(mfcc.tolist())
                        # store audio type
                        data["labels"].append(i-1)
                        print("{}, segment:{}".format(file_path, s+1))
    
    # walk through image directories
    for i, (dirpath, dirnames, filenames) in enumerate(os.walk(image_path)):
        if dirpath is not image_path:
            dirpath_components = dirpath.split("/")
            semantic_label = dirpath_components[-1]
            # store the directories opened
            data["mapping"].append(semantic_label)
            
            print("\nProcessing {}".format(semantic_label))
            
            for f in filenames:
                file_path = os.path.join(dirpath, f)
                try:
                    # process image
                    img_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE) 
                    sized_array = cv2.resize(img_array, (IMG_SIZE, IMG_SIZE))
                    # store image data
                    data["image"].append(sized_array.tolist())
                    print("{}".format(file_path))
                except Exception as e:                                                    
                    print('Image failed to process: ' + e)
    
    # dump stored data into json file
    with open(json_path, "w") as fp:
        json.dump(data, fp, indent=4)
        

save_test_data(DATASET_AUDIO_TRAIN, DATASET_IMAGE_TRAIN, JSON_TRAIN, num_segments=1)


Processing Audio\Happy
EmotionDataset/Test/Audio\Happy\fate.wav, segment:1
EmotionDataset/Test/Audio\Happy\gap.wav, segment:1
EmotionDataset/Test/Audio\Happy\great.wav, segment:1
EmotionDataset/Test/Audio\Happy\half.wav, segment:1
EmotionDataset/Test/Audio\Happy\happy.wav, segment:1
EmotionDataset/Test/Audio\Happy\hug.wav, segment:1
EmotionDataset/Test/Audio\Happy\jail.wav, segment:1
EmotionDataset/Test/Audio\Happy\keep.wav, segment:1
EmotionDataset/Test/Audio\Happy\kick.wav, segment:1
EmotionDataset/Test/Audio\Happy\lamp.wav, segment:1
EmotionDataset/Test/Audio\Happy\lean.wav, segment:1
EmotionDataset/Test/Audio\Happy\make.wav, segment:1
EmotionDataset/Test/Audio\Happy\mend.wav, segment:1
EmotionDataset/Test/Audio\Happy\neutral.wav, segment:1
EmotionDataset/Test/Audio\Happy\pain.wav, segment:1
EmotionDataset/Test/Audio\Happy\pen.wav, segment:1
EmotionDataset/Test/Audio\Happy\rain.wav, segment:1
EmotionDataset/Test/Audio\Happy\sad.wav, segment:1
EmotionDataset/Test/Audio\Happy\sheep.w