## Industrial Machines Malfunction Detection using Deep Learning
### Valves, Slide-rails and Industrial Fans

#### Prepare Training Data: MFCCs extracted from normal/abnormal machine audio files, stored as JSON


Rajesh Siraskar | 22-May-2021
V.3.1: 100 files per category 

In [1]:
import json
import os
import math
import librosa

**Audio files** (each category has its own sub-folder under `audio_files`), for example:
- Valve, normal operation: `E:\Projects\valve_diagnostics\audio_files\valve_normal`
- Valve, abnormal operation: `E:\Projects\valve_diagnostics\audio_files\valve_abnormal`

In [2]:
# --- Locations ---
DATA_PATH = "audio_files/"  # Root folder; every audio category has its own sub-folder
JSON_FILE = os.path.join(DATA_PATH, "machine_features.json")  # Extracted MFCC features are stored here

# --- Dataset layout and feature extraction ---
NUM_CATEGORIES = 6  # Number of audio categories = types of machine * 2 (normal / abnormal)
NUM_SEGMENTS = 2    # Data augmentation: every recording is split into this many segments
MFCC_FEATURES = 13  # Number of MFCC coefficients extracted per frame. Default = 13

# --- Audio geometry ---
SAMPLE_RATE = 22050   # Hz; recordings are resampled to this rate when loaded
TRACK_DURATION = 10   # Seconds; nominal length assumed for every recording
SAMPLES_PER_TRACK = TRACK_DURATION * SAMPLE_RATE

In [3]:
def generate_and_save_mfcc(data_path, json_file, num_mfcc=13, n_fft=2048, hop_length=512, num_segments=10):
    """Extract MFCCs from the audio files under data_path and save them, with labels, into a *SINGLE* json file

        Every directory that os.walk finds below data_path (the root itself excluded) is treated
        as one category: its name is stored in "mapping" and the position of that name in
        "mapping" is the numeric label of all segments extracted from its files.
        Sub-folders and files are visited in sorted order, so labels are the same on every run.

        Each file is resampled to the module-level SAMPLE_RATE and split into num_segments
        segments whose length is derived from the module-level SAMPLES_PER_TRACK. Only segments
        that yield the expected number of MFCC vectors are stored.

        :param data_path (str): Path to dataset (root folder holding one sub-folder per category)
        :param json_file (str): Path to json file used to save MFCCs
        :param num_mfcc (int): Number of coefficients to extract
        :param n_fft (int): Interval we consider to apply FFT. Measured in # of samples
        :param hop_length (int): Sliding window for FFT. Measured in # of samples
        :param num_segments (int): Number of segments we want to divide sample tracks into
        :return: None. The result is written to json_file
        """

    # JSON dictionary to store mapping, labels, and MFCCs
    data = {
        "mapping": [],  # Semantic label, i.e. the name of the category sub-folder
        "labels": [],   # Numeric labels i.e. 0, 1, 2 ... (index into "mapping")
        "mfcc": []      # MFCC - these are floats. 13 by default for each MFCC vector
    }

    # Note: num_mfcc_vectors_per_segment increases if we reduce the num_segments
    samples_per_segment = int(SAMPLES_PER_TRACK/num_segments)
    num_mfcc_vectors_per_segment = math.ceil(samples_per_segment/hop_length)
    print("\n - SAMPLES_PER_TRACK: {}, num_mfcc_vectors_per_segment: {}".format(SAMPLES_PER_TRACK, num_mfcc_vectors_per_segment))

    # Compare normalised paths by value: an identity test ('is') on strings is not a
    # reliable way to recognise the top-level folder
    root_dir = os.path.normpath(data_path)

    # os.walk crawls the nested sub-folders; every non-root folder becomes one category
    for dirpath, dirnames, filenames in os.walk(data_path):

        # Sorting in place makes os.walk descend in a fixed order, so the numeric
        # labels do not depend on the order in which the OS lists the directories
        dirnames.sort()

        # Ensure we're processing a category sub-folder and not the highest level folder
        if os.path.normpath(dirpath) == root_dir:
            continue

        # The last path component is the semantic label; basename handles both
        # "/" and the platform's own separator
        semantic_label = os.path.basename(os.path.normpath(dirpath))
        n_label = len(data["mapping"])  # Index this category will have in the mapping
        data["mapping"].append(semantic_label)
        print("\n * Processing genre: \'{}\'".format(semantic_label))

        # Process all audio files in the category sub-dir
        for fname in sorted(filenames):
            # Load audio file
            file_path = os.path.join(dirpath, fname)
            signal, sample_rate = librosa.load(file_path, sr=SAMPLE_RATE)

            # Process all segments of audio file
            for n_segment in range(num_segments):

                # Calculate start and finish sample for current segment
                start = samples_per_segment*n_segment
                finish = start+samples_per_segment
                segment = signal[start:finish]

                # Clip ended before this segment starts: no samples to analyse, skip it
                if len(segment) == 0:
                    continue

                # Extract mfcc. y and sr are passed by keyword: recent librosa
                # releases no longer accept them positionally
                mfcc = librosa.feature.mfcc(y=segment, sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)

                # Transpose so that each row is the MFCC vector of one frame
                mfcc = mfcc.T

                # Store only mfcc feature with expected number of vectors
                if len(mfcc) == num_mfcc_vectors_per_segment:
                    # Note: mfcc is a numpy array and needs to be converted to list
                    #  before storing in JSON
                    data["mfcc"].append(mfcc.tolist())
                    data["labels"].append(n_label)
                    # Log "<category><separator><file>" regardless of how data_path was written
                    wav_file = os.path.join(semantic_label, fname)
                    print("   --- {}, segment:{}".format(wav_file, n_segment+1))

    print("\n\n - MFCC's extracted. Write data to JSON")
    # Save MFCCs to json file
    with open(json_file, "w") as fp:
        json.dump(data, fp, indent=4)

    print(" - JSON file ready")

In [4]:
 # Extract the MFCCs of every recording and write them to JSON_FILE
 generate_and_save_mfcc(
     data_path=DATA_PATH,
     json_file=JSON_FILE,
     num_mfcc=MFCC_FEATURES,
     num_segments=NUM_SEGMENTS,
 )


 - SAMPLES_PER_TRACK: 220500, num_mfcc_vectors_per_segment: 216

 * Processing genre: 'industrial_fan_abnormal'
   --- industrial_fan_abnormal\00000000.wav, segment:1
   --- industrial_fan_abnormal\00000000.wav, segment:2
   --- industrial_fan_abnormal\00000001.wav, segment:1
   --- industrial_fan_abnormal\00000001.wav, segment:2
   --- industrial_fan_abnormal\00000002.wav, segment:1
   --- industrial_fan_abnormal\00000002.wav, segment:2
   --- industrial_fan_abnormal\00000003.wav, segment:1
   --- industrial_fan_abnormal\00000003.wav, segment:2
   --- industrial_fan_abnormal\00000004.wav, segment:1
   --- industrial_fan_abnormal\00000004.wav, segment:2
   --- industrial_fan_abnormal\00000005.wav, segment:1
   --- industrial_fan_abnormal\00000005.wav, segment:2
   --- industrial_fan_abnormal\00000006.wav, segment:1
   --- industrial_fan_abnormal\00000006.wav, segment:2
   --- industrial_fan_abnormal\00000007.wav, segment:1
   --- industrial_fan_abnormal\00000007.wav, segment:2
   --- 

   --- industrial_fan_abnormal\00000074.wav, segment:1
   --- industrial_fan_abnormal\00000074.wav, segment:2
   --- industrial_fan_abnormal\00000075.wav, segment:1
   --- industrial_fan_abnormal\00000075.wav, segment:2
   --- industrial_fan_abnormal\00000076.wav, segment:1
   --- industrial_fan_abnormal\00000076.wav, segment:2
   --- industrial_fan_abnormal\00000077.wav, segment:1
   --- industrial_fan_abnormal\00000077.wav, segment:2
   --- industrial_fan_abnormal\00000078.wav, segment:1
   --- industrial_fan_abnormal\00000078.wav, segment:2
   --- industrial_fan_abnormal\00000079.wav, segment:1
   --- industrial_fan_abnormal\00000079.wav, segment:2
   --- industrial_fan_abnormal\00000080.wav, segment:1
   --- industrial_fan_abnormal\00000080.wav, segment:2
   --- industrial_fan_abnormal\00000081.wav, segment:1
   --- industrial_fan_abnormal\00000081.wav, segment:2
   --- industrial_fan_abnormal\00000082.wav, segment:1
   --- industrial_fan_abnormal\00000082.wav, segment:2
   --- ind

   --- industrial_fan_normal\00000050.wav, segment:1
   --- industrial_fan_normal\00000050.wav, segment:2
   --- industrial_fan_normal\00000051.wav, segment:1
   --- industrial_fan_normal\00000051.wav, segment:2
   --- industrial_fan_normal\00000052.wav, segment:1
   --- industrial_fan_normal\00000052.wav, segment:2
   --- industrial_fan_normal\00000053.wav, segment:1
   --- industrial_fan_normal\00000053.wav, segment:2
   --- industrial_fan_normal\00000054.wav, segment:1
   --- industrial_fan_normal\00000054.wav, segment:2
   --- industrial_fan_normal\00000055.wav, segment:1
   --- industrial_fan_normal\00000055.wav, segment:2
   --- industrial_fan_normal\00000056.wav, segment:1
   --- industrial_fan_normal\00000056.wav, segment:2
   --- industrial_fan_normal\00000057.wav, segment:1
   --- industrial_fan_normal\00000057.wav, segment:2
   --- industrial_fan_normal\00000058.wav, segment:1
   --- industrial_fan_normal\00000058.wav, segment:2
   --- industrial_fan_normal\00000059.wav, seg

   --- slide_rail_abnormal\00000028.wav, segment:1
   --- slide_rail_abnormal\00000028.wav, segment:2
   --- slide_rail_abnormal\00000029.wav, segment:1
   --- slide_rail_abnormal\00000029.wav, segment:2
   --- slide_rail_abnormal\00000030.wav, segment:1
   --- slide_rail_abnormal\00000030.wav, segment:2
   --- slide_rail_abnormal\00000031.wav, segment:1
   --- slide_rail_abnormal\00000031.wav, segment:2
   --- slide_rail_abnormal\00000032.wav, segment:1
   --- slide_rail_abnormal\00000032.wav, segment:2
   --- slide_rail_abnormal\00000033.wav, segment:1
   --- slide_rail_abnormal\00000033.wav, segment:2
   --- slide_rail_abnormal\00000034.wav, segment:1
   --- slide_rail_abnormal\00000034.wav, segment:2
   --- slide_rail_abnormal\00000035.wav, segment:1
   --- slide_rail_abnormal\00000035.wav, segment:2
   --- slide_rail_abnormal\00000036.wav, segment:1
   --- slide_rail_abnormal\00000036.wav, segment:2
   --- slide_rail_abnormal\00000037.wav, segment:1
   --- slide_rail_abnormal\0000

   --- slide_rail_normal\00000009.wav, segment:1
   --- slide_rail_normal\00000009.wav, segment:2
   --- slide_rail_normal\00000010.wav, segment:1
   --- slide_rail_normal\00000010.wav, segment:2
   --- slide_rail_normal\00000011.wav, segment:1
   --- slide_rail_normal\00000011.wav, segment:2
   --- slide_rail_normal\00000012.wav, segment:1
   --- slide_rail_normal\00000012.wav, segment:2
   --- slide_rail_normal\00000013.wav, segment:1
   --- slide_rail_normal\00000013.wav, segment:2
   --- slide_rail_normal\00000014.wav, segment:1
   --- slide_rail_normal\00000014.wav, segment:2
   --- slide_rail_normal\00000015.wav, segment:1
   --- slide_rail_normal\00000015.wav, segment:2
   --- slide_rail_normal\00000016.wav, segment:1
   --- slide_rail_normal\00000016.wav, segment:2
   --- slide_rail_normal\00000017.wav, segment:1
   --- slide_rail_normal\00000017.wav, segment:2
   --- slide_rail_normal\00000018.wav, segment:1
   --- slide_rail_normal\00000018.wav, segment:2
   --- slide_rail_no

   --- slide_rail_normal\00000093.wav, segment:1
   --- slide_rail_normal\00000093.wav, segment:2
   --- slide_rail_normal\00000094.wav, segment:1
   --- slide_rail_normal\00000094.wav, segment:2
   --- slide_rail_normal\00000095.wav, segment:1
   --- slide_rail_normal\00000095.wav, segment:2
   --- slide_rail_normal\00000096.wav, segment:1
   --- slide_rail_normal\00000096.wav, segment:2
   --- slide_rail_normal\00000097.wav, segment:1
   --- slide_rail_normal\00000097.wav, segment:2
   --- slide_rail_normal\00000098.wav, segment:1
   --- slide_rail_normal\00000098.wav, segment:2
   --- slide_rail_normal\00000099.wav, segment:1
   --- slide_rail_normal\00000099.wav, segment:2

 * Processing genre: 'valve_abnormal'
   --- valve_abnormal\00000000.wav, segment:1
   --- valve_abnormal\00000000.wav, segment:2
   --- valve_abnormal\00000001.wav, segment:1
   --- valve_abnormal\00000001.wav, segment:2
   --- valve_abnormal\00000002.wav, segment:1
   --- valve_abnormal\00000002.wav, segment:2

   --- valve_abnormal\00000082.wav, segment:1
   --- valve_abnormal\00000082.wav, segment:2
   --- valve_abnormal\00000083.wav, segment:1
   --- valve_abnormal\00000083.wav, segment:2
   --- valve_abnormal\00000084.wav, segment:1
   --- valve_abnormal\00000084.wav, segment:2
   --- valve_abnormal\00000085.wav, segment:1
   --- valve_abnormal\00000085.wav, segment:2
   --- valve_abnormal\00000086.wav, segment:1
   --- valve_abnormal\00000086.wav, segment:2
   --- valve_abnormal\00000087.wav, segment:1
   --- valve_abnormal\00000087.wav, segment:2
   --- valve_abnormal\00000088.wav, segment:1
   --- valve_abnormal\00000088.wav, segment:2
   --- valve_abnormal\00000089.wav, segment:1
   --- valve_abnormal\00000089.wav, segment:2
   --- valve_abnormal\00000090.wav, segment:1
   --- valve_abnormal\00000090.wav, segment:2
   --- valve_abnormal\00000091.wav, segment:1
   --- valve_abnormal\00000091.wav, segment:2
   --- valve_abnormal\00000092.wav, segment:1
   --- valve_abnormal\00000092.wav

   --- valve_normal\00000074.wav, segment:1
   --- valve_normal\00000074.wav, segment:2
   --- valve_normal\00000075.wav, segment:1
   --- valve_normal\00000075.wav, segment:2
   --- valve_normal\00000076.wav, segment:1
   --- valve_normal\00000076.wav, segment:2
   --- valve_normal\00000077.wav, segment:1
   --- valve_normal\00000077.wav, segment:2
   --- valve_normal\00000078.wav, segment:1
   --- valve_normal\00000078.wav, segment:2
   --- valve_normal\00000079.wav, segment:1
   --- valve_normal\00000079.wav, segment:2
   --- valve_normal\00000080.wav, segment:1
   --- valve_normal\00000080.wav, segment:2
   --- valve_normal\00000081.wav, segment:1
   --- valve_normal\00000081.wav, segment:2
   --- valve_normal\00000082.wav, segment:1
   --- valve_normal\00000082.wav, segment:2
   --- valve_normal\00000083.wav, segment:1
   --- valve_normal\00000083.wav, segment:2
   --- valve_normal\00000084.wav, segment:1
   --- valve_normal\00000084.wav, segment:2
   --- valve_normal\00000085.wav

Understand Saved Data
====================

- Elements saved
- Dimensions of data
- Later, during training, this `load_data` function is used to load the data, which is then reshaped for training


**Dimensions of the 3-D feature array**:
- Given: hop_length=512; n_classes = 4 genres; training data is (only) 2 .wav files per genre
- dimension 1: num_segments x n_classes x .wav-files-per-class (the total number of segments)
- dimension 2: num_mfcc_vectors_per_segment = math.ceil(samples_per_segment/hop_length)
- dimension 3: num_mfcc

```
Example: 

- hop_length=512; n_classes = 4 genres; .wav files per genre = 2
- num_segments = 10; num_mfcc=13; SAMPLE_RATE=22050; TRACK_DURATION=30 s
- SAMPLES_PER_TRACK = SAMPLE_RATE * TRACK_DURATION = 22050 x 30
- samples_per_segment = int(SAMPLES_PER_TRACK/num_segments) = int(22050 x 30/10) = 66150

1. dim-1: 10 x 4 x 2 = 80
2. dim-2: math.ceil(samples_per_segment/hop_length) = 66150/512 = 129.20 -> ceil -> 130
3. dim-3: 13

Therefore X.data.shape = (80, 130, 13) and y.data.shape: (80,)
```

**Input layer**: 
```
    # input = 2D: the MFCCs of one segment, i.e. one MFCC vector
    #   per interval of hop-length samples
    #   first-dim: number of MFCC vectors (intervals) = inputs.shape[1]
    #   second-dim: number of MFCC coefficients = inputs.shape[2]
    #   NOTE: inputs.shape[0] is the number of segments (the batch axis) and we are not passing that
    keras.layers.Flatten(input_shape=(X.shape[1], X.shape[2])),
```

In [5]:
import json
import numpy as np

def load_data(json_file):
    """Read the saved JSON file and return its features and labels as numpy arrays.

    :param json_file (str): Path to the json file holding the "mfcc" and "labels" entries
    :return: Tuple (X, y): X is the array built from "mfcc", y the array built from "labels"
    """
    with open(json_file, "r") as fp:
        saved = json.load(fp)

    # The MFCCs were turned into nested lists to be JSON-serialisable;
    # rebuild numpy arrays from them
    features = np.array(saved["mfcc"])
    targets = np.array(saved["labels"])
    return features, targets

In [6]:
# 1. Load the saved MFCC features (X) and numeric labels (y)
X, y = load_data(JSON_FILE)

In [7]:
# Re-derive the expected geometry of the saved data from the configuration and
# print it next to the actual array shapes.
# The extraction call in In [4] does not pass hop_length, so generate_and_save_mfcc
# used its default of 512; the value is named here instead of being a bare number.
hop_length = 512
samples_per_segment = int(SAMPLES_PER_TRACK/NUM_SEGMENTS)
num_mfcc_vectors_per_segment = math.ceil(samples_per_segment/hop_length)
num_of_audio_files_per_category=100  # Expected number of recordings in each category folder
print("1. num_mfcc: {}\n2. num_segments: {}\n3. SAMPLES_PER_TRACK: {}\n4. num_mfcc_vectors_per_segment {}\n".format(MFCC_FEATURES, NUM_SEGMENTS, SAMPLES_PER_TRACK, num_mfcc_vectors_per_segment))
print("5. NUM_CATEGORIES: {}\n6. num_of_audio_files_per_category = {}".format(NUM_CATEGORIES, num_of_audio_files_per_category))
# Expected record count if every segment of every file was kept
no_records = NUM_CATEGORIES*num_of_audio_files_per_category*NUM_SEGMENTS
print("7. num_of_records = num_of_categories*num_of_audio_files_per_category*NUM_SEGMENTS = {}".format(no_records))

# ndarray.shape is the array's own shape attribute; going through the raw buffer
# (X.data) is unnecessary. The printed text is unchanged.
print("\n\nX.data.shape: {} (no_records, num_mfcc_vectors_per_segment, num_mfcc)".format(X.shape))
print("y.data.shape: {} (no_records, )".format(y.shape))

1. num_mfcc: 13
2. num_segments: 2
3. SAMPLES_PER_TRACK: 220500
4. num_mfcc_vectors_per_segment 216

5. NUM_CATEGORIES: 6
6. num_of_audio_files_per_category = 100
7. num_of_records = num_of_categories*num_of_audio_files_per_category*NUM_SEGMENTS = 1200


X.data.shape: (1200, 216, 13) (no_records, num_mfcc_vectors_per_segment, num_mfcc)
y.data.shape: (1200,) (no_records, )
