# Sound Anomaly Detection

Python program to 
1. Load sounds of factory machines
2. Use a machine learning model 
   to predict whether the machine is defective (output: normal 0 or abnormal 1)
   from the sound (input)
3. If the machine is predicted to be abnormal: as a first check, the user can listen to
   - the pre-recorded sound of the machine when it is normal
   - the current sound of the machine

See the README file for more information.


## Import

In [1]:
# =====================================================================
# Import
# =====================================================================

# Import internal modules
import os.path
import joblib
import time
import random
from typing import List
from datetime import datetime


# Import 3rd party modules
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report
import IPython.display as ipd
from IPython.display import Markdown, clear_output
import ipywidgets as widgets

## Functions

In [2]:
def load_model(filename: str) -> GridSearchCV:
    """
    Load a persisted machine learning model from the core/assets/data directory.

    * param filename: name of the model file located in core/assets/data
    * return: the object deserialized by joblib from that file
    """
    return joblib.load(os.path.join("core", "assets", "data", filename))


def _local_sound_path(sound_path):
    """Build a path under "core" from a backslash-separated sound path."""
    return os.path.join("core", *sound_path.split("\\"))


def listen_sounds(_):
    """
    Callback of the "Listen" button.

    Clears the ``listen_out`` output area, then displays two audio players:
    the sound selected in ``abnormal_machine_dropdown`` and a reference
    recording of the same machine type (noise_db 0, model_id 0, target 0,
    file "00000000.wav").

    Prints a short message instead of raising when nothing is selected or
    when no reference recording matches.

    * param _: the clicked button (unused)
    """
    # linking function with output
    with listen_out:
        # what happens when we press the button
        clear_output()

        sound_id = abnormal_machine_dropdown.value
        if sound_id is None:
            # An empty dropdown has no value; looking up None would raise KeyError
            print("No machine selected")
            return

        # Row of the selected sound (kept as a one-row DataFrame)
        selected_row = df.loc[[sound_id]]

        # Listen to current sound of machine
        print("Current sound")
        current_sound_path = _local_sound_path(selected_row.sound_path.values[0])
        # ipd.display() to display multiple Audio objects at once
        ipd.display(ipd.Audio(current_sound_path))

        # Listen to pre-recorded normal sound
        print("Pre-recorded normal sound")
        reference_rows = df.loc[
            (df.machine_type == selected_row.machine_type.values[0])
            & (df.noise_db == 0)
            & (df.model_id == 0)
            & (df.target == 0)
            & (df.sound == "00000000.wav")
        ]
        if reference_rows.empty:
            # Without this guard, .values[0] raises IndexError on an empty selection
            print("No pre-recorded normal sound available for this machine type")
            return

        normal_sound_path = _local_sound_path(reference_rows.sound_path.values[0])
        ipd.display(ipd.Audio(normal_sound_path))

def show_machine_info(_):
    """
    Callback of the "Show machine info" button.

    Clears the ``machine_info_out`` output area and displays the machine type,
    model id, noise level, predicted label and true label of the sound
    selected in ``abnormal_machine_dropdown``.

    Prints a short message instead of raising when nothing is selected.

    * param _: the clicked button (unused)
    """
    with machine_info_out:
        clear_output()

        sound_id = abnormal_machine_dropdown.value
        if sound_id is None:
            # An empty dropdown has no value; looking up None would raise KeyError
            print("No machine selected")
            return

        display_cols = ["machine_type", "model_id", "noise_db", "y_pred", "target"]
        # Double brackets keep the selection as a one-row DataFrame (table display)
        display(df_pred_abnormal.loc[[sound_id]][display_cols])


In [3]:
# Get all sounds
df = pd.read_csv(os.path.join("core", "assets", "data", "thread_csv_all.csv"))

# Feature engineering
# Replace abnormal by 1, every other label by 0 (vectorized comparison)
df["target"] = (df["target"] == "abnormal").astype("int64")

# Feature selection
# Select numeric columns
selected_cols = df.select_dtypes(include="number").columns.tolist()

# Drop noise_db and model_id columns, and the target itself, from the features
selected_cols.remove("noise_db")
selected_cols.remove("model_id")
selected_cols.remove("target")

# Select features X and target variable y
X = df[selected_cols]
y = df.target

# Split into the same training and test sets
# as we did when training our models
# to test the best machine learning model on unseen data (test set)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Load best model
loaded_model: GridSearchCV = load_model("best_model_all_features.joblib")

y_pred: np.ndarray = loaded_model.predict(X_test)

# Create a DataFrame with true and predicted labels,
# keep the original index (to retrieve the sound)
df_true_pred_test = pd.DataFrame(
    np.column_stack([y_test, y_pred]),
    index=y_test.index,
    columns=["y_test", "y_pred"],
)

# Get all sounds predicted as abnormal (class 1)
y_pred_abnormal: pd.DataFrame = df_true_pred_test[df_true_pred_test.y_pred == 1]

# Concatenate y_pred_abnormal with df to get all the sound information
# (including sound filepath); the inner join keeps only the predicted-abnormal rows
df_pred_abnormal = pd.concat([y_pred_abnormal, df], axis=1, join="inner")

# Calendar: date of today by default, user can select another day
calendar = widgets.DatePicker(description='Today', value=datetime.today(), disabled=False)

# Info about factory health (number of predicted abnormal)
notification_out = widgets.Output()
with notification_out:
    display(Markdown("**Notifications**\n"))

# Layout of items
items_layout = widgets.Layout(width='auto')
notification_button = widgets.Button(
    description=f"{df_pred_abnormal.shape[0]} machine(s) predicted abnormal!",
    layout=items_layout,
    button_style='danger',
)

# Dropdown menu for selecting predicted abnormal machine
# (sorted so the order of the sound IDs is deterministic)
abnormal_machine_dropdown = widgets.Dropdown(
    options=sorted(set(df_pred_abnormal.index)),
    description='Sound ID',
    disabled=False)

# Info about the machine selected from dropdown menu
## Set buttons, outputs, function and linkage
machine_info_button = widgets.Button(description='Show machine info')
machine_info_out = widgets.Output()
listen_button = widgets.Button(description='Listen')
listen_out = widgets.Output()

## Link buttons and functions together with the buttons' on_click method
machine_info_button.on_click(show_machine_info)
listen_button.on_click(listen_sounds)

# Classification report
class_report_markdown_out = widgets.Output()

with class_report_markdown_out:
    display(Markdown("**Classification Report**\n"))
    print(classification_report(y_test, y_pred))

# VBox or HBox
home_box = widgets.VBox([
    calendar,
    notification_out,
    notification_button,
    abnormal_machine_dropdown,
    machine_info_button,
    machine_info_out,
    listen_button,
    listen_out,
])
classification_report_box = widgets.VBox([class_report_markdown_out])

# Create list of boxes for the windows
children = [home_box, classification_report_box]

# Initiate tab
tab = widgets.Tab()
# setting the tab windows
tab.children = children

# Set title of windows
tab.set_title(0, 'Home')
tab.set_title(1, 'Classification Report')
tab

Tab(children=(VBox(children=(DatePicker(value=datetime.datetime(2021, 4, 2, 12, 9, 9, 841410), description='To…