# Audio MNIST

In [1]:
# Import python packages
from setup import Setup
from utilities import Utilities
import glob
import os
import pandas as pd
import warnings
from data_processing import DataProcessing
from data_visualization import DataVisualization
from feature_engineering import FeatureEngineering
from data_split import DataSplit
from xgboost_model import XGBoostModel

# Silence library warnings so the notebook output stays readable.
# NOTE: this hides every warning category, deprecation notices included.
warnings.filterwarnings("ignore")

# Load the project configuration
SU = Setup(cfg_filepath='config.yaml')

# Paths exposed by the configuration object
source_path = SU.source_path
meta_path = SU.meta_path
destination_path = SU.destination_path
plot_path = SU.plot_path
result_path = SU.result_path
model_folder_path = SU.model_folder_path
model_param_path = SU.param_path
model_hyperparam_path = SU.hyperparam_path

# File helper, constructed with the destination path
UT = Utilities(destination_path)

# Speaker metadata (indexed later by speaker id, then "gender") and the
# model parameter / hyperparameter files
meta_data = UT.read_file(meta_path)
model_param = UT.read_file(model_param_path)
model_hyperparam = UT.read_file(model_hyperparam_path)

# Pipeline helpers
DP = DataProcessing(target_sr=8000)
DV = DataVisualization(plot_path)
FE = FeatureEngineering()
# A single DataSplit instance is enough: the default-constructed one that
# used to precede this line was overwritten immediately and never used.
DS = DataSplit(test_size=0.1, val_size=0.1)

# Empty frame that fixes the leading column order ("gender", "digit")
df = UT.create_dataframe(None, column_names=["gender", "digit"])

# Number of folders directly under the source path, plus one so that it can
# be used as the exclusive upper bound of range(1, ...)
all_folders = len(next(os.walk(source_path))[1])+1

# One entry per recording. The rows are turned into a DataFrame once, after
# the loop: DataFrame.append was removed in pandas 2.0 and re-copies the whole
# frame on every call, which is quadratic over all recordings.
feature_rows = []

# Loop over the numbered folders ("01", "02", ...) in the source path
for i in range(1, all_folders):
    # Show progress
    UT.loop_progress(i, all_folders-1)

    # All WAV files of the current folder, in sorted order
    src_temp = os.path.join(source_path, f"{i:02d}")
    filepath_filename = sorted(glob.glob(os.path.join(src_temp, "*.wav")))

    # Loop over files in directory
    for file in filepath_filename:
        # The file stem has three "_"-separated parts, unpacked as
        # dig / vp / rep. splitext removes only the extension, whereas
        # str.rstrip(".wav") strips any trailing run of the characters
        # ".", "w", "a", "v" and could eat into the stem; basename is used
        # instead of splitting on "/" so the directory part is always dropped.
        stem = os.path.splitext(os.path.basename(file))[0]
        dig, vp, rep = stem.split("_")

        # Read audio data
        fs, audio_data = UT.read_audio(file)

        # Optional diagnostics, disabled by default (uncomment to use)
        audio_name = f"audio_{dig[-1]}_{vp}_{rep}.png"
        #DV.plot_audio(fs, audio_data, audio_name)

        stft_name = f"stft_{dig[-1]}_{vp}_{rep}.png"
        #DV.plot_stft(fs, audio_data, stft_name)

        #DV.play_audio(file)

        # Resample audio data
        audio_data = DP.resample_data(fs, audio_data)

        # Zero padding audio data
        audio_data = DP.zero_pad(audio_data)

        # FFT audio data
        fft_data = DP.fft_data(audio_data)

        # Apply bandpass filter
        # NOTE(review): bp_data is not used below; the features are built
        # from the unfiltered fft_data. Confirm whether feature_creation
        # should receive bp_data instead.
        bp_data = DP.bandpass_filter(fft_data, low_threshold=100, high_threshold=250)

        # Feature creation
        features = DP.feature_creation(fft_data)

        # Normalize features
        n_features = DP.normalize_features(features)

        # Add gender and digit label. The calls are chained so the digit is
        # added to the row that already carries the gender, instead of
        # relying on add_gender mutating n_features in place.
        features = DP.add_gender(n_features, meta_data[vp]["gender"])
        features = DP.add_digit(features, dig[-1])

        # Store a snapshot of the row so later iterations cannot alter it
        feature_rows.append(dict(features))

# Build the final frame in one step; "gender" and "digit" stay the leading
# columns because the empty frame comes first in the concatenation.
if feature_rows:
    df = pd.concat([df, pd.DataFrame(feature_rows)], ignore_index=True)

# Save data to CSV
csv_name = "features_data.csv"
UT.save_df_to_csv(df, csv_name)

Progress: 1.67%
Progress: 3.33%
Progress: 5.00%
Progress: 6.67%
Progress: 8.33%
Progress: 10.00%
Progress: 11.67%
Progress: 13.33%
Progress: 15.00%
Progress: 16.67%
Progress: 18.33%
Progress: 20.00%
Progress: 21.67%
Progress: 23.33%
Progress: 25.00%
Progress: 26.67%
Progress: 28.33%
Progress: 30.00%
Progress: 31.67%
Progress: 33.33%
Progress: 35.00%
Progress: 36.67%
Progress: 38.33%
Progress: 40.00%
Progress: 41.67%
Progress: 43.33%
Progress: 45.00%
Progress: 46.67%
Progress: 48.33%
Progress: 50.00%
Progress: 51.67%
Progress: 53.33%
Progress: 55.00%
Progress: 56.67%
Progress: 58.33%
Progress: 60.00%
Progress: 61.67%
Progress: 63.33%
Progress: 65.00%
Progress: 66.67%
Progress: 68.33%
Progress: 70.00%
Progress: 71.67%
Progress: 73.33%
Progress: 75.00%
Progress: 76.67%
Progress: 78.33%
Progress: 80.00%
Progress: 81.67%
Progress: 83.33%
Progress: 85.00%
Progress: 86.67%
Progress: 88.33%
Progress: 90.00%
Progress: 91.67%
Progress: 93.33%
Progress: 95.00%
Progress: 96.67%
Progress: 98.33%
Progress: 100.00%

In [2]:
# --- Settings for this step ---------------------------------------------
# Columns that the feature-selection helpers must leave alone (the target)
columns_to_leave_out = ["label"]
# Correlation threshold passed to remove_correlated_columns
threshold = 0.95
# Output file names for the two plots
plot_name = "column_distribution.png"
corr_name = "correlation_matrix.png"

# --- Load the extracted features ----------------------------------------
df = UT.csv_to_df(csv_name)

# Report the dataset dimensions (index 0 is printed as rows, index 1 as columns)
df_size = UT.df_shape(df)
print(f"Size of data set, columns: {df_size[1]} and rows: {df_size[0]}")

# --- Build the target ----------------------------------------------------
# Drop the "digit" column, then create the label column
# where 'female' is 0 and 'male' is 1
df = UT.remove_column(df, "digit")
df = FE.create_label_column(df)

# Plot column distribution
DV.column_distribution(df, plot_name)

# --- Feature selection ---------------------------------------------------
# Remove constant columns
df = FE.remove_constant_columns(df, columns_to_leave_out)

# Compute and plot the Pearson correlation matrix
corr_matrix = FE.pearson_correlation(df, columns_to_leave_out)
DV.plot_corr_matrix(corr_matrix, corr_name)

# Remove correlated columns using the threshold set above
df = FE.remove_correlated_columns(df, threshold, columns_to_leave_out)

# --- Persist the reduced dataset ----------------------------------------
csv_name = "final_data.csv"
UT.save_df_to_csv(df, csv_name)

# Load CSV file into dataframe
df = UT.csv_to_df(csv_name)

# Split data into training (80%), validation (10%), and test set (10%)
train_df, val_df, test_df = DS.split(df, "label")

# Show size of datasets
train_size = UT.df_shape(train_df)
val_size = UT.df_shape(val_df)
test_size = UT.df_shape(test_df)
print(f"Size of training set, columns: {train_size[1]} and rows: {train_size[0]}")
print(f"Size of validation set, columns: {val_size[1]} and rows: {val_size[0]}")
# This line reports the test split; it was previously mislabelled "validation"
print(f"Size of test set, columns: {test_size[1]} and rows: {test_size[0]}")

# Show gender balance (label 0 is female, label 1 is male).
# .get() falls back to 0 so a class that is absent from the data is
# reported as zero recordings instead of raising KeyError.
gender_count = df["label"].value_counts()
print(f"Number of female audio recordings: {gender_count.get(0, 0)}")
print(f"Number of male audio recordings: {gender_count.get(1, 0)}")

# Model wrapper built from the three data splits
XM = XGBoostModel(train_df, val_df, test_df)

# Prepare datasets: features and targets for each split
X_train, y_train, X_val, y_val, X_test, y_test = XM.prepare_data()

# Hyperparameter tuning (disabled; uncomment the call to run the grid search)
log_name = "best_modeL_param.yaml"
#XM.grid_search(X_train, y_train, X_val, y_val, result_path, log_name, model_hyperparam)

# Set model parameters
XM.set_params(model_param)

# Train model; log_name is rebound here and read back further down
log_name = "model_results.yaml"
XM.fit(X_train, y_train, X_val, y_val, result_path, log_name)

# Feature importance
feature_importance = XM.feature_importance()

# Plot feature importance. The feature names are taken as every column of
# the test frame except the last one.
# NOTE(review): presumes the target column is the last column - confirm.
feat_imp_name = "feature_importance.png"
DV.plot_feature_importance(feature_importance, test_df.iloc[:,:-1].columns, feat_imp_name)

# Make predictions
y_pred = XM.predict(X_test)

# Evaluate model (f-string for consistency with the other prints in this
# file; the printed text is unchanged)
accuracy = XM.evaluate_predictions(y_test, y_pred)
print(f"Accuracy: {accuracy*100:.2f}%")

# Read log file
log_data = UT.read_file(os.path.join(result_path, log_name))

# Load log data into pandas dataframe.
# NOTE: this rebinds `df`, which until now held the dataset.
df = XM.create_log_df(log_data)

# Plot training and validation loss
loss_name = "model_loss.png"
DV.plot_loss(df["iteration"], df["train_loss"], df["val_loss"], loss_name)

# Plot training and validation accuracy
acc_name = "model_accuracy.png"
DV.plot_accuracy(df["iteration"], df["train_acc"], df["val_acc"], acc_name)

Size of training set, columns: 9 and rows: 24000
Size of validation set, columns: 9 and rows: 3000
Size of validation set, columns: 9 and rows: 3000
Number of female audio recordings: 6000
Number of male audio recordings: 24000
Accuracy: 82.00%


<Figure size 720x720 with 0 Axes>

<Figure size 720x720 with 0 Axes>