In [1]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import os
import pandas as pd

In [2]:
data_path = '../data/'
files = []

#lists the total number of .wav files in /data/
for f in os.listdir(data_path):
    if f.endswith('.wav'):
        files.append(f)

print(f"Found {len(files)} .wav files. Preparing analysis.")

Found 7 .wav files. Preparing analysis.


In [None]:
#calculates the spectral centroid, spectral flatness, spectral rolloff, zero crossing rate, rms and mfcc for all files in /data/
results = []

for file_name in files:
    full_path = os.path.join(data_path, file_name)
    y, sr = librosa.load(full_path)
    
    cent = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    cent_mean = np.mean(cent)
  
    flat = librosa.feature.spectral_flatness(y=y)[0]
    flat_mean = np.mean(flat)
    
    zero = librosa.feature.zero_crossing_rate(y=y)[0]
    zero_mean = np.mean(zero)
    
    rms = librosa.feature.rms(y=y)[0]
    rms_mean = np.mean(rms)
    rms_max = np.max(rms)

    roll_85 = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.85)[0]
    roll_85_mean = np.mean(roll_85)
    roll_99 = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.99)[0]
    roll_99_mean = np.mean(roll_99)


    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=5)
    mfcc_means = np.mean(mfcc, axis=1)
   
 

    #store in dictionary
    row = {
        'filename': file_name,
        'avg_brightness': cent_mean,
        'avg_flatness': flat_mean,
        'avg_smooth':  zero_mean,
        'avg_loudness' : rms_mean,
        'peak_loudness' : rms_max,
        '85_percent' : roll_85_mean,
        '99_percent' : roll_99_mean,


        'mfcc_1' : mfcc_means[0],
        'mfcc_2' : mfcc_means[1]
    }
    results.append(row)

df = pd.DataFrame(results)
print(df)





In [None]:
#Some prediction

def sound_prediction(row):



    #Identifying high energy sharp impact objects (glass ect)
    #High brightness + high percussive texture + high transient spike
    if row['avg_smooth'] > 0.1 and row['avg_brightness'] > 4000 and (row['peak_loudness'] > row['avg_loudness'] * 3):
        return "Glass/Sharp Impact"
    #Identifying noisy textures (hiss or wind ect)
    #High flatness + low dynamic range
    elif row['avg_flatness'] > 0.1 and (row['peak_loudness'] < row['avg_loudness'] * 1.5):
        return "Stationary Noise"
    #Identifying pure tonal sounds (Synth or bell ect)
    #Low flatness + mfcc shapes
    elif row['avg_flatness'] < 0.01 and row['mfcc_2'] > 0:
        return "Tonal Instrument"
    #Identifying low percussion (kick drum ect)
    #Low brightness + high energy spike
    elif row['avg_brightness'] < 1000 and row['peak_loudness'] > 0.4:
        return "Low percussion"
    else:
        return "Unknown/Complex"
    
df['prediction'] = df.apply(sound_prediction, axis=1)
print(df[['filename', 'prediction']])

            filename          prediction
0           Meow.wav    Tonal Instrument
1      Low Drone.wav    Tonal Instrument
2        Ringing.wav     Unknown/Complex
3  Glass Shatter.wav  Glass/Sharp Impact
4          Synth.wav     Unknown/Complex
5   Car Breaking.wav     Unknown/Complex
6    Racquet Hit.wav     Unknown/Complex
