In [1]:
import librosa
import librosa.display
import numpy as np
import matplotlib.pyplot as plt
import IPython.display as ipd
import os
import pandas as pd

In [2]:
data_path = '../data/'
files = []

#lists the total number of .wav files in /data/
for f in os.listdir(data_path):
    if f.endswith('.wav'):
        files.append(f)

print(f"Found {len(files)} .wav files. Preparing analysis.")

Found 7 .wav files. Preparing analysis.


In [None]:
#calculates the spectral centroid, spectral flatness, spectral rolloff, zero crossing rate, rms and mfcc for all files in /data/
results = []

for file_name in files:
    full_path = os.path.join(data_path, file_name)
    y, sr = librosa.load(full_path)
    
    cent = librosa.feature.spectral_centroid(y=y, sr=sr)[0]
    cent_mean = np.mean(cent)
  
    flat = librosa.feature.spectral_flatness(y=y)[0]
    flat_mean = np.mean(flat)
    
    zero = librosa.feature.zero_crossing_rate(y=y)[0]
    zero_mean = np.mean(zero)
    
    rms = librosa.feature.rms(y=y)
    rms_mean = np.mean(rms)
    rms_max = np.max(rms)

    roll_85 = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.85)[0]
    roll_85_mean = np.mean(roll_85)
    roll_99 = librosa.feature.spectral_rolloff(y=y, sr=sr, roll_percent=0.99)[0]
    roll_99_mean = np.mean(roll_99)


    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=5)
    mfcc_means = np.mean(mfcc, axis=1)
   
 

    #store in dictionary
    row = {
        'filename': file_name,
        'avg_brightness': cent_mean,
        'avg_flatness': flat_mean,
        'avg_smooth':  zero_mean,
        'avg_loudness' : rms_mean,
        'peak_loudness' : rms_max,
        '85_percent' : roll_85_mean,
        '99_percent' : roll_99_mean,


        'mfcc_1' : mfcc_means[0],
        'mfcc_2' : mfcc_means[1]
    }
    results.append(row)

df = pd.DataFrame(results)
print(df)





            filename  avg_brightness  avg_flatness  avg_smooth  avg_loudness  \
0           Meow.wav     1819.817127      0.003150    0.093896      0.138755   
1      Low Drone.wav      159.998375      0.000277    0.011732      0.077042   
2        Ringing.wav     5854.177057      0.131637    0.589513      0.191732   
3  Glass Shatter.wav     5605.301111      0.184680    0.481348      0.036265   
4          Synth.wav     2993.195814      0.029263    0.132919      0.094063   
5   Car Breaking.wav     2561.397718      0.024397    0.127654      0.066719   
6    Racquet Hit.wav     2239.549521      0.015058    0.116642      0.016182   

   peak_loudness   85_percent    99_percent      mfcc_1     mfcc_2  
0       0.414607  3489.182384   8436.676845 -230.522568  96.313850  
1       0.312767   217.988915    640.089467 -466.125336  79.706017  
2       0.355623  7192.979825   7899.820242 -443.279053  10.587992  
3       0.234137  8966.665039  10445.517578 -281.560822 -18.467615  
4       0.2654

In [None]:
#Some prediction

def sound_prediction(row):
    if row['avg_flatness'] > 0.1 and row['avg_brightness'] > 5000:
        return "Percussive/Noise"
    elif row['avg_flatness'] < 0.01:
        return "Tonal/Harmonic"
    else:
        return "Complex/Enviromental"
    
df['prediction'] = df.apply(sound_prediction, axis=1)
print(df[['filename', 'prediction']])

            filename            prediction
0           Meow.wav        Tonal/Harmonic
1      Low Drone.wav        Tonal/Harmonic
2        Ringing.wav      Percussive/Noise
3  Glass Shatter.wav      Percussive/Noise
4          Synth.wav  Complex Enviromental
5   Car Breaking.wav  Complex Enviromental
6    Racquet Hit.wav  Complex Enviromental
