In [6]:
import librosa
import numpy as np
import scipy.stats as stats
from scipy.stats import skew

In [7]:
# load audio file
filepath = 'Amazing Seagull.wav'
signal, sr = librosa.load(filepath)
features = {}

# compute features
frame_length = 1024


In [11]:
# short time energy
hop_length = 256
ste = np.array([sum(abs(signal[i:i+frame_length]**2)) for i in range(0, len(signal), hop_length)])
features['STE'] = np.mean(ste)
print('ste: \n',ste, '\n', 'Mean: ', features['STE'])

ste: 
 [2.29175866e-08 1.65013806e-03 4.40061677e-03 6.39157618e-03
 8.34321166e-03 8.85064940e-03 7.48714857e-03 7.92531067e-03
 8.61069831e-03 8.55502088e-03 9.17947925e-03 8.90005800e-03
 8.49292467e-03 8.63017936e-03 8.11717056e-03 7.21883693e-03
 6.23765195e-02 2.73507219e-01 7.68082719e-01 7.14575949e+00
 1.27830821e+01 2.06992830e+01 2.87443948e+01 2.75263903e+01
 2.77292587e+01 3.06238075e+01 3.35803040e+01 3.75286151e+01
 3.98995560e+01 4.06813595e+01 4.01302456e+01 4.30850770e+01
 4.19273686e+01 3.34010068e+01 2.57303230e+01 1.77033258e+01
 1.18481641e+01 9.08952709e+00 1.17166931e+01 1.58472232e+01
 2.68904092e+01 3.44806514e+01 3.23849128e+01 2.75930623e+01
 1.68540047e+01 9.49757068e+00 7.33448152e+00 4.49755133e+00
 3.09804003e+00 2.50320559e+00 9.37353297e-01 4.55755352e-01
 3.61131372e-01 2.54359955e-01 1.96096900e-01 1.68516682e-01
 1.17722049e-01 8.08967078e-02 8.39027889e-02 7.05579003e-02
 5.57098528e-02 3.94088599e-02 1.62954028e-02] 
 Mean:  11.533477478270234


In [28]:
# zero crossing rate
zcr = librosa.feature.zero_crossing_rate(signal, frame_length=frame_length, hop_length=hop_length)[0]
features['ZCR'] = np.mean(zcr)
print('zcr: \n', zcr, '\n zcr mean:', features['ZCR'])

zcr: 
 [0.35839844 0.54296875 0.7109375  0.5859375  0.43261719 0.28027344
 0.15625    0.12792969 0.13574219 0.14355469 0.13671875 0.1640625
 0.16699219 0.15917969 0.140625   0.11132812 0.10644531 0.10742188
 0.12597656 0.140625   0.13671875 0.13769531 0.14941406 0.19335938
 0.23535156 0.2890625  0.29589844 0.25488281 0.21484375 0.16015625
 0.13769531 0.13476562 0.1328125  0.13085938 0.13085938 0.14550781
 0.18261719 0.20996094 0.25585938 0.27929688 0.24023438 0.20507812
 0.15039062 0.10449219 0.09765625 0.09667969 0.09570312 0.10058594
 0.09960938 0.10058594 0.10351562 0.10058594 0.11035156 0.13867188
 0.15722656 0.16894531 0.17578125 0.16210938 0.16503906 0.16796875
 0.16796875 0.15625    0.10742188] 
 zcr mean: 0.18753100198412698


In [21]:
# spectral flatness
SFM = librosa.feature.spectral_flatness(y = signal)[0]
features['SFM'] = np.mean(SFM)
print('SFM: \n', SFM, '\n SFM mean: ', features['SFM'])

SFM: 
 [0.17688236 0.07615636 0.06202986 0.04838473 0.05555075 0.05738488
 0.05125243 0.0400781  0.03117413 0.02056828 0.01277736 0.00934713
 0.00553334 0.00409627 0.00319653 0.0016824  0.00124023 0.00096031
 0.00147089 0.00310748 0.00592654 0.00203235 0.0009542  0.00118973
 0.00203804 0.00250782 0.00492291 0.00875987 0.01073231 0.01570549
 0.02286479 0.0219596 ] 
 SFM mean:  0.023827108


In [39]:
# spectral skewness
S, phase = librosa.magphase(librosa.stft(signal)) #Create stft, then separate into magnitude and phase
spectral_skewness = stats.skew(S, axis=0)
features['Skew'] = np.mean(spectral_skewness)
print('Spectral skewness: \n', spectral_skewness, '\n Skew mean: ', features['Skew'])


Spectral skewness: 
 [ 1.6590369  3.295667   3.0508516  3.5065472  4.2160425  4.2815866
  4.3257575  3.8204896  3.5862615  4.33693    4.981189   4.8808827
  5.4005527  5.9082947  8.106551  13.044963  14.429175  14.963867
 11.465698   7.4563227  5.3897643 13.086238  13.424031  12.592856
 10.521028   9.661939   7.947133   6.3203897  5.700247   6.4532905
  4.9376307  5.2029624] 
 Skew mean:  7.1235676


In [40]:
# Spectral kurtosis
spectral_kurtosis = stats.kurtosis(S, axis=0)
features['Kurt'] = np.mean(spectral_kurtosis)
print('Spectral kurtosis: \n', spectral_kurtosis, '\n Kurt mean: ', features['Kurt'])

Spectral kurtosis: 
 [  2.7471614  12.777422    9.926008   13.649902   24.906662   26.353933
  26.652905   19.936502   15.592079   22.334318   32.148308   27.579779
  33.63271    44.317734   87.21113   196.93433   232.54408   254.5426
 157.41217    71.475815   34.68191   199.58258   194.41034   180.52675
 127.66412   114.43435    77.167076   50.912495   40.601818   57.886402
  33.030346   33.971878 ] 
 Kurt mean:  76.7983


In [42]:
# MFCC
sr = 44.1e3
MFCC = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=13)
MFCC_var = np.var(MFCC, axis=1)
MFCC_mean1 = np.mean(MFCC, axis = 1)
print('MFCC_mean1: ', MFCC_mean1)
features['\n MFCC'] = np.mean(MFCC)
print('MFCC: \n', MFCC, '\n MFCC mean: ', features['MFCC'])

MFCC_mean1:  [-317.48657     29.277636   -55.900795    16.041819    -3.144356
   23.294086   -18.449902    -4.936064    12.50139      2.7269588
  -15.948442     2.5738902    2.6007955]
MFCC: 
 [[-6.38203552e+02 -5.72930786e+02 -4.46430115e+02 -4.08159180e+02
  -4.12419128e+02 -4.08396973e+02 -4.07244049e+02 -4.15061005e+02
  -4.26983246e+02 -3.52588959e+02 -2.42578751e+02 -1.76035248e+02
  -1.83384323e+02 -2.13408813e+02 -2.21326447e+02 -2.37638657e+02
  -2.50998611e+02 -2.55211838e+02 -2.50877853e+02 -2.41605179e+02
  -2.11319489e+02 -2.07810089e+02 -2.28673386e+02 -2.47250595e+02
  -2.48702194e+02 -2.55818344e+02 -2.88469879e+02 -3.20364075e+02
  -3.30601593e+02 -3.43179749e+02 -3.50428192e+02 -3.65469940e+02]
 [ 0.00000000e+00  6.07950897e+01  9.83576889e+01  9.93197174e+01
   9.11249390e+01  9.28611450e+01  9.58957367e+01  9.89309540e+01
   9.71020813e+01  6.26289062e+01  3.01755371e+01 -1.18700094e+01
  -3.68227539e+01 -4.76237335e+01 -4.97861099e+01 -4.12349434e+01
  -3.82505417e

In [None]:
#Copilot assistance:

# import librosa
# import numpy as np
# import scipy.stats

# # Load the audio file
# audio_path = 'path_to_your_audio_file.wav'
# y, sr = librosa.load(audio_path)

# # Spectral Flatness
# spectral_flatness = librosa.feature.spectral_flatness(y=y)

# # Short-Time Energy
# frame_length = 2048
# hop_length = 512
# short_time_energy = np.array([
#     np.sum(np.abs(y[i:i+frame_length]**2))
#     for i in range(0, len(y), hop_length)
# ])

# # Spectral Skewness
# S, phase = librosa.magphase(librosa.stft(y))
# spectral_skewness = scipy.stats.skew(S, axis=0)

# # Mel-Frequency Cepstral Coefficients (MFCC)
# mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
# mfcc_variability = np.var(mfccs, axis=1)

# print("Spectral Flatness:", spectral_flatness)
# print("Short-Time Energy:", short_time_energy)
# print("Spectral Skewness:", spectral_skewness)
# print("MFCC Variability:", mfcc_variability)
