In [6]:
import librosa
import numpy as np
import pandas as pd
import pywt
import scipy.stats as stats
from scipy.stats import skew, kurtosis

# Read the recording from disk.
# The keyword values below are librosa.load's documented defaults, written out
# so it is visible that the audio is resampled to 22050 Hz and mixed to mono.
audio_file = "F://Python all practice//310 Project//audio_sample.wav"
signal, sr = librosa.load(audio_file, sr=22050, mono=True)

# Teager-Kaiser Energy Operator (TKEO) Features
# psi[n] = x[n]^2 - x[n-1] * x[n+1], evaluated for the interior samples only
# (the first and last sample have no two-sided neighbours), so the result is
# two samples shorter than the signal.
# Array slices replace the per-sample Python loop: same values, but the work
# runs inside NumPy instead of once per sample in the interpreter.
tkeo = signal[1:-1] ** 2 - signal[:-2] * signal[2:]
print("TKEO Mean:", np.mean(tkeo))
print("TKEO Std Deviation:", np.std(tkeo))
print("TKEO Max:", np.max(tkeo))
print("TKEO Min:", np.min(tkeo))
print("TKEO Skewness:", skew(tkeo))
print("TKEO Kurtosis:", kurtosis(tkeo))

# Wavelet Features
# Multi-level discrete wavelet decomposition of the signal, followed by the
# same eight summary statistics for every coefficient array it returns.
wavelet = 'db1'
level = 9
coeffs = pywt.wavedec(signal, wavelet, level=level)

# (label, function) pairs, listed in the order they are printed.
_WAVELET_STATS = (
    ("Energy", lambda band: np.sum(np.square(band))),
    ("Mean", np.mean),
    ("Std Deviation", np.std),
    ("Max", np.max),
    ("Min", np.min),
    ("Median", np.median),
    ("Skewness", skew),
    ("Kurtosis", kurtosis),
)

# NOTE(review): the number printed is the 1-based position in `coeffs`, not a
# decomposition level. Per the PyWavelets docs, wavedec returns
# [cA_n, cD_n, ..., cD_1], so "level 1" would be the approximation band and
# there are level + 1 entries in total -- confirm the labels are intended.
for band_number, band in enumerate(coeffs, start=1):
    for stat_name, stat_fn in _WAVELET_STATS:
        print(f"Wavelet level {band_number} {stat_name}:", stat_fn(band))


# Calculate Fundamental Frequency (F0)
# Search range for the pitch tracker, given as note names.
fmin = librosa.note_to_hz('C2')
fmax = librosa.note_to_hz('C7')

# Pass the sample rate of the loaded signal explicitly. Without it pyin falls
# back to its own default rate, which is only correct while the audio happens
# to be loaded at that same rate.
f0, _, _ = librosa.pyin(signal, fmin=fmin, fmax=fmax, sr=sr)

# The reported value is the lowest F0 over all frames that have an estimate;
# frames without one are NaN and are ignored by nanmin. If every frame is NaN
# (no pitch found anywhere), report 0 instead of NaN.
lowest_f0 = np.nanmin(f0) if np.any(~np.isnan(f0)) else 0
print("Fundamental Frequency:", lowest_f0)

# MFCC and Delta Features
n_mfcc = 20
frame_size_ms = 25
hop_length_ms = 10

# Convert the window and hop durations to sample counts at the signal's real
# sample rate. Without sr, time_to_samples uses its own default rate, so the
# windows would not be 25 ms / 10 ms for audio loaded at any other rate.
# int() guarantees plain Python integers for n_fft / hop_length.
frame_size_samples = int(librosa.time_to_samples(frame_size_ms / 1000, sr=sr))
hop_length_samples = int(librosa.time_to_samples(hop_length_ms / 1000, sr=sr))

# One row per cepstral coefficient, one column per analysis frame.
mfcc_features = librosa.feature.mfcc(
    y=signal,
    sr=sr,
    n_mfcc=n_mfcc,
    n_fft=frame_size_samples,
    hop_length=hop_length_samples,
)
# First- and second-order deltas, both derived from the MFCC matrix itself.
delta_mfcc = librosa.feature.delta(mfcc_features)
delta_delta_mfcc = librosa.feature.delta(mfcc_features, order=2)

# Average over frames (axis=1) to get one value per coefficient.
print("MFCC Features Mean:", np.mean(mfcc_features, axis=1))
print("Delta MFCC Features Mean:", np.mean(delta_mfcc, axis=1))
print("Delta-Delta MFCC Features Mean:", np.mean(delta_delta_mfcc, axis=1))

# General Audio Signal Properties
# The librosa descriptors are computed per frame and collapsed to a single
# number by averaging; energy and amplitude come straight from the waveform.
zcr_frames = librosa.feature.zero_crossing_rate(signal)
print("ZCR:", np.mean(zcr_frames))

# Mean squared sample value.
mean_power = np.sum(signal**2) / len(signal)
print("Energy:", mean_power)

# Largest absolute sample value.
peak_amplitude = np.max(np.abs(signal))
print("Amplitude:", peak_amplitude)

centroid_frames = librosa.feature.spectral_centroid(y=signal, sr=sr)
print("Spectral Centroid:", np.mean(centroid_frames))

rolloff_frames = librosa.feature.spectral_rolloff(y=signal, sr=sr)
print("Spectral Rolloff:", np.mean(rolloff_frames))

flatness_frames = librosa.feature.spectral_flatness(y=signal)
print("Spectral Flatness:", np.mean(flatness_frames))

bandwidth_frames = librosa.feature.spectral_bandwidth(y=signal, sr=sr)
print("Spectral Bandwidth:", np.mean(bandwidth_frames))



# Collect the scalar features into one mapping; each key becomes a CSV column.
# NOTE: the wavelet statistics printed earlier are not part of this mapping,
# so they are not written to the CSV.
features_dict = {
    "TKEO Mean": np.mean(tkeo),
    "TKEO Std Deviation": np.std(tkeo),
    "TKEO Max": np.max(tkeo),
    "TKEO Min": np.min(tkeo),
    "TKEO Skewness": skew(tkeo),
    "TKEO Kurtosis": kurtosis(tkeo),
    "Fundamental Frequency": lowest_f0,
    "ZCR": np.mean(librosa.feature.zero_crossing_rate(signal)),
    "Energy": np.sum(signal**2) / len(signal),
    "Amplitude": np.max(np.abs(signal)),
    "Spectral Centroid": np.mean(librosa.feature.spectral_centroid(y=signal, sr=sr)),
    "Spectral Rolloff": np.mean(librosa.feature.spectral_rolloff(y=signal, sr=sr)),
    "Spectral Flatness": np.mean(librosa.feature.spectral_flatness(y=signal)),
    "Spectral Bandwidth": np.mean(librosa.feature.spectral_bandwidth(y=signal, sr=sr)),
}

# Add the per-coefficient frame means of the MFCCs and their deltas. Rows of
# the three matrices are walked in lockstep so the columns stay grouped as
# MFCC_k, Delta MFCC_k, Delta-Delta MFCC_k for k = 1..n_mfcc.
for coeff_number, (mfcc_row, delta_row, delta2_row) in enumerate(
    zip(mfcc_features, delta_mfcc, delta_delta_mfcc), start=1
):
    features_dict[f"MFCC_{coeff_number}"] = mfcc_row.mean()
    features_dict[f"Delta MFCC_{coeff_number}"] = delta_row.mean()
    features_dict[f"Delta-Delta MFCC_{coeff_number}"] = delta2_row.mean()

# Build a one-row table from the mapping (needs `import pandas as pd` at the
# top of the file).
features_df = pd.DataFrame([features_dict])

# Write the row to CSV without the DataFrame index column.
features_df.to_csv("audio_features.csv", index=False)

print("Features successfully saved to audio_features.csv")

TKEO Mean: 1.3757924418233692e-05
TKEO Std Deviation: 5.7687352354330566e-05
TKEO Max: 0.003208727305166692
TKEO Min: -0.0003525722799950781
TKEO Skewness: 12.636999825908953
TKEO Kurtosis: 319.1505811690701
Wavelet level 1 Energy: 0.14214504
Wavelet level 1 Mean: -0.00028778263
Wavelet level 1 Std Deviation: 0.018158194
Wavelet level 1 Max: 0.07648288
Wavelet level 1 Min: -0.17878775
Wavelet level 1 Median: -0.0005069226
Wavelet level 1 Skewness: -1.8480874878776228
Wavelet level 1 Kurtosis: 21.301242199056396
Wavelet level 2 Energy: 0.64187354
Wavelet level 2 Mean: 0.00013210413
Wavelet level 2 Std Deviation: 0.038590778
Wavelet level 2 Max: 0.16194202
Wavelet level 2 Min: -0.10101342
Wavelet level 2 Median: 0.0014106412
Wavelet level 2 Skewness: 0.12286741848680743
Wavelet level 2 Kurtosis: 0.09346090925693096
Wavelet level 3 Energy: 1.2060119
Wavelet level 3 Mean: -0.000103882216
Wavelet level 3 Std Deviation: 0.03740421
Wavelet level 3 Max: 0.15763931
Wavelet level 3 Min: -0.16401