In [2]:
import os
import librosa
import pandas as pd
import numpy as np

# This commands help you transfer mp4 files to mp3 files if you want to try more sound files
# Please fill in your video data location
! ffmpeg -i 'your video.mp4' 'your video.mp3'

# Place your mp3 converted file into a directory folder, if the folder name is different change it below.
audio_folder = './sound'

# List all .mp3 files in the folder
audio_files = [os.path.join(audio_folder, file) for file in os.listdir(audio_folder) if file.endswith('.mp3')]

# Initialize an empty list to store features
features_list = []

# Iterate over audio files to extract features
for file in audio_files:

    y_sound, sr_sound = librosa.load(file, sr=None)

    # Calculate the mean and standard deviation of amplitude
    amplitude_mean = np.mean(np.abs(y))
    amplitude_std = np.std(np.abs(y))

    # Calculate FFT to extract frequency information
    fft = np.fft.fft(y)
    magnitude = np.abs(fft)
    frequency = np.fft.fftfreq(len(magnitude), 1/sr)
    dominant_frequency = frequency[np.argmax(magnitude[:len(magnitude)//2])]

    # Extract pitch
    pitches, magnitudes = librosa.piptrack(y_sound=y_sound, sr_sound=sr_sound)
    # Get average pitch
    if np.any(magnitudes):
        pitch_mean = np.mean(pitches[magnitudes > np.median(magnitudes)])
    else:
        pitch_mean = 0

    # Calculate Mel spectrogram and convert to decibel units
    S = librosa.feature.melspectrogram(y_sound=y_sound, sr_sound=sr_sound)
    S_DB = librosa.power_to_db(S, ref=np.max)
    mel_spectrogram_mean = np.mean(S_DB)
    mel_spectrogram_std = np.std(S_DB)

    # Create features and add to the list
    features = {
        'Filename': os.path.basename(file),
        'Amplitude Mean': amplitude_mean,
        'Amplitude Std': amplitude_std,
        'Dominant Frequency': dominant_frequency,
        'Pitch Mean': pitch_mean,
        'Mel Spectrogram Mean': mel_spectrogram_mean,
        'Mel Spectrogram Std': mel_spectrogram_std,
    }

    features_list.append(features)

features_df = pd.DataFrame(features_list)

print(features_df.head())

# Save the DataFrame to a CSV file
features_df.to_csv('audio_features.csv', index=False)


SyntaxError: invalid syntax (<ipython-input-2-bd98be9156fc>, line 8)