# Calculate the average number of frames per audio file

In [11]:
import os
import wave
import numpy as np

# Specify the directory containing the WAV files
directory = r'C:\Users\...'
# User-defined frame length and hop length, both in samples per channel.
N_F = 1024
N_H = 512

total_frames = 0
file_count = 0
sample_rates = []

for filename in os.listdir(directory):
    if filename.endswith(".wav"):
        with wave.open(os.path.join(directory, filename), 'rb') as wav_file:
            # Signal length in samples per channel, read from the header.
            # Counting decoded int16 values instead would over-count
            # multi-channel files (their samples are interleaved) and
            # mis-count files that are not 16-bit; it also forces the whole
            # file to be read just to measure its length.
            n_samples = wav_file.getnframes()

            # Calculate the number of frames
            # Formula based on Digital Signals Theory book:
            #   1 + floor((N - N_F) / N_H)
            # Clamp at 0 because the raw formula yields zero or a negative
            # count when the signal is shorter than one frame.
            N_frames = max(0, 1 + (n_samples - N_F) // N_H)
            total_frames += N_frames
            file_count += 1

            # Get and store the sample rate
            sample_rates.append(wav_file.getframerate())

# Fail with a clear message instead of a ZeroDivisionError further down
if file_count == 0:
    raise ValueError(f"No .wav files found in {directory!r}")

# Calculate the average number of frames
average_frames = total_frames / file_count
print(f"Average number of frames per WAV file: {average_frames}")

# Get the average sample rate
average_sample_rate = sum(sample_rates) / len(sample_rates)

# Get the average frame duration as the mean of the per-file durations.
# N_F / average_sample_rate only equals this mean when every file shares
# the same sample rate.
frame_duration_ms = 1000 * sum(N_F / rate for rate in sample_rates) / len(sample_rates)
print(f"Average duration of one frame: {frame_duration_ms:.2f} ms")

Average number of frames per WAV file: 346.65486111111113
Average duration of one frame: 21.33 ms


In [12]:
# Specify the directory containing the WAV files
directory = r'C:\Users\...'
# User-defined frame length and hop length, both in samples per channel.
N_F = 2048
N_H = 1024

total_frames = 0
file_count = 0
sample_rates = []

for filename in os.listdir(directory):
    if filename.endswith(".wav"):
        with wave.open(os.path.join(directory, filename), 'rb') as wav_file:
            # Signal length in samples per channel, read from the header.
            # Counting decoded int16 values instead would over-count
            # multi-channel files (their samples are interleaved) and
            # mis-count files that are not 16-bit; it also forces the whole
            # file to be read just to measure its length.
            n_samples = wav_file.getnframes()

            # Calculate the number of frames
            # Formula based on Digital Signals Theory book:
            #   1 + floor((N - N_F) / N_H)
            # Clamp at 0 because the raw formula yields zero or a negative
            # count when the signal is shorter than one frame.
            N_frames = max(0, 1 + (n_samples - N_F) // N_H)
            total_frames += N_frames
            file_count += 1

            # Get and store the sample rate
            sample_rates.append(wav_file.getframerate())

# Fail with a clear message instead of a ZeroDivisionError further down
if file_count == 0:
    raise ValueError(f"No .wav files found in {directory!r}")

# Calculate the average number of frames
average_frames = total_frames / file_count
print(f"Average number of frames per WAV file: {average_frames}")

# Get the average sample rate
average_sample_rate = sum(sample_rates) / len(sample_rates)

# Get the average frame duration as the mean of the per-file durations.
# N_F / average_sample_rate only equals this mean when every file shares
# the same sample rate.
frame_duration_ms = 1000 * sum(N_F / rate for rate in sample_rates) / len(sample_rates)
print(f"Average duration of one frame: {frame_duration_ms:.2f} ms")

Average number of frames per WAV file: 172.58263888888888
Average duration of one frame: 42.67 ms


In [13]:
# Specify the directory containing the WAV files
directory = r'C:\Users\...'
# User-defined frame length and hop length, both in samples per channel.
N_F = 4096
N_H = 2048

total_frames = 0
file_count = 0
sample_rates = []

for filename in os.listdir(directory):
    if filename.endswith(".wav"):
        with wave.open(os.path.join(directory, filename), 'rb') as wav_file:
            # Signal length in samples per channel, read from the header.
            # Counting decoded int16 values instead would over-count
            # multi-channel files (their samples are interleaved) and
            # mis-count files that are not 16-bit; it also forces the whole
            # file to be read just to measure its length.
            n_samples = wav_file.getnframes()

            # Calculate the number of frames
            # Formula based on Digital Signals Theory book:
            #   1 + floor((N - N_F) / N_H)
            # Clamp at 0 because the raw formula yields zero or a negative
            # count when the signal is shorter than one frame.
            N_frames = max(0, 1 + (n_samples - N_F) // N_H)
            total_frames += N_frames
            file_count += 1

            # Get and store the sample rate
            sample_rates.append(wav_file.getframerate())

# Fail with a clear message instead of a ZeroDivisionError further down
if file_count == 0:
    raise ValueError(f"No .wav files found in {directory!r}")

# Calculate the average number of frames
average_frames = total_frames / file_count
print(f"Average number of frames per WAV file: {average_frames}")

# Get the average sample rate
average_sample_rate = sum(sample_rates) / len(sample_rates)

# Get the average frame duration as the mean of the per-file durations.
# N_F / average_sample_rate only equals this mean when every file shares
# the same sample rate.
frame_duration_ms = 1000 * sum(N_F / rate for rate in sample_rates) / len(sample_rates)
print(f"Average duration of one frame: {frame_duration_ms:.2f} ms")

Average number of frames per WAV file: 85.53888888888889
Average duration of one frame: 85.33 ms


In [14]:
# Specify the directory containing the WAV files
directory = r'C:\Users\...'
# User-defined frame length and hop length, both in samples per channel.
# Hop equals frame length here, so consecutive frames do not overlap.
N_F = 4096
N_H = 4096

total_frames = 0
file_count = 0
sample_rates = []

for filename in os.listdir(directory):
    if filename.endswith(".wav"):
        with wave.open(os.path.join(directory, filename), 'rb') as wav_file:
            # Signal length in samples per channel, read from the header.
            # Counting decoded int16 values instead would over-count
            # multi-channel files (their samples are interleaved) and
            # mis-count files that are not 16-bit; it also forces the whole
            # file to be read just to measure its length.
            n_samples = wav_file.getnframes()

            # Calculate the number of frames
            # Formula based on Digital Signals Theory book:
            #   1 + floor((N - N_F) / N_H)
            # Clamp at 0 because the raw formula yields zero or a negative
            # count when the signal is shorter than one frame.
            N_frames = max(0, 1 + (n_samples - N_F) // N_H)
            total_frames += N_frames
            file_count += 1

            # Get and store the sample rate
            sample_rates.append(wav_file.getframerate())

# Fail with a clear message instead of a ZeroDivisionError further down
if file_count == 0:
    raise ValueError(f"No .wav files found in {directory!r}")

# Calculate the average number of frames
average_frames = total_frames / file_count
print(f"Average number of frames per WAV file: {average_frames}")

# Get the average sample rate
average_sample_rate = sum(sample_rates) / len(sample_rates)

# Get the average frame duration as the mean of the per-file durations.
# N_F / average_sample_rate only equals this mean when every file shares
# the same sample rate.
frame_duration_ms = 1000 * sum(N_F / rate for rate in sample_rates) / len(sample_rates)
print(f"Average duration of one frame: {frame_duration_ms:.2f} ms")

Average number of frames per WAV file: 43.02569444444445
Average duration of one frame: 85.33 ms


# Get the MFCCs for all audio files (for target_frames = 347, 173, 86 & 43)

In [10]:
import librosa

def process_audio_files(directory, target_frames, sr=22050, n_mfcc=128,
                        n_fft=2048, hop_length=512):
    """Compute fixed-width MFCC matrices for every .wav file in a directory.

    Each file is loaded with librosa, converted to an MFCC matrix, and then
    truncated or zero-padded along the time axis to exactly `target_frames`
    columns. The stacked result is saved next to the audio as
    'all_mfcc_features_<target_frames>.npy' and returned.

    Args:
        directory: Folder scanned (non-recursively) for files ending in ".wav".
        target_frames: Number of MFCC frames (columns) kept per file. This is
            counted in frames of the MFCC analysis itself, i.e. it depends on
            `sr`, `n_fft` and `hop_length` below, not on any frame count
            computed with other settings.
        sr: Sample rate passed to librosa.load (audio is resampled to it).
        n_mfcc: Number of MFCC coefficients (rows) per frame.
        n_fft: FFT window length in samples, forwarded to librosa.
        hop_length: Hop between frames in samples, forwarded to librosa.

    Returns:
        Array of shape (n_files, n_mfcc, target_frames). Rows follow the order
        in which os.listdir yields the files, so any label array must be built
        with the same ordering.

    Raises:
        ValueError: If `directory` contains no .wav files.

    Note:
        The output file name depends only on `target_frames`; calling again
        with the same `target_frames` but different analysis settings
        overwrites the earlier file.
    """
    features = []
    for filename in os.listdir(directory):
        if not filename.endswith(".wav"):
            continue
        file_path = os.path.join(directory, filename)
        # Load audio file; use the rate librosa reports so sr=None also works
        y, loaded_sr = librosa.load(file_path, sr=sr)

        # Compute MFCCs with explicit framing so callers can align it with
        # the frame/hop lengths they used to choose target_frames
        mfcc = librosa.feature.mfcc(
            y=y, sr=loaded_sr, n_mfcc=n_mfcc, n_fft=n_fft, hop_length=hop_length
        )

        available = mfcc.shape[1]
        if available > target_frames:
            mfcc = mfcc[:, :target_frames]
        elif available < target_frames:
            # Zero-pad on the right. np.pad keeps the MFCC dtype, whereas
            # concatenating with a float64 zeros array would upcast the
            # matrix (and, after stacking, the whole feature array).
            mfcc = np.pad(mfcc, ((0, 0), (0, target_frames - available)))

        features.append(mfcc)

    if not features:
        # np.stack on an empty list raises an opaque ValueError; say why
        raise ValueError(f"No .wav files found in {directory!r}")

    # Convert list to 3D numpy array
    features = np.stack(features)

    # Save all features to .npy file
    output_file_name = f'all_mfcc_features_{str(target_frames)}.npy'
    output_file_path = os.path.join(directory, output_file_name)
    np.save(output_file_path, features)  # Saving in .npy format

    return features

# Directory containing the audio files
directory = r'C:\Users\....'
# Produce one feature file per frame budget, largest first; all_features is
# left holding the result of the final (43-frame) run.
for target_frames in (347, 173, 86, 43):
    all_features = process_audio_files(directory, target_frames)

# Get the labels for all audio files

In [None]:
def extract_labels_and_save(directory):
    """Read the integer label from each .wav file name and save them all.

    The label is the third '-'-separated field of the file name. Labels are
    collected in the order os.listdir yields the files, written to
    'labels.npy' inside `directory`, and returned as a NumPy array.
    """
    wav_names = (name for name in os.listdir(directory) if name.endswith(".wav"))

    # The third dash-separated field of the file name carries the label
    labels_array = np.array([int(name.split('-')[2]) for name in wav_names])

    # Persist next to the audio files
    np.save(os.path.join(directory, 'labels.npy'), labels_array)
    return labels_array

# Specify the directory containing the audio files; the labels are parsed
# from the file names and labels.npy is written into this same directory.
directory = r'C:\Users\....'
labels = extract_labels_and_save(directory)