<a href="https://colab.research.google.com/github/Jhyron/DSP/blob/main/audio_recognition_project/audio_recognition/CSV_%26_Audio_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import librosa
import numpy as np

def extract_mfcc_coefficients(audio_path, total_coeffs=1300, n_mfcc=13):
    """
    Build a fixed-length MFCC feature vector for a single audio file.

    The file is read with ``librosa.load(..., sr=None)``, MFCCs are computed
    with ``n_mfcc`` coefficients, and the transposed MFCC array is flattened
    and then cut or zero-padded to exactly ``total_coeffs`` values.

    Any exception raised along the way is reported with ``print`` and an
    all-zero vector of length ``total_coeffs`` is returned instead.
    """
    try:
        signal, rate = librosa.load(audio_path, sr=None)

        # Transpose before flattening so the values are laid out row by row
        # of the transposed MFCC array.
        frames = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc).T
        coeffs = frames.flatten()[:total_coeffs]

        # Short recordings yield fewer values than requested: fill the tail with zeros.
        missing = total_coeffs - len(coeffs)
        if missing > 0:
            coeffs = np.pad(coeffs, (0, missing), mode='constant')

        return coeffs

    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        # Fall back to a zero vector so callers always receive the same length
        return np.zeros(total_coeffs)

def generate_mfcc_matrix(num_files=50, total_coeffs=1300, file_prefix="hen_audio_", file_suffix=".wav"):
    """
    Stack the MFCC vectors of a numbered series of audio files into one matrix.

    Files are named ``<file_prefix><NN><file_suffix>`` with ``NN`` running from
    1 to ``num_files`` (zero-padded to at least two digits), e.g.
    ``hen_audio_01.wav`` ... ``hen_audio_50.wav`` with the defaults. Row ``k``
    of the result holds the vector returned by ``extract_mfcc_coefficients``
    for file number ``k + 1``.

    Returns an array of shape ``(num_files, total_coeffs)``.
    """
    matrix = np.zeros((num_files, total_coeffs))

    # Build every file name up front, then fill the matrix one row per file.
    audio_paths = [f"{file_prefix}{i:02}{file_suffix}" for i in range(1, num_files + 1)]
    for row, audio_path in enumerate(audio_paths):
        matrix[row, :] = extract_mfcc_coefficients(audio_path, total_coeffs=total_coeffs)

    return matrix

def save_matrix_to_csv(matrix, output_path="final_mfcc_matrix.csv"):
    """
    Write ``matrix`` to ``output_path`` as comma-separated text and print a confirmation.
    """
    np.savetxt(fname=output_path, X=matrix, delimiter=',')
    confirmation = f"Final MFCC matrix saved to {output_path}"
    print(confirmation)

def main():
    """
    Build the MFCC matrix with the default settings, print its shape, and save it as CSV.
    """
    matrix = generate_mfcc_matrix()

    # With the default arguments the shape is (50, 1300)
    print("Final MFCC Matrix Shape: ", matrix.shape)

    save_matrix_to_csv(matrix)


# Run the pipeline only when this code is executed directly
if __name__ == "__main__":
    main()


Final MFCC Matrix Shape:  (50, 1300)
Final MFCC matrix saved to final_mfcc_matrix.csv


In [None]:
!pip install pysoundfile


Collecting pysoundfile
  Downloading PySoundFile-0.9.0.post1-py2.py3-none-any.whl.metadata (9.4 kB)
Downloading PySoundFile-0.9.0.post1-py2.py3-none-any.whl (24 kB)
Installing collected packages: pysoundfile
Successfully installed pysoundfile-0.9.0.post1


In [None]:
import numpy as np
from sklearn import svm
import os
import librosa
import pathlib

def extract(
    directory="G:\\My Drive\\Academics\\AC_24-25_1\\"
              "ECE 4 - Signals, Spectra, Signal Processing\\audio_recognition",
):
    """
    Load every feature CSV in ``directory`` and build a labelled training set.

    Each regular file in the directory is read as a comma-separated numeric
    table whose rows are feature vectors. Every row is labelled with the
    file's name up to its first dot (``alice.csv`` -> ``"alice"``).

    Parameters
    ----------
    directory : str, optional
        Folder holding one CSV per class. Defaults to the folder that was
        previously hard-coded, so a bare ``extract()`` call keeps working.

    Returns
    -------
    all_features : numpy.ndarray
        2-D array with the rows of every file stacked, files taken in
        name order.
    all_class : numpy.ndarray
        1-D array holding one label per row of ``all_features``.

    Raises
    ------
    ValueError
        If ``directory`` contains no regular files, or if the files do not
        all have the same number of columns.
    """
    with os.scandir(directory) as entries:
        # scandir yields entries in arbitrary order; sort so the output is reproducible
        files = sorted((entry.name, entry.path) for entry in entries if entry.is_file())

    if not files:
        raise ValueError(f"No feature files found in {directory!r}")

    feature_blocks = []
    label_blocks = []
    for name, path in files:
        # ndmin=2 keeps a one-row file as a (1, n_features) matrix
        features = np.loadtxt(path, delimiter=",", dtype=float, ndmin=2)

        # Use the entry's own name rather than a fixed position in the split
        # path, so the label does not depend on how deep the folder is.
        student = name.split('.')[0]

        feature_blocks.append(features)
        # One label per row actually read, instead of a fixed count of 100
        label_blocks.append(np.full(len(features), student))

    # Stack once at the end instead of re-allocating the arrays for every row
    all_features = np.vstack(feature_blocks)
    all_class = np.concatenate(label_blocks)
    return all_features, all_class


# The guard must compare the dunder `__name__` with "__main__"; the bare
# `name == 'main'` raised NameError before any of this code could run.
if __name__ == "__main__":
    # `sf` is used below to write the cropped clip, so it has to be imported.
    import soundfile as sf

    SAMPLE_RATE = 44100      # rate the recording is resampled to when loaded
    ONSET_THRESHOLD = 0.12   # amplitude a sample must exceed to mark the clip start
    CROP_SAMPLES = 44100     # samples kept from the onset (one second at SAMPLE_RATE)
    TOTAL_COEFFS = 1300      # length of the feature vector given to the classifier
    N_MFCC = 13              # MFCC coefficients requested from librosa

    # Train a linear SVM on the feature rows and labels returned by extract()
    train_features, train_class = extract()
    audio_classifier = svm.LinearSVC()
    audio_classifier.fit(train_features, train_class)


    #Cropping Activity

    #path to the fresh sample file
    path_to_audio = "temporary.wav"
    audio, sample_rate = librosa.load(path_to_audio, sr=SAMPLE_RATE)

    # Indices of every sample whose amplitude exceeds the onset threshold
    onset_indices = np.where(audio > ONSET_THRESHOLD)[0]
    if onset_indices.size > 0:
        # Keep CROP_SAMPLES samples starting at the first one above the threshold
        start_index = onset_indices[0]
        cropped_audio = audio[start_index:start_index + CROP_SAMPLES]
        if len(cropped_audio) < CROP_SAMPLES:
            # The recording ends too soon after the onset; the file is left untouched
            print("Not enough samples to crop to 44100.")
        else:
            # NOTE: this overwrites the input file with the cropped clip
            sf.write(path_to_audio, cropped_audio, sample_rate, subtype='PCM_24')
    else:
        print("No positive samples found in the audio.")


    #CSV Activity
    # Re-read the (possibly cropped) file without resampling and build the
    # flattened MFCC vector, truncated or zero-padded to TOTAL_COEFFS values.
    y, sr = librosa.load(path_to_audio, sr=None)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=N_MFCC)
    mfcc_flat = mfcc.T.flatten()[:TOTAL_COEFFS]
    if len(mfcc_flat) < TOTAL_COEFFS:
        mfcc_flat = np.pad(mfcc_flat, (0, TOTAL_COEFFS - len(mfcc_flat)), mode='constant')

    # predict() expects a 2-D batch, hence the single-element list
    student_name = audio_classifier.predict([mfcc_flat])
    print(student_name)

NameError: name 'name' is not defined