In [6]:
import librosa
import os
import numpy as np
import soundfile as sf  # Import soundfile to save audio files

def reduce_noise(file_path, output_path):
    """
    Perform noise reduction by applying a high-pass filter (pre-emphasis).
    Args:
        file_path (str): Path to the input audio file.
        output_path (str): Path to save the denoised audio file.
    """
    # Load the audio file
    y, sr = librosa.load(file_path, sr=16000)
    
    # Perform pre-emphasis to reduce noise
    y_denoised = librosa.effects.preemphasis(y)
    
    # Save the denoised audio to a new file using soundfile
    sf.write(output_path, y_denoised, sr)
    print(f"Noise-reduced file saved at: {output_path}")

def apply_noise_reduction(base_path):
    """
    Apply noise reduction to all .m4a files in the dataset.
    Args:
        base_path (str): Path to the base folder containing the dataset.
    """
    for folder_name in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder_name)
        if os.path.isdir(folder_path):  # Only process folders
            for file_name in os.listdir(folder_path):
                if file_name.endswith(".m4a"):  # Process only .m4a files
                    file_path = os.path.join(folder_path, file_name)
                    # Generate new file path for the denoised file
                    output_path = os.path.join(folder_path, file_name.replace(".m4a", "_denoised.wav"))
                    # Perform noise reduction
                    reduce_noise(file_path, output_path)

# Base path to your dataset
base_path = r"D:\H\works\RP\shanoojan\ds"
apply_noise_reduction(base_path)


  y, sr = librosa.load(file_path, sr=16000)


Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip10_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip11_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip12_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip13_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip14_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip15_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip16_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip17_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip18_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip19_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds\aadu\aadu_clip2_denoised.wav
Noise-reduced file saved at: D:\H\works\RP\shanoojan\ds

In [10]:
import librosa
import os
import numpy as np
import soundfile as sf  # To save the augmented audio

def augment_audio(file_path):
    """
    Perform data augmentation on an audio file: pitch shift, time stretching, and adding noise.
    Args:
        file_path (str): Path to the input audio file.
    Returns:
        list: List of augmented audio samples.
    """
    # Load the audio file
    y, sr = librosa.load(file_path, sr=16000)

    # Pitch Shift (change pitch)
    y_pitch = librosa.effects.pitch_shift(y, sr=sr, n_steps=2)  # Adjust pitch by 2 steps

    # Time Stretching (speed up)
    y_stretch = librosa.effects.time_stretch(y, rate=1.1)  # Speed up by 10%

    # Add random noise
    noise = np.random.randn(len(y)) * 0.01  # Small random noise
    y_noise = y + noise

    return [y_pitch, y_stretch, y_noise]

def save_augmented_data(base_path):
    """
    Augment all denoised audio files in the dataset by pitch shifting, time stretching, and adding noise.
    Args:
        base_path (str): Path to the base folder containing the dataset.
    """
    for folder_name in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder_name)
        if os.path.isdir(folder_path):  # Only process folders
            for file_name in os.listdir(folder_path):
                # Process only denoised files (.wav files with '_denoised' in their names)
                if file_name.endswith("_denoised.wav"):  # Denoised files
                    file_path = os.path.join(folder_path, file_name)
                    augmented_audio = augment_audio(file_path)
                    
                    # Save each augmented version
                    for idx, audio in enumerate(augmented_audio, start=1):
                        new_file_name = f"augmented_{idx}_{file_name}"
                        new_file_path = os.path.join(folder_path, new_file_name)
                        sf.write(new_file_path, audio, samplerate=16000)  # Correct argument
                        print(f"Saved augmented file: {new_file_path}")

# Apply data augmentation to the denoised files
base_path = r"D:\H\works\RP\shanoojan\ds"
save_augmented_data(base_path)


Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_1_aadu_clip10_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_2_aadu_clip10_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_3_aadu_clip10_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_1_aadu_clip11_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_2_aadu_clip11_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_3_aadu_clip11_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_1_aadu_clip12_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_2_aadu_clip12_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_3_aadu_clip12_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_1_aadu_clip13_denoised.wav
Saved augmented file: D:\H\works\RP\shanoojan\ds\aadu\augmented_2_aadu_clip13_denoised.wav

In [11]:
import pandas as pd
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import os
import soundfile as sf

# Function to extract MFCC features
def extract_mfcc(file_path, n_mfcc=13):
    """
    Extract MFCC features from an audio file.
    Args:
        file_path (str): Path to the audio file.
        n_mfcc (int): Number of MFCC features to extract.
    Returns:
        np.array: Averaged MFCC features for the file.
    """
    y, sr = librosa.load(file_path, sr=16000)  # Load audio at 16kHz
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)  # Extract MFCCs
    return np.mean(mfcc.T, axis=0)  # Average across time frames

# Prepare the dataset (MFCC features and labels)
def prepare_data(base_path):
    data = []
    labels = []
    
    for folder_name in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder_name)
        if os.path.isdir(folder_path):
            for file_name in os.listdir(folder_path):
                if file_name.endswith("_denoised.wav"):  # Process only denoised .wav files
                    file_path = os.path.join(folder_path, file_name)
                    mfcc_features = extract_mfcc(file_path)
                    data.append(mfcc_features)
                    labels.append(folder_name)  # The folder name is the label

    return np.array(data), np.array(labels)

# Load the features and labels
base_path = r"D:\H\works\RP\shanoojan\ds"  # Path to your dataset
X, y = prepare_data(base_path)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Save the trained model
import joblib
joblib.dump(model, "speech_model_augmented.pkl")
print("Model saved to speech_model_augmented.pkl")


Accuracy: 0.45
Classification Report:
              precision    recall  f1-score   support

        aadu       0.36      0.29      0.32        28
       appam       0.59      0.73      0.65        22
      bothal       0.55      0.48      0.51        23
      erumbu       0.35      0.39      0.37        18
        kaai       0.53      0.43      0.48        23
      kaatru       0.39      0.41      0.40        22
       kadai       0.44      0.58      0.50        19
       kadal       0.26      0.22      0.24        23
   kadalalai       0.36      0.48      0.41        25
         kal       0.29      0.40      0.33        15
      kalugu       0.43      0.41      0.42        22
     kannadi       0.38      0.26      0.31        19
       kathi       0.50      0.67      0.57        15
      kinnam       0.33      0.41      0.37        17
        koli       0.47      0.36      0.41        25
      koodai       0.37      0.27      0.31        26
    kuthirai       0.37      0.67      0.48

In [12]:
import joblib
import librosa
import numpy as np
from sklearn.metrics import accuracy_score, classification_report
import os
import soundfile as sf

# Load the trained model
model = joblib.load('speech_model_augmented.pkl')

# Function to extract MFCC from audio file (same as used during training)
def extract_mfcc(file_path, n_mfcc=13):
    y, sr = librosa.load(file_path, sr=16000)  # Load audio at 16kHz
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)  # Extract MFCCs
    return np.mean(mfcc.T, axis=0)  # Average across time frames

# Prepare the test data (MFCC features and labels)
def prepare_test_data(base_path):
    data = []
    labels = []
    
    for folder_name in os.listdir(base_path):
        folder_path = os.path.join(base_path, folder_name)
        if os.path.isdir(folder_path):
            for file_name in os.listdir(folder_path):
                # Process only denoised files for testing (.wav files with '_denoised' in their names)
                if file_name.endswith("_denoised.wav"):  # Denoised files
                    file_path = os.path.join(folder_path, file_name)
                    mfcc_features = extract_mfcc(file_path)
                    data.append(mfcc_features)
                    labels.append(folder_name)  # The folder name is the label

    return np.array(data), np.array(labels)

# Test the model on a separate test set (different from training data)
test_base_path = r"D:\H\works\RP\shanoojan\ds"  # Path to your test dataset
X_test, y_test = prepare_test_data(test_base_path)

# Make predictions using the trained model
y_pred = model.predict(X_test)

# Evaluate the model
print(f"Accuracy: {accuracy_score(y_test, y_pred):.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))


Accuracy: 0.89
Classification Report:
              precision    recall  f1-score   support

        aadu       0.86      0.81      0.83       104
       appam       0.90      0.94      0.92       104
      bothal       0.91      0.88      0.89       100
      erumbu       0.88      0.89      0.89       104
        kaai       0.91      0.88      0.90       108
      kaatru       0.86      0.87      0.87       100
       kadai       0.83      0.89      0.86        76
       kadal       0.86      0.83      0.84       104
   kadalalai       0.81      0.88      0.84       104
         kal       0.86      0.91      0.88       100
      kalugu       0.89      0.88      0.88       108
     kannadi       0.92      0.87      0.89       104
       kathi       0.90      0.95      0.92        96
      kinnam       0.87      0.90      0.89       104
        koli       0.90      0.85      0.87       104
      koodai       0.88      0.82      0.85       104
    kuthirai       0.85      0.95      0.90