In [18]:
import os

import joblib
import librosa
import numpy as np
import scipy.io.wavfile as wav
import sounddevice as sd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Tamil Uyir (vowel) letters.
# The position of each letter in this list is the integer class label used
# when the dataset is built and when a prediction is decoded back to a letter,
# so the order must stay fixed for any model that has already been trained.
uyir_letters = ['அ', 'ஆ', 'இ', 'ஈ', 'உ', 'ஊ', 'எ', 'ஏ', 'ஐ', 'ஒ', 'ஓ', 'ஔ']

# Function to extract features from an audio file
def extract_features(file_path, max_pad_len=174, n_mfcc=40):
    """Load an audio file and return a fixed-width MFCC matrix.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to read with ``librosa.load``.
    max_pad_len : int, default 174
        Number of frames (columns) in the returned matrix. Shorter clips are
        right-padded with zeros; longer clips are truncated.
    n_mfcc : int, default 40
        Number of MFCC coefficients (rows) to compute.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_mfcc, max_pad_len)``.
    """
    # No explicit ``sr`` is passed, so librosa's default target sample rate
    # applies regardless of the rate the file was recorded at.
    audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
    mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

    # Truncate first, then zero-pad whatever is still missing, so both the
    # too-long and the too-short case end up exactly max_pad_len frames wide.
    mfccs = mfccs[:, :max_pad_len]
    missing_frames = max_pad_len - mfccs.shape[1]
    if missing_frames > 0:
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, missing_frames)), mode='constant')
    return mfccs

# Function to load dataset
def load_dataset(dataset_path):
    """Build the feature and label arrays from a folder-per-letter dataset.

    ``dataset_path`` must contain one sub-folder per entry of ``uyir_letters``
    (named with the letter itself). Every WAV file inside a folder becomes one
    sample whose label is the index of that letter in ``uyir_letters``.

    Parameters
    ----------
    dataset_path : str or path-like
        Root directory of the dataset.

    Returns
    -------
    tuple of numpy.ndarray
        ``(features, labels)``: the stacked outputs of ``extract_features``
        and the matching integer class labels, in the same order.

    Raises
    ------
    FileNotFoundError
        Propagated from ``os.listdir`` when a letter folder does not exist.
    """
    features = []
    labels = []

    # enumerate() yields the label directly instead of re-searching the list
    # with uyir_letters.index() for every file.
    for label, letter in enumerate(uyir_letters):
        letter_folder = os.path.join(dataset_path, letter)
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # sample order (and therefore any seeded train/test split) stable
        # across machines and filesystems.
        for file_name in sorted(os.listdir(letter_folder)):
            # Compare case-insensitively so files saved as ".WAV" are kept.
            if not file_name.lower().endswith('.wav'):
                continue
            file_path = os.path.join(letter_folder, file_name)
            features.append(extract_features(file_path))
            labels.append(label)

    return np.array(features), np.array(labels)

# Function to train the ML model (Random Forest)
def train_ml_model(X_train, y_train):
    """Fit a Random Forest classifier on the training data and return it.

    The forest uses 100 trees and a fixed random seed (42).
    """
    forest_settings = {'n_estimators': 100, 'random_state': 42}
    forest = RandomForestClassifier(**forest_settings)
    forest.fit(X_train, y_train)
    return forest

# Function to record a voice and save as wav
def record_voice(filename, duration=3, fs=16000):
    """Record a single-channel clip via sounddevice and write it to a WAV file.

    Parameters
    ----------
    filename : str
        Destination path of the WAV file.
    duration : int or float, default 3
        Length of the recording in seconds.
    fs : int, default 16000
        Sample rate used both for recording and for the written file.
    """
    print("Recording...")
    frame_count = int(duration * fs)
    captured = sd.rec(frame_count, samplerate=fs, channels=1)
    # Block here until the recording has completed before touching the data.
    sd.wait()
    wav.write(filename, fs, captured)
    print("Recording finished")

# Function to recognize Tamil Uyir letter from recorded voice
def recognize_uyir_letter(model, filename):
    """Predict which Tamil Uyir letter is spoken in an audio file.

    Features are extracted from ``filename``, flattened into a single-row
    matrix, and passed to ``model.predict``; the predicted class index is
    then looked up in ``uyir_letters``.
    """
    # One sample, all MFCC values laid out in a single row for the classifier.
    flat_sample = extract_features(filename).reshape(1, -1)
    predicted_index = model.predict(flat_sample)[0]
    return uyir_letters[predicted_index]

# Main execution
dataset_path = "./audio_dataset"
X, y = load_dataset(dataset_path)

# Fail fast with a readable message: with zero samples the reshape below
# cannot infer the feature dimension and raises an opaque ValueError.
if len(X) == 0:
    raise ValueError(f"No .wav files found under {dataset_path!r}; cannot train.")

# Reshape X for use with ML models (flatten the MFCC features)
X_reshaped = X.reshape(X.shape[0], -1)

# Train-test split (80% train / 20% validation, fixed seed).
# NOTE(review): the split is not stratified, so letters may be unevenly
# represented in the validation set. Consider stratify=y; it would raise if
# any letter has fewer than two samples, so confirm dataset sizes first.
X_train, X_val, y_train, y_val = train_test_split(X_reshaped, y, test_size=0.2, random_state=42)

# Train the Random Forest model
model = train_ml_model(X_train, y_train)

# Evaluate the model on validation data
y_pred = model.predict(X_val)
accuracy = accuracy_score(y_val, y_pred)
print(f"Validation Accuracy: {accuracy * 100:.2f}%")

# Save the model for later use (joblib is imported with the other imports at
# the top of the cell). Left as the last expression so the notebook shows the
# list of written files.
joblib.dump(model, 'uyir_letters_ml_model.pkl')

Validation Accuracy: 87.69%


['uyir_letters_ml_model.pkl']

In [54]:
# Record and predict
# Relies on `model` already existing in the kernel; it is created by the
# training cell, so that cell must have been run first.
voice_filename = 'recorded_voice.wav'
# Capture 3 seconds of audio and write it to voice_filename.
record_voice(voice_filename, duration=3)
# Extract features from the saved file and map the prediction to a letter.
recognized_letter = recognize_uyir_letter(model, voice_filename)

print(f"Recognized Tamil Uyir letter: {recognized_letter}")

Recording...
Recording finished
Recognized Tamil Uyir letter: ஏ
