In [None]:
pip install pydub


Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1


In [None]:
import os
from pydub import AudioSegment

# Source .m4a recordings and destination for the converted .wav files.
# NOTE(review): both paths assume Google Drive is already mounted at
# /content/drive — confirm, no mount step is visible in this notebook.
input_folder = "/content/drive/MyDrive/speechvideo/Correct"
output_folder = "/content/drive/MyDrive/Speech/Voice_analysis/Correct"

# NOTE(review): only the 'Correct' folder is converted here, while
# extract_features() also reads an 'Incorrect' folder; that folder has to be
# converted separately (e.g. by re-running this cell with both paths changed).
os.makedirs(output_folder, exist_ok=True)

# os.listdir() returns entries in arbitrary order; sorting keeps the conversion
# log (and any later debugging) reproducible between runs.
for audio_file in sorted(os.listdir(input_folder)):
    # Case-insensitive match so recordings saved as ".M4A" are not silently skipped.
    if not audio_file.lower().endswith(".m4a"):
        continue

    audio_path = os.path.join(input_folder, audio_file)
    output_path = os.path.join(output_folder, f"{os.path.splitext(audio_file)[0]}.wav")

    # Load and convert .m4a to .wav.
    # Deliberately best-effort: one unreadable recording is reported and
    # skipped instead of aborting the whole batch.
    try:
        audio = AudioSegment.from_file(audio_path, format="m4a")
        audio.export(output_path, format="wav")
        print(f"Converted: {audio_file} -> {output_path}")
    except Exception as e:
        print(f"Error converting {audio_file}: {e}")


Converted: 001.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/001.wav
Converted: 002.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/002.wav
Converted: 003.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/003.wav
Converted: 004.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/004.wav
Converted: 005.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/005.wav
Converted: 006.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/006.wav
Converted: 007.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/007.wav
Converted: 008.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/008.wav
Converted: 009.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/009.wav
Converted: 010.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/010.wav
Converted: 011.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/011.wav
Converted: 012.m4a -> /content/drive/MyDrive/Speech/Voice_analysis/Correct/012.wav
Conv

In [None]:
import os
import pickle

import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

def extract_features(directory, labels=('Correct', 'Incorrect'), n_mfcc=13):
    """Build one averaged MFCC feature vector per .wav file, grouped by label folder.

    Each label is expected to be a sub-folder of ``directory``; every file in it
    whose name ends in ``.wav`` (case-insensitive) contributes one sample.

    Args:
        directory: Root folder containing one sub-folder per label.
        labels: Names of the sub-folders to read; each name is also used as the
            class label of the files inside it.
        n_mfcc: Number of MFCC coefficients computed per frame, i.e. the
            length of each feature vector.

    Returns:
        A ``(features, labels)`` pair of NumPy arrays. ``features`` holds one
        row of ``n_mfcc`` frame-averaged coefficients per file; ``labels`` holds
        the matching folder name. Both are empty arrays if no .wav file is found.

    Raises:
        FileNotFoundError: If a label sub-folder does not exist (from os.listdir).
    """
    features = []
    targets = []
    for label in labels:
        folder = os.path.join(directory, label)
        # os.listdir() order is arbitrary. Sorting fixes the row order, which
        # a seeded train/test split needs in order to be reproducible.
        for file_name in sorted(os.listdir(folder)):
            if not file_name.lower().endswith('.wav'):
                continue
            file_path = os.path.join(folder, file_name)
            # sr=None keeps each file's native sampling rate instead of resampling.
            y, sr = librosa.load(file_path, sr=None)
            mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
            # Average over frames so clips of any length give a fixed-size vector.
            features.append(np.mean(mfcc.T, axis=0))
            targets.append(label)
    return np.array(features), np.array(targets)


In [None]:
# Root folder holding the 'Correct' and 'Incorrect' sub-folders of .wav files
# that extract_features() walks.
dataset_directory = '/content/drive/MyDrive/Speech/Voice_analysis'
# X: one frame-averaged MFCC vector per clip; y: the folder name (label) of each clip.
X, y = extract_features(dataset_directory)

In [None]:
# Turn the string labels into integer class ids for the classifier. `le` is kept
# around because predict_audio() uses it to map predictions back to label names.
le = LabelEncoder()
y_encoded = le.fit_transform(y)

In [None]:
# Hold out 20% of the clips for evaluation. Stratifying on the labels keeps the
# Correct/Incorrect ratio the same in both splits; an unstratified hold-out can
# end up skewed towards one class and distort the reported accuracy.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

In [None]:
# SVMs are not scale-invariant, and MFCC coefficients typically differ a lot in
# magnitude, so standardise the features before the linear SVM. Keeping the
# scaler inside the pipeline means it is fitted on the training split only, and
# the same scaling is applied wherever `clf.predict` / `clf.score` is called.
clf = make_pipeline(StandardScaler(), SVC(kernel='linear', random_state=42))
clf.fit(X_train, y_train)

In [None]:
# Mean accuracy of the fitted classifier on the held-out test split.
accuracy = clf.score(X_test, y_test)
print(f"Model Accuracy: {accuracy * 100:.2f}%")

Model Accuracy: 78.57%


In [None]:
# Persist the fitted classifier.
with open('voice_model.pkl', 'wb') as f:
    pickle.dump(clf, f)

# Persist the fitted LabelEncoder as well: predict_audio() needs it to turn the
# classifier's integer output back into 'Correct' / 'Incorrect', so the model
# file alone is not enough to make predictions in a fresh session.
with open('label_encoder.pkl', 'wb') as f:
    pickle.dump(le, f)

In [None]:
def predict_audio(file_path, model, label_encoder):
    """Classify a single audio file and return its decoded label.

    The clip is loaded at its native sampling rate, reduced to a vector of 13
    frame-averaged MFCC coefficients, and passed to ``model.predict`` as a
    one-row matrix.

    Args:
        file_path: Path of the audio file to classify.
        model: Fitted estimator exposing ``predict``.
        label_encoder: Fitted encoder exposing ``inverse_transform``.

    Returns:
        Whatever ``label_encoder.inverse_transform`` yields for the single
        prediction (callers index it with ``[0]`` to get the label).
    """
    signal, sample_rate = librosa.load(file_path, sr=None)
    coefficients = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=13)

    # Collapse the time axis, then add a leading axis: the model expects a
    # 2-D (n_samples, n_features) input even for one sample.
    averaged = np.mean(coefficients.T, axis=0)
    single_row = averaged[np.newaxis, :]

    encoded_prediction = model.predict(single_row)
    return label_encoder.inverse_transform(encoded_prediction)

In [None]:
# Reload the classifier from disk to check that the saved artefact is usable.
# pickle.load can execute arbitrary code, so only load files you created yourself.
with open('voice_model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)

In [None]:
# Smoke test of the reloaded model on a single clip.
# NOTE(review): this clip lives in the dataset folder that was used to build X,
# so a correct answer here does not demonstrate generalisation to new recordings.
test_file = '/content/drive/MyDrive/Speech/Voice_analysis/Correct/003.wav'
prediction = predict_audio(test_file, loaded_model, le)
# predict_audio returns the decoded labels for a one-row input; show the only entry.
print("Prediction:", prediction[0])

Prediction: Correct
