In [None]:
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import pandas as pd

In [3]:
!pip install librosa
!pip install scikit-learn




In [5]:
import os
import librosa
import numpy as np


In [6]:
confident_folder = '/content/drive/MyDrive/dataset/Voicedata/confident'
non_confident_folder = '/content/drive/MyDrive/dataset/Voicedata/Non-confident'


In [23]:
import librosa
import numpy as np

def load_audio_files_combined(folder):
    audio_features = []
    labels = []

    for file in os.listdir(folder):
        if file.endswith('.wav'):
            file_path = os.path.join(folder, file)
            print(f'Processing {file_path}')

            # Load audio file
            audio, sr = librosa.load(file_path, sr=None)

            # Extract MFCCs
            mfccs = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=13)
            mfccs_mean = np.mean(mfccs.T, axis=0)

            # Extract fundamental frequency (F0)
            pitches, magnitudes = librosa.core.piptrack(y=audio, sr=sr)
            pitch_mean = np.mean(pitches[pitches > 0])  # Get the mean pitch

            # Extract Spectral Centroid
            spectral_centroid = np.mean(librosa.feature.spectral_centroid(y=audio, sr=sr))

            # Extract Spectral Bandwidth
            spectral_bandwidth = np.mean(librosa.feature.spectral_bandwidth(y=audio, sr=sr))

            # Extract Chroma features
            chroma = np.mean(librosa.feature.chroma_stft(y=audio, sr=sr))

            # Combine all features into one feature vector
            features = np.hstack([mfccs_mean, pitch_mean, spectral_centroid, spectral_bandwidth, chroma])
            audio_features.append(features)

            # Use folder name as label (assumes confident/non-confident structure)
            labels.append(folder.split('/')[-1])

    return np.array(audio_features), np.array(labels)

# Load audio files from both confident and non-confident folders
confident_features, confident_labels = load_audio_files_combined('/content/drive/MyDrive/dataset/Voicedata/confident')
non_confident_features, non_confident_labels = load_audio_files_combined('/content/drive/MyDrive/dataset/Voicedata/Non-confident')

# Combine features and labels
X = np.vstack((confident_features, non_confident_features))
y = np.hstack((confident_labels, non_confident_labels))


Processing /content/drive/MyDrive/dataset/Voicedata/confident/392-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/387-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/384-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/386-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/385-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/383-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/380-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/381-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/39-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/516-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/517-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/515-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/513-1.wav
Processing /content/drive/MyDrive/dataset/Voicedata/confident/514

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)
  return pitch_tuning(


Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/342-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/05-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/343-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/336-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/339-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/344-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/338-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/340-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/337-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/341-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/04-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/335-0.wav
Processing /content/drive/MyDrive/dataset/Voicedata/Non-confident/388-0.wav
Processing /co

# Preprocess and Split the Dataset

In [24]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
y_encoded = le.fit_transform(y)

X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)


### Split your data into training and testing sets:

In [25]:
print('X shape:', X.shape)
print('y_encoded shape:', y_encoded.shape)




X shape: (1000, 17)
y_encoded shape: (1000,)


In [26]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report


In [27]:
# Initialize the model
model = RandomForestClassifier(n_estimators=100, random_state=42)

# Train the model
model.fit(X_train, y_train)


In [28]:
# Predict on the test set
y_pred = model.predict(X_test)


In [29]:
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")

# Print a detailed classification report
print(classification_report(y_test, y_pred, target_names=le.classes_))


Accuracy: 0.72
               precision    recall  f1-score   support

Non-confident       0.72      0.79      0.75       109
    confident       0.72      0.64      0.67        91

     accuracy                           0.72       200
    macro avg       0.72      0.71      0.71       200
 weighted avg       0.72      0.72      0.72       200

