# LAB-7-A

### Author

- [Navaneeth Sivakumar - 21BAI1302](https://github.com/Sivakumar-Navaneeth)

In [1]:
import librosa
import os
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score

# Step 1: Function to extract features from audio

In [2]:
def extract_features(audio_path, n_mfcc=13):
    y, sr = librosa.load(audio_path, sr=None)

    # MFCC
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfcc_mean = np.mean(mfcc.T, axis=0)

    # Chroma
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    chroma_mean = np.mean(chroma.T, axis=0)

    # Mel-spectrogram
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    mel_mean = np.mean(mel.T, axis=0)

    # Combine all features
    features = np.hstack([mfcc_mean, chroma_mean, mel_mean])

    return features

# Step 2: Load dataset and extract features

In [14]:
audio_folder = 'free-spoken-digit-dataset-master/free-spoken-digit-dataset-master/recordings'
audio_files = [f for f in os.listdir(audio_folder) if f.endswith('.wav')]

labels = [int(f.split('_')[0]) for f in audio_files]

# Extract features for all audio files
features = []
for file in audio_files:
    file_path = os.path.join(audio_folder, file)
    audio_features = extract_features(file_path)
    features.append(audio_features)

X = pd.DataFrame(features)
# print(X.shape)
print(X.head())
y = pd.Series(labels)



          0          1          2          3          4          5    \
0 -164.544647   2.768874  33.804203  24.320789 -37.505180 -27.450354   
1 -250.517212  33.151550  -3.604604  -7.039331 -39.924980 -50.474068   
2 -312.191864  11.199821  -0.455752   6.471281 -37.523369 -36.116360   
3 -262.359131   7.685692   1.497396   1.708157 -37.620552 -35.234795   
4 -249.161102  12.089952  -4.094543   9.838566 -42.190765 -37.469681   

         6          7          8          9    ...       143       144  \
0 -39.704735 -28.177942 -30.976017   7.688464  ...  1.318282  1.168912   
1 -43.294243 -27.839779 -39.000805  -3.448081  ...  0.074615  0.020534   
2 -24.461546 -35.563080 -33.041653 -11.725566  ...  0.244455  0.037801   
3 -32.962082 -31.147699 -36.620861  -9.146376  ...  0.034317  0.134275   
4 -34.216095 -30.324535 -40.210140  -7.836192  ...  0.205039  0.041748   

        145       146       147       148       149       150       151  \
0  0.177905  0.448251  0.111238  0.076483  0.05

# Step 3: Split data into training and testing sets

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Train a RandomForestClassifier

In [5]:
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Step 5: Make predictions

In [6]:
y_pred = clf.predict(X_test)

# Step 6: Evaluate the model

In [7]:
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy:.2f}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

Accuracy: 0.95
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.96      0.97        72
           1       0.98      0.93      0.96        69
           2       0.91      0.93      0.92        57
           3       0.96      0.91      0.94        56
           4       0.98      0.98      0.98        59
           5       0.93      0.90      0.92        63
           6       0.98      1.00      0.99        56
           7       0.95      1.00      0.97        55
           8       0.98      1.00      0.99        57
           9       0.88      0.95      0.91        56

    accuracy                           0.95       600
   macro avg       0.95      0.96      0.95       600
weighted avg       0.96      0.95      0.95       600

