In [2]:
import os
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from tqdm import tqdm

# Define the path to your dataset
data_path = '/home/sreenath/Work/genres'

# List all genres
genres = os.listdir(data_path)

# Initialize lists to hold data and labels
data = []
labels = []

# Function to extract features from an audio file
def extract_features(file_path):
    y, sr = librosa.load(file_path, duration=30)  # Limiting to 30 seconds for consistency
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    return np.hstack((np.mean(mfccs, axis=1), np.mean(chroma, axis=1), np.mean(mel, axis=1), np.mean(contrast, axis=1)))

# Loop through each genre and process files with a progress bar
for genre in genres:
    genre_path = os.path.join(data_path, genre)
    for file_name in tqdm(os.listdir(genre_path), desc=f'Processing {genre}', unit='file'):
        if file_name.endswith('.au'):
            file_path = os.path.join(genre_path, file_name)
            try:
                features = extract_features(file_path)
                data.append(features)
                labels.append(genre)
            except Exception as e:
                print(f"Error processing {file_path}: {e}")

# Convert lists to numpy arrays
data = np.array(data)
labels = np.array(labels)

# Encode labels to numerical values
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
labels = le.fit_transform(labels)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Train a RandomForest Classifier
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test data
y_pred = model.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred, average='weighted')
recall = recall_score(y_test, y_pred, average='weighted')
f1 = f1_score(y_test, y_pred, average='weighted')

# Display metrics
print(f"Accuracy: {accuracy * 100:.2f}%")
print(f"Precision: {precision * 100:.2f}%")
print(f"Recall: {recall * 100:.2f}%")
print(f"F1 Score: {f1 * 100:.2f}%")


Processing hiphop: 100%|██████████| 100/100 [00:27<00:00,  3.61file/s]
Processing metal: 100%|██████████| 100/100 [00:26<00:00,  3.71file/s]
Processing rock: 100%|██████████| 100/100 [00:26<00:00,  3.71file/s]
Processing jazz: 100%|██████████| 100/100 [00:26<00:00,  3.71file/s]
Processing reggae: 100%|██████████| 100/100 [00:27<00:00,  3.64file/s]
Processing blues: 100%|██████████| 100/100 [00:26<00:00,  3.71file/s]
Processing country: 100%|██████████| 100/100 [00:26<00:00,  3.71file/s]
Processing disco: 100%|██████████| 100/100 [00:27<00:00,  3.69file/s]
Processing classical: 100%|██████████| 100/100 [00:27<00:00,  3.69file/s]
Processing pop: 100%|██████████| 100/100 [00:27<00:00,  3.68file/s]


Accuracy: 68.50%
Precision: 69.55%
Recall: 68.50%
F1 Score: 68.14%


In [3]:
print(f"Total samples: {len(data)}")

# Save the extracted features and labels
np.save('features.npy', data)
np.save('labels.npy', labels)

Total samples: 1000


In [4]:
from sklearn.metrics import accuracy_score, classification_report

# Predict on the test set
y_pred = model.predict(X_test)

# Calculate Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy * 100:.2f}%')

# Detailed Classification Report
report = classification_report(y_test, y_pred)
print(report)



Accuracy: 68.50%
              precision    recall  f1-score   support

           0       0.64      0.73      0.68        22
           1       0.95      0.91      0.93        23
           2       0.65      0.60      0.62        25
           3       0.43      0.69      0.53        13
           4       0.86      0.60      0.71        20
           5       0.74      0.67      0.70        21
           6       0.81      1.00      0.90        13
           7       0.76      0.90      0.83        21
           8       0.47      0.53      0.50        15
           9       0.56      0.37      0.44        27

    accuracy                           0.69       200
   macro avg       0.69      0.70      0.68       200
weighted avg       0.70      0.69      0.68       200



In [7]:
import librosa
import numpy as np

# Function to extract features from a new audio file (same as before)
def extract_features(file_path):
    y, sr = librosa.load(file_path, duration=30)  # Limiting to 30 seconds for consistency
    mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y, sr=sr)
    mel = librosa.feature.melspectrogram(y=y, sr=sr)
    contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
    return np.hstack((np.mean(mfccs, axis=1), np.mean(chroma, axis=1), np.mean(mel, axis=1), np.mean(contrast, axis=1)))

# Load the new audio file (change 'path_to_new_audio' to the path of your file)
new_audio_path = '/home/sreenath/Work/genres/blues/blues.00072.au'
features = extract_features(new_audio_path)

# Reshape features for the classifier (since it's a single sample, it needs a 2D shape)
features = features.reshape(1, -1)

# Predict the genre using the trained model
predicted_genre_index = model.predict(features)

# Convert numerical prediction back to genre label
predicted_genre = le.inverse_transform([predicted_genre_index])

print(f"The predicted genre for the new audio file is: {predicted_genre[0]}")


The predicted genre for the new audio file is: blues


  y = column_or_1d(y, warn=True)
