In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import librosa
import librosa.display
import xgboost as xgb
import os

# Function to extract features from audio files
def extract_features(file_path, num_mfcc=13, n_fft=2048, hop_length=512):
    """Compute an averaged MFCC feature vector for one audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file passed to ``librosa.load``.
    num_mfcc : int
        Number of MFCC coefficients requested (forwarded as ``n_mfcc``).
    n_fft : int
        FFT window size forwarded to ``librosa.feature.mfcc``.
    hop_length : int
        Hop length forwarded to ``librosa.feature.mfcc``.

    Returns
    -------
    numpy.ndarray or None
        Mean of the transposed MFCC matrix along its first axis, i.e. one
        averaged value per coefficient row returned by librosa. ``None`` is
        returned when loading or feature extraction raises, so callers can
        skip unreadable files.
    """
    try:
        audio, sample_rate = librosa.load(file_path, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=num_mfcc, n_fft=n_fft, hop_length=hop_length)
        mfccs_mean = np.mean(mfccs.T, axis=0)
    except Exception as e:
        # Deliberately best-effort: one bad file must not abort a whole
        # dataset scan. Include the exception so the cause (missing file,
        # unsupported codec, missing dependency, ...) is visible in the log.
        print("Error encountered while parsing file: ", file_path, "-", repr(e))
        return None

    return mfccs_mean

# Load GTZAN dataset
data_dir = 'gtzan_genre'
# Keep only sub-directories: a stray file (e.g. .DS_Store) would otherwise be
# treated as a genre and crash the inner os.listdir. Sorting makes the sample
# order, and therefore the seeded split below, independent of filesystem order.
genres = sorted(
    entry for entry in os.listdir(data_dir)
    if os.path.isdir(os.path.join(data_dir, entry))
)
num_mfcc = 13

features = []
labels = []

for genre in genres:
    genre_dir = os.path.join(data_dir, genre)
    for file in sorted(os.listdir(genre_dir)):
        file_path = os.path.join(genre_dir, file)
        feature = extract_features(file_path, num_mfcc=num_mfcc)
        # extract_features returns None for files it could not process.
        if feature is not None:
            features.append(feature)
            labels.append(genre)

# Convert features and labels to numpy arrays
features = np.array(features)
labels = np.array(labels)

# XGBoost expects class labels encoded as integers 0..n_classes-1, not genre
# strings. genre_names[i] is the genre name for class id i.
genre_names, label_ids = np.unique(labels, return_inverse=True)

# Split the data into training and testing sets.
# Stratifying keeps every genre present in the training set, so the class ids
# seen by the model stay contiguous.
X_train, X_test, y_train_ids, y_test_ids = train_test_split(
    features, label_ids, test_size=0.2, random_state=42, stratify=label_ids
)
# Keep the string-valued label arrays available under their original names.
y_train = genre_names[y_train_ids]
y_test = genre_names[y_test_ids]

# Train XGBoost model on the integer class ids
model = xgb.XGBClassifier()
model.fit(X_train, y_train_ids)

# Evaluate model; predictions are decoded back to genre names for reporting
y_pred = genre_names[np.asarray(model.predict(X_test)).astype(int)]
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Optional: Predict new audio file
# Provide the path to the WAV file you want to predict
# new_audio_file = 'path_to_your_wav_file.wav'
# new_feature = extract_features(new_audio_file, num_mfcc=num_mfcc)
# if new_feature is not None:
#     prediction = model.predict(np.array([new_feature]))
#     # model.predict returns an integer class id; map it back to the genre name
#     print("Predicted genre:", genre_names[int(prediction[0])])
# else:
#     print("Failed to extract features from the audio file.")


ModuleNotFoundError: No module named 'xgboost'

In [None]:
print('loading data')

all_data = pickle.load(open('dataset_standardized_all