In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, classification_report
import joblib
import librosa
import numpy as np
import os
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv1D, MaxPooling1D, Flatten, Dense, Dropout

In [21]:
# --- Step 1: Data Preparation and Preprocessing ---

print("Step 1: Data Preparation and Preprocessing")
print("-" * 40)

# Load the dataset
try:
    features_df = pd.read_csv("features_3_sec.csv")
except FileNotFoundError:
    print("Error: 'features_3_sec.csv' not found. Please ensure the file is in the same directory as the script.")
    exit()

# Drop the 'filename' column as it's not a feature for the model
if 'filename' in features_df.columns:
    features_df.drop('filename', axis=1, inplace=True)

# Handle missing values by dropping rows with NaN
features_df.dropna(inplace=True)

# Encode the labels from text to numbers
label_encoder = LabelEncoder()
features_df['label'] = label_encoder.fit_transform(features_df['label'])

# Separate features (X) and labels (y)
X = features_df.drop('label', axis=1)
y = features_df['label']

# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and testing sets (80% training, 20% testing)
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42, stratify=y)

# Reshape data for the CNN model
X_train_cnn = np.expand_dims(X_train, axis=2)
X_test_cnn = np.expand_dims(X_test, axis=2)

print("Data preparation complete.")
print("X_train shape:", X_train_cnn.shape)
print("X_test shape:", X_test_cnn.shape)
print("\n" + "=" * 40 + "\n")

Step 1: Data Preparation and Preprocessing
----------------------------------------
Data preparation complete.
X_train shape: (7992, 58, 1)
X_test shape: (1998, 58, 1)




In [22]:
# --- Step 2: Building and Training the CNN Model ---

print("Step 2: Building and Training the CNN Model")
print("-" * 40)

num_features = X_train_cnn.shape[1]
num_labels = len(np.unique(y))

model = Sequential([
    Conv1D(64, kernel_size=3, activation='relu', input_shape=(num_features, 1)),
    MaxPooling1D(pool_size=2),
    Conv1D(128, kernel_size=3, activation='relu'),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(num_labels, activation='softmax')
])

model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

print("Starting CNN model training...")
model.fit(X_train_cnn, y_train, epochs=20, batch_size=32, validation_data=(X_test_cnn, y_test))
print("CNN model training complete.")
print("\n" + "=" * 40 + "\n")



Step 2: Building and Training the CNN Model
----------------------------------------
Starting CNN model training...
Epoch 1/20


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 15ms/step - accuracy: 0.4967 - loss: 1.4224 - val_accuracy: 0.6637 - val_loss: 1.0316
Epoch 2/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.6695 - loss: 0.9780 - val_accuracy: 0.7167 - val_loss: 0.8422
Epoch 3/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.7320 - loss: 0.7831 - val_accuracy: 0.7588 - val_loss: 0.7128
Epoch 4/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.7804 - loss: 0.6491 - val_accuracy: 0.7863 - val_loss: 0.6249
Epoch 5/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.8174 - loss: 0.5382 - val_accuracy: 0.8143 - val_loss: 0.5781
Epoch 6/20
[1m250/250[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - accuracy: 0.8411 - loss: 0.4620 - val_accuracy: 0.8288 - val_loss: 0.5302
Epoch 7/20
[1m250/250[0m [32m━

In [23]:
# --- Step 3: Evaluation and Saving ---

print("Step 3: Evaluation and Saving")
print("-" * 40)

loss, accuracy = model.evaluate(X_test_cnn, y_test, verbose=0)
print("\nCNN Model Evaluation:")
print(f"Test Accuracy: {accuracy:.4f}")

# Save the entire model
model.save("genre_classifier_cnn_model.h5")
print("\nNew CNN model saved successfully.")

# Also save the scaler and label encoder, as they are still needed for new predictions
joblib.dump(scaler, "scaler_cnn.pkl")
joblib.dump(label_encoder, "label_encoder_cnn.pkl")
print("Scaler and label encoder saved.")
print("\n" + "=" * 40 + "\n")



Step 3: Evaluation and Saving
----------------------------------------





CNN Model Evaluation:
Test Accuracy: 0.8764

New CNN model saved successfully.
Scaler and label encoder saved.




In [24]:
# --- Step 4: Prediction on a New Audio File ---

print("Step 4: Prediction on a New Audio File")
print("-" * 40)

# Load the saved model and other objects
model_loaded = tf.keras.models.load_model("genre_classifier_cnn_model.h5")
scaler_loaded = joblib.load("scaler_cnn.pkl")
label_encoder_loaded = joblib.load("label_encoder_cnn.pkl")

def extract_all_features(file_path):
    """
    Extracts all the necessary features from an audio file to match the training data.
    """
    try:
        y, sr = librosa.load(file_path, duration=30)
        
        features = {}
        
        mfccs = librosa.feature.mfcc(y=y, sr=sr)
        for i in range(mfccs.shape[0]):
            features[f'mfcc{i+1}_mean'] = np.mean(mfccs[i])
            features[f'mfcc{i+1}_var'] = np.var(mfccs[i])
            
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        features['chroma_stft_mean'] = np.mean(chroma)
        features['chroma_stft_var'] = np.var(chroma)
        
        rms = librosa.feature.rms(y=y)
        features['rms_mean'] = np.mean(rms)
        features['rms_var'] = np.var(rms)
        
        spec_cent = librosa.feature.spectral_centroid(y=y, sr=sr)
        features['spectral_centroid_mean'] = np.mean(spec_cent)
        features['spectral_centroid_var'] = np.var(spec_cent)
        
        spec_bw = librosa.feature.spectral_bandwidth(y=y, sr=sr)
        features['spectral_bandwidth_mean'] = np.mean(spec_bw)
        features['spectral_bandwidth_var'] = np.var(spec_bw)



        spec_roll = librosa.feature.spectral_rolloff(y=y, sr=sr)
        features['spectral_rolloff_mean'] = np.mean(spec_roll)
        features['spectral_rolloff_var'] = np.var(spec_roll)
        
        zcr = librosa.feature.zero_crossing_rate(y)
        features['zero_crossing_rate_mean'] = np.mean(zcr)
        features['zero_crossing_rate_var'] = np.var(zcr)
        
        y_harm, y_perc = librosa.effects.hpss(y)
        features['harmony_mean'] = np.mean(y_harm)
        features['harmony_var'] = np.var(y_harm)
        features['perceptr_mean'] = np.mean(y_perc)
        features['perceptr_var'] = np.var(y_perc)
        
        tempo = librosa.beat.tempo(y=y, sr=sr)[0]
        features['tempo'] = tempo

        feature_names = pd.read_csv("features_3_sec.csv").drop(columns=['filename', 'label']).columns.tolist()
        
        new_data_dict = {name: [features.get(name, 0)] for name in feature_names}
        
        new_data_df = pd.DataFrame(new_data_dict)

        return new_data_df

    except Exception as e:
        print(f"Error encountered while parsing file: {file_path}. Error: {e}")
        return None



Step 4: Prediction on a New Audio File
----------------------------------------




In [25]:
# Make sure to provide a valid path to an audio file from your dataset.
new_audio_file = r"C:\Users\HP\Downloads\archive (10)\Data\genres_original\blues\blues.00001.wav"

print(f"Extracting features from: {new_audio_file}")
new_features_df = extract_all_features(new_audio_file)


if new_features_df is not None:
    features_scaled = scaler_loaded.transform(new_features_df)
    features_reshaped = np.expand_dims(features_scaled, axis=2)
    
    predictions = model_loaded.predict(features_reshaped)
    pred_label_num = np.argmax(predictions)
    pred_label = label_encoder_loaded.inverse_transform([pred_label_num])[0]
    
    print(f"\nThe predicted genre for the audio file is: {pred_label}")
else:
    print("Could not make a prediction due to a feature extraction error.")

Extracting features from: C:\Users\HP\Downloads\archive (10)\Data\genres_original\blues\blues.00001.wav


	This function was moved to 'librosa.feature.rhythm.tempo' in librosa version 0.10.0.
	This alias will be removed in librosa version 1.0.
  tempo = librosa.beat.tempo(y=y, sr=sr)[0]


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 151ms/step

The predicted genre for the audio file is: jazz
