#This is Python code to train and test a Random Forest classifier on the labelled data and to predict the category of each of the 115 MFCC.csv files.
#Note: This code requires the labelled mp3 data files and the MFCC.csv files to be downloaded, preferably to your Google Drive.

Importing the required Libraries

In [None]:
from zipfile import ZipFile
import os
import shutil
import numpy as np
import librosa
import random
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import joblib
import pandas as pd

Mounting to Google Drive

In [None]:
# Mount Google Drive at /content/drive; the later cells read the song files
# from /content/drive/MyDrive/Project_Files.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Extracting the MFCC.csv files. In my case, 01-MFCC.zip is the archive in my Drive that contains all 115 MFCC.csv files.


In [None]:
# Unpack the archive of MFCC CSV files into /content, the directory that the
# CSV-loading cell scans. Absolute paths are used for both the archive and the
# destination (matching the other cells) so the result does not depend on the
# current working directory.
with ZipFile('/content/drive/MyDrive/01-MFCC.zip', 'r') as mfcc_archive:
    mfcc_archive.extractall('/content')

Copying all the songs to /content

In [None]:

# Folder on the mounted Google Drive that holds the labelled mp3 files
drive_path = '/content/drive/MyDrive/Project_Files'

# Every file is named "<prefix>_<n>.mp3" with n counted from 1; the table
# lists how many files exist for each prefix, in the order they are copied.
songs = [
    f"{prefix}_{number}.mp3"
    for prefix, count in (
        ("BhavGeet", 22),
        ("Lavni", 23),
        ("AshaBhosale", 22),
        ("Kishor", 22),
        ("MJ", 22),
    )
    for number in range(1, count + 1)
]

# Copy each song from Drive into /content, where the later cells read it
for song in songs:
    shutil.copy(os.path.join(drive_path, song), os.path.join('/content', song))
    print(f"Copied {song} to /content")


Copied BhavGeet_1.mp3 to /content
Copied BhavGeet_2.mp3 to /content
Copied BhavGeet_3.mp3 to /content
Copied BhavGeet_4.mp3 to /content
Copied BhavGeet_5.mp3 to /content
Copied BhavGeet_6.mp3 to /content
Copied BhavGeet_7.mp3 to /content
Copied BhavGeet_8.mp3 to /content
Copied BhavGeet_9.mp3 to /content
Copied BhavGeet_10.mp3 to /content
Copied BhavGeet_11.mp3 to /content
Copied BhavGeet_12.mp3 to /content
Copied BhavGeet_13.mp3 to /content
Copied BhavGeet_14.mp3 to /content
Copied BhavGeet_15.mp3 to /content
Copied BhavGeet_16.mp3 to /content
Copied BhavGeet_17.mp3 to /content
Copied BhavGeet_18.mp3 to /content
Copied BhavGeet_19.mp3 to /content
Copied BhavGeet_20.mp3 to /content
Copied BhavGeet_21.mp3 to /content
Copied BhavGeet_22.mp3 to /content
Copied Lavni_1.mp3 to /content
Copied Lavni_2.mp3 to /content
Copied Lavni_3.mp3 to /content
Copied Lavni_4.mp3 to /content
Copied Lavni_5.mp3 to /content
Copied Lavni_6.mp3 to /content
Copied Lavni_7.mp3 to /content
Copied Lavni_8.mp3 to 

PreProcessing Data

In [None]:
def extract_and_sample_mfcc(file_path, n_mfcc=17, sr=44100, window_size=60, hop_length=15, n_frames=3000):
    """Turn one audio file into flattened, overlapping MFCC windows.

    The audio is loaded with librosa at sample rate ``sr`` and converted to
    ``n_mfcc`` MFCC coefficients per frame. Consecutive runs of
    ``window_size`` frames are then cut out, advancing ``hop_length`` frames
    between windows. Note that ``hop_length`` is this window stride only; it
    is not forwarded to ``librosa.feature.mfcc``.

    Args:
        file_path: Path of the audio file to load.
        n_mfcc: Number of MFCC coefficients to compute.
        sr: Sample rate passed to librosa for loading and for the MFCCs.
        window_size: Number of MFCC frames per window.
        hop_length: Number of frames between the starts of adjacent windows.
        n_frames: Maximum number of windows returned. When more windows are
            available, ``n_frames`` of them are drawn at random without
            replacement and kept in their original (time) order.

    Returns:
        A 2-D array with one row per window and
        ``n_mfcc * window_size`` columns.
    """
    signal = librosa.load(file_path, sr=sr)[0]
    coeffs = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)

    # Last frame index at which a complete window still fits
    last_start = coeffs.shape[1] - window_size
    windows = np.array([
        coeffs[:, start:start + window_size].T  # (window_size x n_mfcc)
        for start in range(0, last_start + 1, hop_length)
    ])

    # Cap the number of windows per song by random subsampling
    if len(windows) > n_frames:
        keep = np.random.choice(len(windows), n_frames, replace=False)
        keep.sort()
        windows = windows[keep]

    # One flat feature vector per window for the Random Forest
    return windows.reshape(-1, n_mfcc * window_size)

# Map each category label to the mp3 files that belong to it.
# Files are named "<prefix>_<n>.mp3", numbered from 1 up to the count given
# in the table; the order of the entries is the order of the dictionary.
categories = {
    label: [f"{prefix}_{number}.mp3" for number in range(1, count + 1)]
    for label, prefix, count in (
        ("Marathi_Bhav_Geet", "BhavGeet", 22),
        ("Marathi_Lavni", "Lavni", 23),
        ("Asha_Bhosale", "AshaBhosale", 22),
        ("Kishor_Kumar", "Kishor", 22),
        ("Michael_Jackson", "MJ", 22),
    )
}



Splitting the data into training and testing sets, scaling the features, and training the model

In [None]:
# Build the train/test window matrices, standardize them, fit the Random
# Forest and predict the held-out windows.
X_train_rf = []
y_train_rf = []
X_test_rf = []
y_test_rf = []

# Number of whole songs held out per category for testing
n_test_songs = 2

# The position of a category in the dictionary is its integer class label.
# The per-category list is named category_songs so that the module-level
# `songs` list is not overwritten by this loop.
for label_idx, category_songs in enumerate(categories.values()):
    # Split by song (not by window) so that the windows of one recording
    # never end up in both the training and the testing set.
    test_songs = random.sample(category_songs, n_test_songs)
    train_songs = [song for song in category_songs if song not in test_songs]

    # Same processing for both splits: test songs first, then training songs
    for split_name, split_songs, X_split, y_split in (
        ("testing", test_songs, X_test_rf, y_test_rf),
        ("training", train_songs, X_train_rf, y_train_rf),
    ):
        for song in split_songs:
            file_path = os.path.join('/content', song)  # Adjust path as needed for your environment
            print(f"Processing {song} for {split_name}...")
            mfcc_windows = extract_and_sample_mfcc(file_path)
            X_split.append(mfcc_windows)
            # One label per window. An explicit integer array keeps the
            # stacked labels integer even if a song yields zero windows
            # (an empty Python list would make np.hstack upcast to float).
            y_split.append(np.full(len(mfcc_windows), label_idx, dtype=int))

# Stack the per-song arrays into (num_windows x flattened features) matrices
# and flat label vectors
X_train_rf = np.vstack(X_train_rf)
y_train_rf = np.hstack(y_train_rf)

X_test_rf = np.vstack(X_test_rf)
y_test_rf = np.hstack(y_test_rf)

# Fit the scaler on the training windows only, then apply it to both splits
scaler_rf = StandardScaler()
X_train_rf_scaled = scaler_rf.fit_transform(X_train_rf)
X_test_rf_scaled = scaler_rf.transform(X_test_rf)

print("Training data shape:", X_train_rf_scaled.shape)
print("Testing data shape:", X_test_rf_scaled.shape)

# Build and train the Random Forest classifier (a forest of 150 trees)
rf_model = RandomForestClassifier(n_estimators=150)
rf_model.fit(X_train_rf_scaled, y_train_rf)

# Predict a label for every test window
y_pred_rf = rf_model.predict(X_test_rf_scaled)



Processing BhavGeet_8.mp3 for testing...
Processing BhavGeet_10.mp3 for testing...
Processing BhavGeet_1.mp3 for training...
Processing BhavGeet_2.mp3 for training...
Processing BhavGeet_3.mp3 for training...
Processing BhavGeet_4.mp3 for training...
Processing BhavGeet_5.mp3 for training...
Processing BhavGeet_6.mp3 for training...
Processing BhavGeet_7.mp3 for training...
Processing BhavGeet_9.mp3 for training...
Processing BhavGeet_11.mp3 for training...
Processing BhavGeet_12.mp3 for training...
Processing BhavGeet_13.mp3 for training...
Processing BhavGeet_14.mp3 for training...
Processing BhavGeet_15.mp3 for training...
Processing BhavGeet_16.mp3 for training...
Processing BhavGeet_17.mp3 for training...
Processing BhavGeet_18.mp3 for training...
Processing BhavGeet_19.mp3 for training...
Processing BhavGeet_20.mp3 for training...
Processing BhavGeet_21.mp3 for training...
Processing BhavGeet_22.mp3 for training...
Processing Lavni_19.mp3 for testing...


  y, _ = librosa.load(file_path, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Processing Lavni_22.mp3 for testing...
Processing Lavni_1.mp3 for training...
Processing Lavni_2.mp3 for training...
Processing Lavni_3.mp3 for training...
Processing Lavni_4.mp3 for training...
Processing Lavni_5.mp3 for training...
Processing Lavni_6.mp3 for training...
Processing Lavni_7.mp3 for training...
Processing Lavni_8.mp3 for training...
Processing Lavni_9.mp3 for training...
Processing Lavni_10.mp3 for training...
Processing Lavni_11.mp3 for training...
Processing Lavni_12.mp3 for training...
Processing Lavni_13.mp3 for training...
Processing Lavni_14.mp3 for training...
Processing Lavni_15.mp3 for training...
Processing Lavni_16.mp3 for training...
Processing Lavni_17.mp3 for training...
Processing Lavni_18.mp3 for training...
Processing Lavni_20.mp3 for training...
Processing Lavni_21.mp3 for training...
Processing Lavni_23.mp3 for training...


  y, _ = librosa.load(file_path, sr=sr)
	Deprecated as of librosa version 0.10.0.
	It will be removed in librosa version 1.0.
  y, sr_native = __audioread_load(path, offset, duration, dtype)


Processing AshaBhosale_12.mp3 for testing...
Processing AshaBhosale_3.mp3 for testing...
Processing AshaBhosale_1.mp3 for training...
Processing AshaBhosale_2.mp3 for training...
Processing AshaBhosale_4.mp3 for training...
Processing AshaBhosale_5.mp3 for training...
Processing AshaBhosale_6.mp3 for training...
Processing AshaBhosale_7.mp3 for training...
Processing AshaBhosale_8.mp3 for training...
Processing AshaBhosale_9.mp3 for training...
Processing AshaBhosale_10.mp3 for training...
Processing AshaBhosale_11.mp3 for training...
Processing AshaBhosale_13.mp3 for training...
Processing AshaBhosale_14.mp3 for training...
Processing AshaBhosale_15.mp3 for training...
Processing AshaBhosale_16.mp3 for training...
Processing AshaBhosale_17.mp3 for training...
Processing AshaBhosale_18.mp3 for training...
Processing AshaBhosale_19.mp3 for training...
Processing AshaBhosale_20.mp3 for training...
Processing AshaBhosale_21.mp3 for training...
Processing AshaBhosale_22.mp3 for training...

Accuracy Check
#Note: This accuracy excludes the National Anthem category, which is classified separately: if the number of frames in a song is < 12000, that song is directly classified into the National Anthem category.



In [None]:
# Evaluate accuracy on test set
# y_test_rf and y_pred_rf hold one label per test window, so this is the
# fraction of correctly classified windows, not of correctly classified songs.
test_accuracy_rf = accuracy_score(y_test_rf, y_pred_rf)
print(f"Test accuracy (Random Forest): {test_accuracy_rf}")



Test accuracy (Random Forest): 0.6934004770739465


Save the Model and Scaler

In [None]:
# Save the Random Forest model and scaler for future use
# Both files are written with joblib to the current working directory
# (the paths are relative).
joblib.dump(rf_model, 'random_forest_model.pkl')
joblib.dump(scaler_rf, 'scaler_rf.pkl')

['scaler_rf.pkl']

PreProcessing the MFCC.csv files

In [None]:
# Category label -> integer class index used for the predictions.
# NOTE: this rebinds `categories`, replacing the label -> song-list dictionary
# defined in the preprocessing cell; re-run that cell before re-running the
# training cell.
categories = {
    name: index
    for index, name in enumerate((
        "Marathi_Bhav_Geet",
        "Marathi_Lavni",
        "Asha_Bhosale",
        "Kishor_Kumar",
        "Michael_Jackson",
    ))
}

# Display names for the predictions. Position 0 is reserved for the Indian
# National Anthem, so class index i is found at categories_list[i + 1].
categories_list = ["Indian_National_Anthem", *categories]

def apply_sliding_window_and_sample(df, n_mfcc=17, window_size=60, hop_length=15, n_frames=3000):
    """Cut an MFCC table into flattened, overlapping windows.

    Args:
        df: DataFrame with one row per MFCC coefficient and one column per
            frame; only the first ``n_mfcc`` rows are used.
        n_mfcc: Number of coefficient rows to keep.
        window_size: Number of frames (columns) per window.
        hop_length: Number of frames between the starts of adjacent windows.
        n_frames: Maximum number of windows returned. When more windows are
            available, ``n_frames`` of them are drawn at random without
            replacement and kept in their original (time) order.

    Returns:
        A 2-D array with one row per window and
        ``n_mfcc * window_size`` columns; each row lists the coefficients
        frame by frame.
    """
    coeffs = df.values[:n_mfcc]

    # Last column index at which a complete window still fits
    last_start = coeffs.shape[1] - window_size
    windows = np.array([
        coeffs[:, start:start + window_size].T  # (window_size x n_mfcc)
        for start in range(0, last_start + 1, hop_length)
    ])

    # Cap the number of windows per song by random subsampling
    if len(windows) > n_frames:
        keep = np.random.choice(len(windows), n_frames, replace=False)
        keep.sort()
        windows = windows[keep]

    # One flat feature vector per window, as expected by the model
    return windows.reshape(-1, n_mfcc * window_size)


Loading the MFCC.csv files from /content into DataFrames

In [None]:

# Read every "*-MFCC.csv" file found in /content into a DataFrame, keyed by
# its file name (adjust the path as needed).
dataframes = {}
# os.listdir() returns entries in arbitrary order; sorting makes the order of
# the dictionary, and therefore of all later processing and printing,
# reproducible between runs.
for filename in sorted(os.listdir('/content')):
    if not filename.endswith("-MFCC.csv"):
        continue
    try:
        # header=None keeps the first row as data instead of column names
        df = pd.read_csv(os.path.join('/content', filename), header=None)
    except Exception as e:
        # Best effort: report the unreadable file and carry on with the rest
        print(f"Error loading {filename}: {e}")
    else:
        dataframes[filename] = df
        print(f"Loaded {filename}: shape {df.shape}")



Loaded 48-MFCC.csv: shape (20, 28173)
Loaded 88-MFCC.csv: shape (20, 25397)
Loaded 03-MFCC.csv: shape (20, 24716)
Loaded 54-MFCC.csv: shape (20, 22411)
Loaded 65-MFCC.csv: shape (20, 20270)
Loaded 67-MFCC.csv: shape (20, 9434)
Loaded 110-MFCC.csv: shape (20, 24658)
Loaded 26-MFCC.csv: shape (20, 17963)
Loaded 24-MFCC.csv: shape (20, 25308)
Loaded 37-MFCC.csv: shape (20, 16860)
Loaded 103-MFCC.csv: shape (20, 34368)
Loaded 87-MFCC.csv: shape (20, 9200)
Loaded 09-MFCC.csv: shape (20, 27241)
Loaded 91-MFCC.csv: shape (20, 20121)
Loaded 93-MFCC.csv: shape (20, 19700)
Loaded 13-MFCC.csv: shape (20, 33329)
Loaded 75-MFCC.csv: shape (20, 6064)
Loaded 66-MFCC.csv: shape (20, 10090)
Loaded 31-MFCC.csv: shape (20, 10921)
Loaded 60-MFCC.csv: shape (20, 22452)
Loaded 10-MFCC.csv: shape (20, 30065)
Loaded 07-MFCC.csv: shape (20, 18110)
Loaded 104-MFCC.csv: shape (20, 34536)
Loaded 20-MFCC.csv: shape (20, 25608)
Loaded 42-MFCC.csv: shape (20, 18250)
Loaded 08-MFCC.csv: shape (20, 22615)
Loaded 85-MF

Predicting the category of each MFCC.csv file using the trained Model and Scaler

In [None]:
# Assign one category to every loaded MFCC table
song_predictions = {}

for filename, df in dataframes.items():
    print(f"Processing {filename}...")

    if df.shape[1] >= 12000:
        # Long enough for the classifier: window the frames, scale them with
        # the fitted scaler and let the Random Forest label every window.
        mfcc_windows = apply_sliding_window_and_sample(df)
        window_votes = rf_model.predict(scaler_rf.transform(mfcc_windows))

        # The label predicted for the most windows wins
        winning_label = int(np.argmax(np.bincount(window_votes)))

        # +1 skips the National Anthem entry stored at index 0
        predicted_category_name = categories_list[winning_label + 1]
        song_predictions[filename] = predicted_category_name
        print(f"Predicted category for {filename}: {predicted_category_name}")
    else:
        # Fewer than 12000 frames (columns): labelled as the National Anthem
        # without consulting the model
        song_predictions[filename] = "Indian_National_Anthem"
        print(f"Predicted category for {filename}: Indian_National_Anthem (based on frame count)")

# Summary of the final label of every file
print("\nFinal Song Predictions:")
for filename, category in song_predictions.items():
    print(f"{filename}: {category}")

Processing 48-MFCC.csv...
Predicted category for 48-MFCC.csv: Kishor_Kumar
Processing 88-MFCC.csv...
Predicted category for 88-MFCC.csv: Marathi_Lavni
Processing 03-MFCC.csv...
Predicted category for 03-MFCC.csv: Michael_Jackson
Processing 54-MFCC.csv...
Predicted category for 54-MFCC.csv: Marathi_Lavni
Processing 65-MFCC.csv...
Predicted category for 65-MFCC.csv: Marathi_Lavni
Processing 67-MFCC.csv...
Predicted category for 67-MFCC.csv: Indian_National_Anthem (based on frame count)
Processing 110-MFCC.csv...
Predicted category for 110-MFCC.csv: Asha_Bhosale
Processing 26-MFCC.csv...
Predicted category for 26-MFCC.csv: Marathi_Bhav_Geet
Processing 24-MFCC.csv...
Predicted category for 24-MFCC.csv: Michael_Jackson
Processing 37-MFCC.csv...
Predicted category for 37-MFCC.csv: Michael_Jackson
Processing 103-MFCC.csv...
Predicted category for 103-MFCC.csv: Michael_Jackson
Processing 87-MFCC.csv...
Predicted category for 87-MFCC.csv: Indian_National_Anthem (based on frame count)
Processing