<a href="https://colab.research.google.com/github/MangalaPriyadharshini/MangalaPriyadharshini/blob/main/SongClassification.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import zipfile
import io
import numpy as np
import pandas as pd
import librosa
import soundfile as sf
from google.colab import files
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# 1️⃣ Upload ZIP
print("📂 Please upload your dataset ZIP file...")
uploaded = files.upload()

zip_filename = list(uploaded.keys())[0]

# 2️⃣ Extract ZIP
with zipfile.ZipFile(io.BytesIO(uploaded[zip_filename]), 'r') as zip_ref:
    zip_ref.extractall("dataset")

print("✅ Dataset extracted to 'dataset/'")

# 3️⃣ Iterate through separated files (Assume they are already separated OR use demucs first)
separated_dir = "dataset"  # <-- change to "output/htdemucs_ft" if using Demucs
data = []

def extract_features(audio_path, n_mfcc=13):
    try:
        y, sr = librosa.load(audio_path, sr=None, mono=True)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        mfccs_mean = np.mean(mfccs, axis=1)
        return mfccs_mean
    except Exception as e:
        print(f"❌ Error with {audio_path}: {e}")
        return np.zeros(n_mfcc)

# Walk through folders
for class_name in os.listdir(separated_dir):
    class_path = os.path.join(separated_dir, class_name)
    if not os.path.isdir(class_path):
        continue

    for file in os.listdir(class_path):
        if file.endswith(('.wav', '.mp3')):
            file_path = os.path.join(class_path, file)
            feats = extract_features(file_path)
            data.append({"features": feats.tolist(), "class": class_name})

# Create DataFrame
df = pd.DataFrame(data)

if len(df) > 1:
    X = np.array(df["features"].tolist())
    y = df["class"]

    # Encode labels
    encoder = LabelEncoder()
    y_enc = encoder.fit_transform(y)

    # Train/Test split
    X_train, X_test, y_train, y_test = train_test_split(X, y_enc, test_size=0.2, random_state=42)

    # Train model
    clf = RandomForestClassifier(n_estimators=100, random_state=42)
    clf.fit(X_train, y_train)

    y_pred = clf.predict(X)
    df["predicted_class"] = encoder.inverse_transform(y_pred)

    acc = accuracy_score(y_enc, y_pred)
    print(f"✅ Model trained. Accuracy on full data: {acc:.2f}")

else:
    print("⚠️ Only one sample found — skipping model training.")
    df["predicted_class"] = df["class"]

# Save CSV
csv_output = "/content/dataset.csv"
df.to_csv(csv_output, index=False)
print(f"📄 Dataset with predictions saved to {csv_output}")
# Show all rows
pd.set_option('display.max_rows', None)
print(df)


📂 Please upload your dataset ZIP file...


Saving Rap song.zip to Rap song.zip
✅ Dataset extracted to 'dataset/'
✅ Model trained. Accuracy on full data: 1.00
📄 Dataset with predictions saved to /content/dataset.csv
                                             features     class  \
0   [-118.10102844238281, 120.36821746826172, -10....  Rap song   
1   [-136.4287567138672, 190.9777069091797, -74.74...  Rap song   
2   [-141.9781036376953, 167.7129669189453, -40.92...  Rap song   
3   [-110.21543884277344, 127.51097869873047, -4.3...  Rap song   
4   [-105.31147003173828, 150.2806854248047, -63.4...  Rap song   
5   [-106.83342742919922, 136.09011840820312, -18....  Rap song   
6   [-158.21389770507812, 114.6207275390625, -29.4...  Rap song   
7   [-74.12935638427734, 127.3144302368164, -41.24...  Rap song   
8   [-202.7742919921875, 166.3975372314453, -12.33...  Rap song   
9   [-131.81301879882812, 136.8538055419922, -21.3...  Rap song   
10  [-153.32447814941406, 105.63259887695312, -8.6...  Rap song   
11  [-251.11509704589844