In [1]:
import os
import numpy as np
import json
import librosa
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.utils import resample
from sklearn.metrics import accuracy_score
import warnings
warnings.filterwarnings("ignore")

# ========== Feature Extraction with Additional Features ==========
def extract_features_and_label(base_path):
    try:
        pitch_file = base_path + ".pitch.txt"
        tonic_file = base_path + ".ctonic.txt"
        json_file = base_path + ".json"

        if not all(os.path.exists(f) for f in [pitch_file, tonic_file, json_file]):
            print(f"Missing required file(s) for {base_path}. Skipping...")
            return None, None

        # Load pitch data
        pitch_data = np.loadtxt(pitch_file)
        if pitch_data.shape[0] == 0:  # Empty pitch data
            print(f"Empty pitch data for {base_path}. Skipping...")
            return None, None
        
        freqs = pitch_data[:, 1]
        freqs = np.where(freqs == 0, np.nan, freqs)

        # Load tonic value
        with open(tonic_file) as f:
            tonic = f.read().strip()
        
        if not tonic or tonic == '0':  # Check for missing or invalid tonic
            print(f"Invalid tonic data for {base_path}. Skipping...")
            return None, None
        
        tonic = float(tonic)

        # Normalize pitch data
        norm_pitch = 1200 * np.log2(freqs / tonic)
        norm_pitch = norm_pitch[~np.isnan(norm_pitch)]

        # If pitch data is too short, skip this file
        if len(norm_pitch) < 100:
            print(f"Insufficient pitch data for {base_path}. Skipping...")
            return None, None

        # Create histogram of normalized pitch values
        hist, _ = np.histogram(norm_pitch, bins=60, range=(-600, 1200), density=True)

        # Extract MFCCs
        y, sr = librosa.load(base_path + ".wav", sr=None)
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfccs_mean = np.mean(mfccs, axis=1)

        # Combine histogram and MFCC features
        features = np.concatenate((hist, mfccs_mean))
        
        # Load raga label from metadata
        with open(json_file) as f:
            meta = json.load(f)

        raga = meta.get("raaga", [{}])[0].get("name")
        return features, raga if raga else (None, None)

    except Exception as e:
        print(f"Error in feature extraction: {e}")
        return None, None


# ========== Dataset Loader with Balanced Sampling ==========
def load_data_from_directory(data_dir):
    X, y = [], []
    for root, dirs, files in os.walk(data_dir):
        for file in files:
            if file.endswith(".json"):
                base_name = os.path.splitext(file)[0]
                base_path = os.path.join(root, base_name)
                features, label = extract_features_and_label(base_path)
                if features is not None:
                    X.append(features)
                    y.append(label)

    X, y = np.array(X), np.array(y)

    # Handle class imbalance (oversample minor classes like 'Kamās')
    X_resampled, y_resampled = resample(X, y, replace=True, n_samples=2000, random_state=42)
    
    return X_resampled, y_resampled

# ========== Hyperparameter Tuning Using GridSearch ==========
def train_model(X, y):
    clf = RandomForestClassifier(random_state=42)
    
    param_grid = {
        'n_estimators': [100, 200, 300],
        'max_depth': [10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }

    grid_search = GridSearchCV(clf, param_grid, cv=5, n_jobs=-1)
    grid_search.fit(X, y)
    
    print(f"Best Hyperparameters: {grid_search.best_params_}")
    return grid_search.best_estimator_

# ========== Predict from Audio ==========
def predict_raga_from_audio(audio_path, clf):
    try:
        y, sr = librosa.load(audio_path, sr=None)

        # Better pitch extraction using YIN
        pitch_values = librosa.yin(y, fmin=librosa.note_to_hz('C2'), 
                                   fmax=librosa.note_to_hz('C7'), sr=sr)
        pitch_values = pitch_values[~np.isnan(pitch_values)]

        if len(pitch_values) < 100:
            print("⚠️ Audio too short or unclear for prediction.")
            return

        # Estimate tonic using histogram mode
        tonic_hist, tonic_bins = np.histogram(np.log2(pitch_values), bins=100)
        tonic_guess = 2 ** tonic_bins[np.argmax(tonic_hist)]

        norm_pitch = 1200 * np.log2(pitch_values / tonic_guess)
        hist, _ = np.histogram(norm_pitch, bins=60, range=(-600, 1200), density=True)

        # Extract MFCCs
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        mfccs_mean = np.mean(mfccs, axis=1)

        # Combine histogram and MFCC features
        features = np.concatenate((hist, mfccs_mean))

        # Predict
        probs = clf.predict_proba([features])[0]
        top_indices = np.argsort(probs)[-3:][::-1]
        labels = clf.classes_

        print("\n🎧 Top 3 Raga Predictions:")
        for i in top_indices:
            predicted_raga = labels[i]
            print(f"→ {predicted_raga}: {probs[i]*100:.1f}%")

    except Exception as e:
        print(f"❌ Error processing audio: {e}")

# ========== Main ==========
DATA_DIR = "D:/carnatic/carnatic"  # Path to Saraga Carnatic folder
X, y = load_data_from_directory(DATA_DIR)

if len(X) > 0:
    # Train the model with hyperparameter tuning
    clf = train_model(X, y)

    # Ask user for audio file path
    audio_path = input("🎵 Enter path to your audio file (.mp3 or .wav): ").strip()
    if os.path.exists(audio_path):
        predict_raga_from_audio(audio_path, clf)
    else:
        print("❌ File not found. Please check the path.")
else:
    print("❌ Model training failed. Check dataset path and files.")


Error in feature extraction: [Errno 2] No such file or directory: 'D:/carnatic/carnatic\\0\\Cherthala Ranganatha Sharma - Varashiki Vahana.wav'
Error in feature extraction: [Errno 2] No such file or directory: 'D:/carnatic/carnatic\\1\\Cherthala Ranganatha Sharma - Bhuvini Dasudane.wav'
Error in feature extraction: [Errno 2] No such file or directory: 'D:/carnatic/carnatic\\10\\Sanjay Subrahmanyan - Pullum Silambena Kan.wav'
Error in feature extraction: [Errno 2] No such file or directory: 'D:/carnatic/carnatic\\100\\Modhumudi Sudhakar - Telisi Rama.wav'
Error in feature extraction: [Errno 2] No such file or directory: 'D:/carnatic/carnatic\\101\\Modhumudi Sudhakar - Sundari Nee Divya.wav'
Error in feature extraction: [Errno 2] No such file or directory: 'D:/carnatic/carnatic\\102\\Modhumudi Sudhakar - Rama Namam Bhajare.wav'
Error in feature extraction: [Errno 2] No such file or directory: 'D:/carnatic/carnatic\\103\\Modhumudi Sudhakar - Ghandhamu Poyyaruga.wav'
Error in feature extra

ValueError: high <= 0

In [3]:
import os
import numpy as np
import json
import librosa
from collections import Counter
from sklearn.ensemble import RandomForestClassifier
import warnings

warnings.filterwarnings("ignore")

# ——— 1. FEATURE EXTRACTION ———
def extract_features_and_label(base_path):
    pitch_file = base_path + ".pitch.txt"
    tonic_file = base_path + ".ctonic.txt"
    meta_file  = base_path + ".json"

    # must have all three
    if not (os.path.isfile(pitch_file) and os.path.isfile(tonic_file) and os.path.isfile(meta_file)):
        return None, None

    # load pitch
    try:
        data = np.loadtxt(pitch_file)
    except:
        return None, None
    if data.ndim != 2 or data.shape[0] < 2:
        return None, None

    freqs = data[:,1]
    freqs = freqs[freqs>0]

    # load tonic
    try:
        with open(tonic_file) as f:
            tonic = float(f.read().strip())
    except:
        return None, None

    # normalize to cents
    cents = 1200 * np.log2(freqs / tonic)
    if len(cents) < 100:
        return None, None

    # histogram → 60‑dim feature
    hist, _ = np.histogram(cents, bins=60, range=(-600,1200), density=True)

    # load raga label
    try:
        with open(meta_file) as f:
            meta = json.load(f)
        label = meta["raaga"][0]["name"]
    except:
        return None, None

    return hist, label

# ——— 2. LOAD TRAINING DATA ———
def load_dataset(root_dir):
    X, y = [], []
    for rd, _, files in os.walk(root_dir):
        for fn in files:
            if fn.lower().endswith(".json"):
                base = os.path.join(rd, fn[:-5])
                feat, lbl = extract_features_and_label(base)
                if feat is not None:
                    X.append(feat); y.append(lbl)
    return np.array(X), np.array(y)

DATA_DIR = "D:/carnatic/carnatic"
X, y = load_dataset(DATA_DIR)
print("✅ Samples:", len(X))
print("🎵 Unique ragas:", len(set(y)))
print("🎯 Counts:", Counter(y))

# ——— 3. TRAIN MODEL ———
clf = RandomForestClassifier(
    n_estimators=200,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)
clf.fit(X, y)

# ——— 4. RAGA DESCRIPTIONS ———
raga_descriptions = {
    "Kamās":      "A gentle, sweet raga—often devotional, expressing longing.",
    "Kalyāṇi":    "A bright, devotional Melakarta raga, evoking auspiciousness.",
    "Tōḍi":       "Deeply emotional Melakarta raga, performed in early morning.",
    "Gaula":      "A meditative janya raga with a calm, devotional character.",
    # … add all your ragas here …
}

# ——— 5. PREDICTION FROM MP3/WAV ———
def predict_raga(audio_path):
    y_audio, sr = librosa.load(audio_path, sr=None)
    pitches = librosa.yin(
        y_audio,
        fmin=librosa.note_to_hz("C2"),
        fmax=librosa.note_to_hz("C7"),
        sr=sr
    )
    pitches = pitches[~np.isnan(pitches)]
    if len(pitches) < 100:
        print("⚠️ Audio too short or noisy for reliable pitch extraction.")
        return

    # tonic estimation
    h, bins = np.histogram(np.log2(pitches), bins=100)
    tonic = 2 ** bins[np.argmax(h)]

    cents = 1200 * np.log2(pitches / tonic)
    hist, _ = np.histogram(cents, bins=60, range=(-600,1200), density=True)

    # pad/truncate to 60
    if len(hist) < 60:
        hist = np.pad(hist, (0, 60-len(hist)), 'constant')
    else:
        hist = hist[:60]

    pred = clf.predict([hist])[0]
    print(f"\n🎧 Predicted Raga: {pred}")
    print("📖", raga_descriptions.get(pred, "Description not available."))

# ——— 6. RUN ———
path = input("🎵 Enter path to your audio file (.mp3 or .wav): ").strip()
if os.path.isfile(path):
    predict_raga(path)
else:
    print("❌ File not found. Please check the path and try again.")


✅ Samples: 184
🎵 Unique ragas: 96
🎯 Counts: Counter({'Rāgamālika': 8, 'Saurāṣtraṁ': 7, 'Tōḍi': 7, 'Kamās': 7, 'Bhairavi': 5, 'Behāg': 5, 'Kalyāṇi': 4, 'Suraṭi': 4, 'Ṣanmukhapriya': 4, 'Rītigauḷa': 4, 'Mōhanaṁ': 4, 'Śankarābharaṇaṁ': 3, 'Bēgaḍa': 3, 'Kāṁbhōji': 3, 'Sāvēri': 3, 'Jōnpuri': 3, 'Sindhubhairavi': 3, 'Harikāmbhōji': 3, 'Kānaḍa': 3, 'Śrīranjani': 2, 'Pūrṇacandrika': 2, 'Madhyamāvati': 2, 'Nādanāmakriya': 2, 'Lalita': 2, 'Nāṭa': 2, 'Kalgaḍa': 2, 'Jaganmōhini': 2, 'Hamīr kaḷyaṇi': 2, 'Pāḍi': 2, 'Amṛtavarṣiṇi': 2, 'Maṇirangu': 2, 'Śuddadhanyāsi': 2, 'Kuntalavarāḷi': 2, 'Kāpi': 2, 'Ābhōgi': 2, 'Karṇāṭaka dēvagāndhāri': 2, 'Kumudakriyā': 2, 'Karaharapriya': 2, 'Gauḷa': 2, 'Mārgahindōḷaṁ': 2, 'Kēdāragauḷa': 2, 'Sahānā': 2, 'Nīlāṁbari': 2, 'Puṇṇāgavarāḷi': 1, 'Kathanakutūhalaṁ': 1, 'Sālaga bhairavi': 1, 'Kalāvati': 1, 'Dvijāvanti': 1, 'Kedāraṁ': 1, 'Cakravākaṁ': 1, 'Sāranga': 1, 'Sarasvatī manōhari': 1, 'Sāma': 1, 'Gamakakriyā': 1, 'Sindhumandāri': 1, 'Dhanyāsi': 1, 'Ānandabhairavi':

🎵 Enter path to your audio file (.mp3 or .wav):  D:\carnatic\carnatic\33\Sanjay Subrahmanyan - Karupuram Naarumo.mp3



🎧 Predicted Raga: Kalyāṇi
📖 A bright, devotional Melakarta raga, evoking auspiciousness.


In [15]:
import os
import numpy as np
import json
import librosa
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from collections import Counter
import warnings

warnings.filterwarnings("ignore")

# ——— 1) FEATURE EXTRACTION (60‑bin histogram only) ———
def extract_hist(base_path):
    p_file   = base_path + ".pitch.txt"
    t_file   = base_path + ".ctonic.txt"
    j_file   = base_path + ".json"
    if not all(os.path.isfile(f) for f in (p_file, t_file, j_file)):
        return None, None

    # load pitch
    try:
        data = np.loadtxt(p_file)
        freqs = data[:,1]
        freqs = freqs[freqs > 0]
    except:
        return None, None

    # load tonic
    try:
        tonic = float(open(t_file).read().strip())
    except:
        return None, None

    # cents
    cents = 1200 * np.log2(freqs/tonic)
    cents = cents[~np.isnan(cents)]
    if len(cents) < 100:
        return None, None

    # 60‑bin histogram
    hist, _ = np.histogram(cents, bins=60, range=(-600,1200), density=True)

    # label
    try:
        meta = json.load(open(j_file))
        label = meta["raaga"][0]["name"]
    except:
        return None, None

    return hist, label

# ——— 2) LOAD TRAINING DATA ———
def load_data(root_dir):
    X, y = [], []
    for rd, _, files in os.walk(root_dir):
        for fn in files:
            if fn.lower().endswith(".json"):
                base = os.path.join(rd, fn[:-5])
                hist, lbl = extract_hist(base)
                if hist is not None:
                    X.append(hist); y.append(lbl)
    X = np.array(X)
    y = np.array(y)
    print(f"🔍 After loading: X.shape = {X.shape}, y.shape = {y.shape}")
    print("📊 Raga distribution:", Counter(y))
    return X, y

DATA_DIR = "D:/carnatic/carnatic"
X, y = load_data(DATA_DIR)

if len(X)==0:
    print("❌ No training data—please check your pitch/tonic/json files.")
    raise SystemExit

# ——— 3) STANDARDIZE ———
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
print("✅ Scaled training data shape:", X_scaled.shape)

# ——— 4) TRAIN MODEL ———
clf = RandomForestClassifier(n_estimators=200, class_weight='balanced', random_state=42, n_jobs=-1)
clf.fit(X_scaled, y)
print("🏆 Model trained on histogram features.")

# ——— 5) PREDICTION FUNCTION ———
# ——— 5) PREDICTION FUNCTION (Top‑3) ———
def predict_raga(audio_path):
    # 5a) pitch track
    y_audio, sr = librosa.load(audio_path, sr=None)
    pitches = librosa.yin(y_audio,
                          fmin=librosa.note_to_hz("C2"),
                          fmax=librosa.note_to_hz("C7"),
                          sr=sr)
    pitches = pitches[~np.isnan(pitches)]
    if len(pitches) < 100:
        print("⚠️ Audio too short or noisy.")
        return

    # 5b) estimate tonic, compute cents, hist
    h, bins = np.histogram(np.log2(pitches), bins=100)
    tonic = 2 ** bins[np.argmax(h)]
    cents = 1200 * np.log2(pitches/tonic)
    hist, _ = np.histogram(cents, bins=60, range=(-600,1200), density=True)

    # 5c) reshape to 2D
    feat2d = hist.reshape(1, -1)

    # 5d) scale & get probabilities
    feat_scaled = scaler.transform(feat2d)
    probs = clf.predict_proba(feat_scaled)[0]

    # 5e) top‑3
    idxs = np.argsort(probs)[-3:][::-1]
    print("\n🎧 Top 3 Raga Predictions:")
    for i in idxs:
        print(f"→ {clf.classes_[i]}: {probs[i]*100:.1f}%")

# ——— 5) PREDICTION FUNCTION (Top‑3) ———
def predict_raga(audio_path):
    # 5a) pitch track
    y_audio, sr = librosa.load(audio_path, sr=None)
    pitches = librosa.yin(y_audio,
                          fmin=librosa.note_to_hz("C2"),
                          fmax=librosa.note_to_hz("C7"),
                          sr=sr)
    pitches = pitches[~np.isnan(pitches)]
    if len(pitches) < 100:
        print("⚠️ Audio too short or noisy.")
        return

    # 5b) estimate tonic, compute cents, hist
    h, bins = np.histogram(np.log2(pitches), bins=100)
    tonic = 2 ** bins[np.argmax(h)]
    cents = 1200 * np.log2(pitches/tonic)
    hist, _ = np.histogram(cents, bins=60, range=(-600,1200), density=True)

    # 5c) reshape to 2D
    feat2d = hist.reshape(1, -1)

    # 5d) scale & get probabilities
    feat_scaled = scaler.transform(feat2d)
    probs = clf.predict_proba(feat_scaled)[0]

    # 5e) top‑3
    idxs = np.argsort(probs)[-3:][::-1]
    print("\n🎧 Top 3 Raga Predictions:")
    for i in idxs:
        print(f"→ {clf.classes_[i]}: {probs[i]*100:.1f}%")

# ——— 6) RUN ———
audio_path = input("🎵 Enter your .mp3/.wav path: ").strip()
if os.path.isfile(audio_path):
    predict_raga(audio_path)
else:
    print("❌ File not found.")


🔍 After loading: X.shape = (184, 60), y.shape = (184,)
📊 Raga distribution: Counter({'Rāgamālika': 8, 'Saurāṣtraṁ': 7, 'Tōḍi': 7, 'Kamās': 7, 'Bhairavi': 5, 'Behāg': 5, 'Kalyāṇi': 4, 'Suraṭi': 4, 'Ṣanmukhapriya': 4, 'Rītigauḷa': 4, 'Mōhanaṁ': 4, 'Śankarābharaṇaṁ': 3, 'Bēgaḍa': 3, 'Kāṁbhōji': 3, 'Sāvēri': 3, 'Jōnpuri': 3, 'Sindhubhairavi': 3, 'Harikāmbhōji': 3, 'Kānaḍa': 3, 'Śrīranjani': 2, 'Pūrṇacandrika': 2, 'Madhyamāvati': 2, 'Nādanāmakriya': 2, 'Lalita': 2, 'Nāṭa': 2, 'Kalgaḍa': 2, 'Jaganmōhini': 2, 'Hamīr kaḷyaṇi': 2, 'Pāḍi': 2, 'Amṛtavarṣiṇi': 2, 'Maṇirangu': 2, 'Śuddadhanyāsi': 2, 'Kuntalavarāḷi': 2, 'Kāpi': 2, 'Ābhōgi': 2, 'Karṇāṭaka dēvagāndhāri': 2, 'Kumudakriyā': 2, 'Karaharapriya': 2, 'Gauḷa': 2, 'Mārgahindōḷaṁ': 2, 'Kēdāragauḷa': 2, 'Sahānā': 2, 'Nīlāṁbari': 2, 'Puṇṇāgavarāḷi': 1, 'Kathanakutūhalaṁ': 1, 'Sālaga bhairavi': 1, 'Kalāvati': 1, 'Dvijāvanti': 1, 'Kedāraṁ': 1, 'Cakravākaṁ': 1, 'Sāranga': 1, 'Sarasvatī manōhari': 1, 'Sāma': 1, 'Gamakakriyā': 1, 'Sindhumandāri': 1, 

🎵 Enter your .mp3/.wav path:  D:\carnatic\carnatic\13\Sanjay Subrahmanyan - Sri Raghuvara Sugunaalaya.mp3



🎧 Top 3 Raga Predictions:
→ Śrīranjani: 15.0%
→ Maṇirangu: 7.0%
→ Navrōj: 6.5%


In [1]:
import os
import numpy as np
import json
import librosa
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from collections import Counter
from imblearn.over_sampling import RandomOverSampler
import warnings

warnings.filterwarnings("ignore")

# ——— 1) HISTOGRAM FEATURE ONLY ———
def extract_hist(base):
    p = base + ".pitch.txt"
    t = base + ".ctonic.txt"
    j = base + ".json"
    if not all(os.path.isfile(f) for f in (p, t, j)):
        return None, None

    try:
        data = np.loadtxt(p)
        freqs = data[:,1]; freqs = freqs[freqs>0]
        tonic = float(open(t).read().strip())
        cents = 1200*np.log2(freqs/tonic)
        cents = cents[~np.isnan(cents)]
        if len(cents)<100: return None, None
        hist, _ = np.histogram(cents, bins=60, range=(-600,1200), density=True)
        meta = json.load(open(j))
        label = meta["raaga"][0]["name"]
        return hist, label
    except:
        return None, None

# ——— 2) LOAD & OVERSAMPLE ———
def load_and_oversample(root):
    X, y = [], []
    for rd, _, files in os.walk(root):
        for fn in files:
            if fn.lower().endswith(".json"):
                base = os.path.join(rd, fn[:-5])
                h,l = extract_hist(base)
                if h is not None:
                    X.append(h); y.append(l)
    X = np.array(X); y = np.array(y)
    print("Before oversample:", Counter(y))

    ros = RandomOverSampler(random_state=42)
    Xr, yr = ros.fit_resample(X, y)
    print(" After oversample:", Counter(yr))
    return Xr, yr

DATA_DIR = "D:/carnatic/carnatic"
X, y = load_and_oversample(DATA_DIR)

# ——— 3) SCALE & TRAIN ———
scaler = StandardScaler()
Xs = scaler.fit_transform(X)

clf = RandomForestClassifier(
    n_estimators=200,
    class_weight='balanced',
    random_state=42,
    n_jobs=-1
)
clf.fit(Xs, y)
print("🏆 Trained on oversampled data.")

# ——— 4) PREDICTION (Top‑3) ———
def predict_top3(audio_path):
    y_a, sr = librosa.load(audio_path, sr=None)
    pitches = librosa.yin(y_a,
                          fmin=librosa.note_to_hz("C2"),
                          fmax=librosa.note_to_hz("C7"),
                          sr=sr)
    pitches = pitches[~np.isnan(pitches)]
    if len(pitches)<100:
        print("⚠️ Audio too short/noisy."); return

    h, bins = np.histogram(np.log2(pitches), bins=100)
    tonic = 2**bins[np.argmax(h)]
    cents = 1200*np.log2(pitches/tonic)
    hist, _ = np.histogram(cents, bins=60, range=(-600,1200), density=True)

    f2 = hist.reshape(1,-1)
    f2s = scaler.transform(f2)
    probs = clf.predict_proba(f2s)[0]
    idxs = np.argsort(probs)[-3:][::-1]

    print("\n🎧 Top 3 Raga Predictions:")
    for i in idxs:
        print(f"→ {clf.classes_[i]}: {probs[i]*100:.1f}%")

# ——— 5) RUN ———
path = input("🎵 Path to .mp3/.wav: ").strip()
if os.path.isfile(path):
    predict_top3(path)
else:
    print("❌ File not found.")


Before oversample: Counter({'Rāgamālika': 8, 'Saurāṣtraṁ': 7, 'Tōḍi': 7, 'Kamās': 7, 'Bhairavi': 5, 'Behāg': 5, 'Kalyāṇi': 4, 'Suraṭi': 4, 'Ṣanmukhapriya': 4, 'Rītigauḷa': 4, 'Mōhanaṁ': 4, 'Śankarābharaṇaṁ': 3, 'Bēgaḍa': 3, 'Kāṁbhōji': 3, 'Sāvēri': 3, 'Jōnpuri': 3, 'Sindhubhairavi': 3, 'Harikāmbhōji': 3, 'Kānaḍa': 3, 'Śrīranjani': 2, 'Pūrṇacandrika': 2, 'Madhyamāvati': 2, 'Nādanāmakriya': 2, 'Lalita': 2, 'Nāṭa': 2, 'Kalgaḍa': 2, 'Jaganmōhini': 2, 'Hamīr kaḷyaṇi': 2, 'Pāḍi': 2, 'Amṛtavarṣiṇi': 2, 'Maṇirangu': 2, 'Śuddadhanyāsi': 2, 'Kuntalavarāḷi': 2, 'Kāpi': 2, 'Ābhōgi': 2, 'Karṇāṭaka dēvagāndhāri': 2, 'Kumudakriyā': 2, 'Karaharapriya': 2, 'Gauḷa': 2, 'Mārgahindōḷaṁ': 2, 'Kēdāragauḷa': 2, 'Sahānā': 2, 'Nīlāṁbari': 2, 'Puṇṇāgavarāḷi': 1, 'Kathanakutūhalaṁ': 1, 'Sālaga bhairavi': 1, 'Kalāvati': 1, 'Dvijāvanti': 1, 'Kedāraṁ': 1, 'Cakravākaṁ': 1, 'Sāranga': 1, 'Sarasvatī manōhari': 1, 'Sāma': 1, 'Gamakakriyā': 1, 'Sindhumandāri': 1, 'Dhanyāsi': 1, 'Ānandabhairavi': 1, 'Hindōḷaṁ': 1, 'Rasik

🎵 Path to .mp3/.wav:  D:\carnatic\carnatic\9\Sanjay Subrahmanyan - Eranapai.mp3



🎧 Top 3 Raga Predictions:
→ Janaranjani: 7.5%
→ Harikāmbhōji: 7.0%
→ Kalāvati: 7.0%


In [None]:
import os
import numpy as np
import json
import librosa
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from collections import Counter
from imblearn.over_sampling import RandomOverSampler
from scipy.stats import gaussian_kde
import warnings

warnings.filterwarnings("ignore")

# ——— 1) DATASET FILE CHECKER ———
def check_dataset_files(root):
    print("🔍 Checking dataset integrity...\n")
    missing = []
    total = 0
    for rd, _, files in os.walk(root):
        json_files = [f for f in files if f.endswith(".json")]
        for fn in json_files:
            base = os.path.join(rd, fn[:-5])
            required = [base + ext for ext in [".json", ".pitch.txt", ".ctonic.txt", ".mp3"]]
            missing_parts = [os.path.basename(f) for f in required if not os.path.isfile(f)]
            if missing_parts:
                missing.append((os.path.basename(base), missing_parts))
            total += 1

    print(f"📦 Total samples checked: {total}")
    print(f"❌ Incomplete samples: {len(missing)}\n")
    for name, parts in missing:
        print(f" - {name} missing: {', '.join(parts)}")
    if not missing:
        print("✅ All samples are complete!")
    print("-" * 50)

# ——— 2) FEATURE EXTRACTION (.mp3 support) ———
def extract_features(base):
    p = base + ".pitch.txt"
    t = base + ".ctonic.txt"
    j = base + ".json"
    a = base + ".mp3"  # .mp3 instead of .wav

    if not all(os.path.isfile(f) for f in (p, t, j, a)):
        return None, None

    try:
        # Pitch histogram
        data = np.loadtxt(p)
        freqs = data[:, 1]
        freqs = freqs[freqs > 0]
        tonic = float(open(t).read().strip())
        cents = 1200 * np.log2(freqs / tonic)
        cents = cents[~np.isnan(cents)]
        if len(cents) < 100:
            return None, None
        hist, _ = np.histogram(cents, bins=120, range=(-300, 900), density=True)

        # MFCC and Chroma from .mp3
        y, sr = librosa.load(a, sr=None)
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)

        mfcc_mean = np.mean(mfcc, axis=1)
        mfcc_std = np.std(mfcc, axis=1)
        chroma_mean = np.mean(chroma, axis=1)

        features = np.concatenate([hist, mfcc_mean, mfcc_std, chroma_mean])
        meta = json.load(open(j))
        label = meta["raaga"][0]["name"]

        return features, label
    except Exception as e:
        print("⚠️ Error in", base, ":", e)
        return None, None

# ——— 3) LOAD & OVERSAMPLE ———
def load_and_oversample(root):
    X, y = [], []
    for rd, _, files in os.walk(root):
        for fn in files:
            if fn.lower().endswith(".json"):
                base = os.path.join(rd, fn[:-5])
                f, l = extract_features(base)
                if f is not None:
                    X.append(f)
                    y.append(l)
    X = np.array(X)
    y = np.array(y)
    print("✅ Valid samples loaded:", len(X))

    if len(X) == 0:
        raise ValueError("❌ No valid data found. Check dataset integrity.")

    print("Before oversample:", Counter(y))
    ros = RandomOverSampler(random_state=42)
    Xr, yr = ros.fit_resample(X, y)
    print(" After oversample:", Counter(yr))
    return Xr, yr

# ——— 4) TRAIN THE CLASSIFIER ———
def train_model(data_dir):
    check_dataset_files(data_dir)
    X, y = load_and_oversample(data_dir)

    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    clf = RandomForestClassifier(
        n_estimators=200,
        class_weight='balanced',
        random_state=42,
        n_jobs=-1
    )
    clf.fit(X_scaled, y)
    print("🏆 Model trained successfully.\n")
    return clf, scaler

# ——— 5) PREDICT TOP‑3 ———
def predict_top3(audio_path, clf, scaler):
    y_a, sr = librosa.load(audio_path, sr=None)

    # Tonic estimation using KDE
    pitches = librosa.yin(y_a,
                          fmin=librosa.note_to_hz("C2"),
                          fmax=librosa.note_to_hz("C7"),
                          sr=sr)
    pitches = pitches[~np.isnan(pitches)]
    if len(pitches) < 100:
        print("⚠️ Audio too short or noisy.")
        return

    log_pitches = np.log2(pitches)
    kde = gaussian_kde(log_pitches)
    x_vals = np.linspace(np.min(log_pitches), np.max(log_pitches), 1000)
    tonic_log = x_vals[np.argmax(kde(x_vals))]
    tonic = 2 ** tonic_log

    cents = 1200 * np.log2(pitches / tonic)
    hist, _ = np.histogram(cents, bins=120, range=(-300, 900), density=True)

    # MFCC + Chroma from .mp3
    mfcc = librosa.feature.mfcc(y=y_a, sr=sr, n_mfcc=13)
    chroma = librosa.feature.chroma_stft(y=y_a, sr=sr)
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_std = np.std(mfcc, axis=1)
    chroma_mean = np.mean(chroma, axis=1)

    features = np.concatenate([hist, mfcc_mean, mfcc_std, chroma_mean])
    features_scaled = scaler.transform([features])
    probs = clf.predict_proba(features_scaled)[0]
    idxs = np.argsort(probs)[-3:][::-1]

    print("\n🎧 Top 3 Raga Predictions:")
    for i in idxs:
        print(f"→ {clf.classes_[i]}: {probs[i] * 100:.1f}%")

# ——— 6) MAIN SCRIPT ———
if __name__ == "__main__":
    DATA_DIR = "D:/carnatic/carnatic"  # 🔁 Change to your dataset path

    # Train model
    clf, scaler = train_model(DATA_DIR)

    # Predict from an MP3 file
    test_path = input("\n🎵 Enter path to test .mp3: ").strip()
    if os.path.isfile(test_path):
        predict_top3(test_path, clf, scaler)
    else:
        print("❌ File not found.")


🔍 Checking dataset integrity...

📦 Total samples checked: 197
❌ Incomplete samples: 0

✅ All samples are complete!
--------------------------------------------------
⚠️ Error in D:/carnatic/carnatic\0\Cherthala Ranganatha Sharma - Varashiki Vahana : list index out of range
⚠️ Error in D:/carnatic/carnatic\111\Mahati - Gopi Gopala Bala : list index out of range
⚠️ Error in D:/carnatic/carnatic\141\Chaitra Sairam - Ardhanareeshwaram : list index out of range
⚠️ Error in D:/carnatic/carnatic\146\Chaitra Sairam - Gange Maampahi : list index out of range
⚠️ Error in D:/carnatic/carnatic\152\Kuldeep Pai - Gange Maam Pahi : list index out of range
⚠️ Error in D:/carnatic/carnatic\154\Kuldeep Pai - Vasudeva Sutam Devam : list index out of range


In [6]:
import os

def check_dataset_files(root):
    print("🔍 Checking dataset integrity...\n")
    missing = []
    total = 0
    for rd, _, files in os.walk(root):
        json_files = [f for f in files if f.lower().endswith(".json")]
        for fn in json_files:
            total += 1
            base = os.path.join(rd, fn[:-5])  # remove .json extension
            required = [base + ext for ext in [".json", ".pitch.txt", ".ctonic.txt", ".wav"]]
            missing_parts = [os.path.basename(f) for f in required if not os.path.isfile(f)]
            if missing_parts:
                missing.append((os.path.basename(base), missing_parts))

    print(f"📦 Total samples checked: {total}")
    print(f"❌ Incomplete samples found: {len(missing)}\n")

    for name, parts in missing:
        print(f" - {name} missing: {', '.join(parts)}")

    if not missing:
        print("✅ All samples have required files!")
    print("-" * 50)

# Replace this with your path
DATA_DIR = "D:/carnatic/carnatic"
check_dataset_files(DATA_DIR)


🔍 Checking dataset integrity...

📦 Total samples checked: 197
❌ Incomplete samples found: 197

 - Cherthala Ranganatha Sharma - Varashiki Vahana missing: Cherthala Ranganatha Sharma - Varashiki Vahana.wav
 - Cherthala Ranganatha Sharma - Bhuvini Dasudane missing: Cherthala Ranganatha Sharma - Bhuvini Dasudane.wav
 - Sanjay Subrahmanyan - Pullum Silambena Kan missing: Sanjay Subrahmanyan - Pullum Silambena Kan.wav
 - Modhumudi Sudhakar - Telisi Rama missing: Modhumudi Sudhakar - Telisi Rama.wav
 - Modhumudi Sudhakar - Sundari Nee Divya missing: Modhumudi Sudhakar - Sundari Nee Divya.wav
 - Modhumudi Sudhakar - Rama Namam Bhajare missing: Modhumudi Sudhakar - Rama Namam Bhajare.wav
 - Modhumudi Sudhakar - Ghandhamu Poyyaruga missing: Modhumudi Sudhakar - Ghandhamu Poyyaruga.wav
 - Modhumudi Sudhakar - Mangalam Avanisutanatha missing: Modhumudi Sudhakar - Mangalam Avanisutanatha.wav
 - Mahati - Sharanagatha Vatsale missing: Mahati - Sharanagatha Vatsale.wav
 - Mahati - Hiranmayeem missing