In [None]:
#importing libraries
import os
import librosa
import librosa.display
import numpy as np
import pandas as pd
from sklearn.decomposition import TruncatedSVD
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib
import matplotlib.pyplot as plt

#file paths (relative strings, so they resolve against the current working directory)
AUDIO_DIR = "data/audio_files"   # Folder containing all audio files; joined with each row's `filename` below
CSV_PATH = "data/metadata.csv"   # CSV with filename and class columns, read into `metadata` below


In [None]:
#reading data
metadata = pd.read_csv(CSV_PATH)

# Fail fast with a clear message if the columns the later cells rely on are
# missing, instead of hitting a bare KeyError in the middle of the extraction loop.
missing_cols = {"filename", "class"} - set(metadata.columns)
if missing_cols:
    raise KeyError(f"{CSV_PATH} is missing required column(s): {sorted(missing_cols)}")

print(metadata.head())

#checking classes
print(f"classes available: {metadata['class'].unique()}")


In [None]:
#feature extraction
def extract_features(file_path, duration=5.0, n_mfcc=13):
    """Summarise one audio file as a flat, fixed-length feature vector.

    The clip is loaded at its native sampling rate (``sr=None``) and truncated
    to at most ``duration`` seconds. Every frame-wise feature is averaged over
    time so the result does not depend on the number of frames.

    Parameters
    ----------
    file_path : str or path-like
        Audio file to load with ``librosa.load``.
    duration : float, optional
        Maximum number of seconds read from the start of the file (default 5.0).
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 13).

    Returns
    -------
    numpy.ndarray
        1-D vector laid out as
        ``[zcr, centroid, bandwidth, tempo, mfcc means..., chroma[:2], tonnetz[:2]]``.
        With the default ``n_mfcc`` this is expected to hold 21 values.

    Raises
    ------
    ValueError
        If the file decodes to zero samples.
    """
    y, sr = librosa.load(file_path, duration=duration, sr=None)  # fixed-length clip, native sr
    if y.size == 0:
        # Give the caller a readable reason rather than an opaque error from
        # one of the feature routines below.
        raise ValueError(f"No audio samples could be read from {file_path}")

    # Temporal and spectral features (each reduced to a single time-averaged value)
    zcr = librosa.feature.zero_crossing_rate(y).mean()
    centroid = librosa.feature.spectral_centroid(y=y, sr=sr).mean()
    bandwidth = librosa.feature.spectral_bandwidth(y=y, sr=sr).mean()
    # The beat frames are not needed; np.hstack below accepts tempo whether it
    # comes back as a scalar or as a 1-D array.
    tempo, _ = librosa.beat.beat_track(y=y, sr=sr)

    # MFCCs: average each coefficient over time
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    mfccs_mean = mfcc.mean(axis=1)

    # Chroma & Tonnetz: time-averaged per row
    chroma = librosa.feature.chroma_stft(y=y, sr=sr).mean(axis=1)
    tonnetz = librosa.feature.tonnetz(y=y, sr=sr).mean(axis=1)

    # NOTE(review): only the first two chroma rows and first two tonnetz rows
    # are kept. Kept as-is so the feature layout stays compatible with models
    # already trained on it -- confirm the truncation is intentional.
    features = np.hstack([
        zcr, centroid, bandwidth, tempo,
        mfccs_mean, chroma[:2], tonnetz[:2]
    ])
    return features


In [None]:

features_list = []
labels = []
failed_files = []  # filenames that could not be processed, for the summary below

#extract features and labels
for filename, label in zip(metadata['filename'], metadata['class']):
    file_path = os.path.join(AUDIO_DIR, filename)
    try:
        features = extract_features(file_path)
    except Exception as e:
        # Best-effort: one unreadable file should not abort the whole run.
        print(f"Error processing {filename}: {e}")
        failed_files.append(filename)
        continue
    # Append features and label together, only after extraction succeeded,
    # so the two lists can never get out of step.
    features_list.append(features)
    labels.append(label)

if failed_files:
    print(f"Skipped {len(failed_files)} of {len(metadata)} files")

if not features_list:
    # Without this the failure only shows up later, inside the scaler,
    # with a message that does not point at the audio files.
    raise RuntimeError(
        f"No features were extracted; check AUDIO_DIR ({AUDIO_DIR}) and the filenames in the metadata"
    )

X = np.array(features_list)
y = np.array(labels)

print(f"Feature matrix shape: {X.shape}")


In [None]:
#standardize
# NOTE(review): the scaler and the SVD are fit on every sample, including the
# rows that are later held out by train_test_split, so the reported test
# metrics may be optimistic. Fitting both on the training split only would
# avoid this.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

#apply SVD
svd = TruncatedSVD(n_components=10, random_state=42)  # tune this!
X_svd = svd.fit_transform(X_scaled)

#plot
cumulative_variance = np.cumsum(svd.explained_variance_ratio_)
# Use an explicit 1-based x-axis: the default 0-based index would place the
# variance explained by k components at x = k - 1, contradicting the axis label.
component_counts = np.arange(1, len(cumulative_variance) + 1)
plt.plot(component_counts, cumulative_variance, marker='o')
plt.xlabel("Number of components")
plt.ylabel("Cumulative explained variance")
plt.title("SVD explained variance")
plt.show()


In [None]:
#train
# Hold out 20% of the samples; stratify on the labels so both splits keep the
# same class proportions, and fix the seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X_svd,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# fit() returns the fitted estimator, so construction and training can be chained.
clf = RandomForestClassifier(n_estimators=200, random_state=42).fit(X_train, y_train)

#test
y_pred = clf.predict(X_test)

# Per-class precision/recall/F1 first, then the raw confusion counts.
report = classification_report(y_test, y_pred)
print(report)
matrix = confusion_matrix(y_test, y_pred)
print(matrix)


In [None]:
#save
# Store the classifier together with the fitted scaler and SVD in one file:
# the model was trained on scaled, SVD-reduced features, so all three travel together.
artifacts = {"model": clf, "scaler": scaler, "svd": svd}
joblib.dump(artifacts, "audio_classifier.pkl")

print("Model saved as audio_classifier.pkl")
