# In [None]:
import numpy as np
from scipy.io import loadmat
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import classification_report
import glob

# === Function to summarize features (mean, std, min, max) ===
def summarize_feature(feat):
    """Collapse a feature array into summary statistics taken along axis 1.

    The mean, standard deviation (NumPy's default, ``ddof=0``), minimum and
    maximum are each computed along axis 1 and concatenated in that order.

    Parameters
    ----------
    feat : array_like
        Feature values. Scalar or 1-D input is promoted to a single row so
        that it yields four values instead of raising an axis error.

    Returns
    -------
    numpy.ndarray
        ``np.hstack`` of the mean, std, min and max arrays.
    """
    # Promote 0-D/1-D input to shape (1, n) so axis=1 is always valid;
    # arrays that already have two or more dimensions pass through unchanged.
    feat = np.atleast_2d(feat)
    return np.hstack([
        np.mean(feat, axis=1),
        np.std(feat, axis=1),
        np.min(feat, axis=1),
        np.max(feat, axis=1),
    ])

# === Load features and labels ===
# Keys read from every .mat file, in the order they are concatenated.
FEATURE_KEYS = ('mfcc', 'centroid', 'rolloff', 'mel', 'chroma')

feature_files = sorted(glob.glob('features_sample*.mat'))
if not feature_files:
    # Fail here with a clear message rather than deep inside scikit-learn.
    raise FileNotFoundError(
        "No feature files matching 'features_sample*.mat' were found"
    )

# NOTE(review): sorted() orders names lexicographically, so
# 'features_sample10.mat' comes before 'features_sample2.mat'. Confirm the
# file names are zero-padded or that labels.txt follows this same order;
# otherwise features and labels are silently misaligned.
X = []
y = []

for feature_path in feature_files:
    data = loadmat(feature_path)

    # Summarize and combine all features into one vector
    feature_vector = np.hstack([
        summarize_feature(data[key]) for key in FEATURE_KEYS
    ])
    X.append(feature_vector)

# Read labels from file, one label per line
with open('labels.txt', 'r') as label_file:
    for line in label_file:
        label = line.strip()
        # Skip blank lines (e.g. a trailing newline) so they do not become
        # empty-string labels and throw off the sample count.
        if label:
            y.append(label)

# Each feature file must be paired with exactly one label.
if len(X) != len(y):
    raise ValueError(
        f"Found {len(X)} feature files but {len(y)} labels; "
        "each feature file needs exactly one label"
    )

# Convert to numpy arrays
X = np.array(X)
y = np.array(y)

# Encode string labels into integers
le = LabelEncoder()
y_encoded = le.fit_transform(y)

# Split data into train and test sets (80% train, 20% test)
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

# Standardize features (zero mean, unit variance); the scaler is fitted on
# the training split only and then applied to the test split.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train SVM classifier with RBF kernel
clf = SVC(kernel='rbf', C=1, gamma='auto', random_state=42)
clf.fit(X_train, y_train)

# Predict and print classification report.
# `labels` is passed explicitly so the report always has one row per encoded
# class; without it, a class absent from both y_test and y_pred makes the
# number of rows disagree with `target_names`.
y_pred = clf.predict(X_test)
print(classification_report(
    y_test,
    y_pred,
    labels=np.arange(len(le.classes_)),
    target_names=le.classes_,
))
