In [9]:
import librosa
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

def extract_features(file_path):
    audio_data, sample_rate = librosa.load(file_path)
    mfccs = librosa.feature.mfcc(y=audio_data, sr=sample_rate, n_mfcc=13)
    return np.mean(mfccs.T, axis=0)

audio_files = [
    "/content/mixkit-car-horn-718.wav",
    "/content/car-start-and-driving-away-2-31339.wav",
    "/content/350cc-bike-firing-32391.wav",
    "/content/mixkit-failed-car-ignition-1540.wav",
    "/content/mixkit-failed-car-ignition-1540.wav",
    "/content/highway-traffic-cars-and-street-sounds-8029.wav"  # Include the new sample
]

labels = ["car", "car", "bike", "truck", "bus", "car"]

X = np.array([extract_features(file) for file in audio_files])
y = np.array(labels)

# Balance the classes by duplicating samples (temporary solution)
X = np.concatenate([X, X[y == "bus"]])
y = np.concatenate([y, y[y == "bus"]])

# Feature normalization
scaler = StandardScaler()
X = scaler.fit_transform(X)

print("X shape:", X.shape)
print("y shape:", y.shape)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Experiment with hyperparameters
clf = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

new_sample = "/content/highway-traffic-cars-and-street-sounds-8029.wav"
new_features = scaler.transform([extract_features(new_sample)])  # Apply the same scaler to the new sample
predicted_label = clf.predict(new_features)[0]
print("Predicted label for", new_sample, ":", predicted_label)


X shape: (7, 13)
y shape: (7,)
Accuracy: 0.0
Predicted label for /content/highway-traffic-cars-and-street-sounds-8029.wav : car
