In [2]:
pip install librosa

Defaulting to user installation because normal site-packages is not writeable
Collecting librosa
  Downloading librosa-0.11.0-py3-none-any.whl.metadata (8.7 kB)
Collecting audioread>=2.1.9 (from librosa)
  Downloading audioread-3.0.1-py3-none-any.whl.metadata (8.4 kB)
Collecting numba>=0.51.0 (from librosa)
  Downloading numba-0.58.1-cp38-cp38-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (2.7 kB)
Collecting soundfile>=0.12.1 (from librosa)
  Downloading soundfile-0.13.1-py2.py3-none-manylinux_2_28_x86_64.whl.metadata (16 kB)
Collecting pooch>=1.1 (from librosa)
  Downloading pooch-1.8.2-py3-none-any.whl.metadata (10 kB)
Collecting soxr>=0.3.2 (from librosa)
  Downloading soxr-0.3.7-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (5.5 kB)
Collecting lazy_loader>=0.1 (from librosa)
  Downloading lazy_loader-0.4-py3-none-any.whl.metadata (7.6 kB)
Collecting msgpack>=1.0 (from librosa)
  Downloading msgpack-1.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x8

In [3]:
import os
import pandas as pd
import numpy as np
import librosa
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from tqdm import tqdm

In [9]:
# Audio locations, relative to the notebook's working directory.
# The training wav files live in the first folder, the test wav files in the second.
TRAIN_AUDIO_PATH = "audios/train/"
TEST_AUDIO_PATH = "audios/test/"

In [10]:
# 📄 Read the three CSV tables (train, test, sample submission) from the working
# directory, in that order.
train_df, test_df, sample_submission = (
    pd.read_csv(csv_name)
    for csv_name in ("train.csv", "test.csv", "sample_submission.csv")
)

In [11]:
# Peek at the first five training rows.
train_df.head(n=5)

Unnamed: 0,filename,label
0,audio_710.wav,1.0
1,audio_1265.wav,1.0
2,audio_1114.wav,1.5
3,audio_946.wav,1.5
4,audio_1127.wav,2.0


In [12]:
# 🎵 Extract features from audio file
def extract_features(file_path, n_mfcc=20, sr=None):
    """Summarise one audio file as a fixed-length 1-D feature vector.

    Parameters
    ----------
    file_path : str or path-like
        Audio file handed to ``librosa.load``.
    n_mfcc : int, default 20
        Number of MFCC coefficients to compute.
    sr : int or None, default None
        Sampling rate forwarded to ``librosa.load``. ``None`` asks librosa to
        keep the file's native rate instead of resampling.

    Returns
    -------
    numpy.ndarray
        Concatenation, in this order, of: per-coefficient MFCC means,
        per-coefficient MFCC standard deviations, per-bin chroma means and
        per-band spectral-contrast means (all taken over the time axis).

    Raises
    ------
    ValueError
        If the loaded signal contains no samples.
    """
    # `signal` rather than `y`: the notebook uses `y` for the label vector.
    signal, sample_rate = librosa.load(file_path, sr=sr)

    # Fail early with the offending path instead of an opaque downstream error
    # (or NaN statistics) when a clip is empty.
    if signal.size == 0:
        raise ValueError(f"Audio file contains no samples: {file_path}")

    # MFCCs: keep both the mean and the spread of every coefficient over time.
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    mfcc_mean = np.mean(mfcc, axis=1)
    mfcc_std = np.std(mfcc, axis=1)

    # Chroma features, averaged over time.
    chroma = librosa.feature.chroma_stft(y=signal, sr=sample_rate)
    chroma_mean = np.mean(chroma, axis=1)

    # Spectral contrast, averaged over time.
    contrast = librosa.feature.spectral_contrast(y=signal, sr=sample_rate)
    contrast_mean = np.mean(contrast, axis=1)

    # One flat vector per file so the results can be stacked into a matrix.
    return np.concatenate([mfcc_mean, mfcc_std, chroma_mean, contrast_mean])

In [13]:
# Build one feature vector per training clip, following the row order of train_df
train_features = [
    extract_features(os.path.join(TRAIN_AUDIO_PATH, clip_name))
    for clip_name in tqdm(train_df['filename'])
]

# Stack the vectors into the feature matrix and pull out the regression targets
X = np.array(train_features)
y = train_df['label'].to_numpy()

100%|█████████████████████████████████████████| 444/444 [02:36<00:00,  2.83it/s]


In [14]:
# Split for validation: hold out 20% of the rows, with a fixed seed for repeatability
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Random Forest Regressor (seeded so the fitted forest is reproducible)
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# 🔍 Validation
y_pred = model.predict(X_val)
# Take the square root explicitly: the `squared=False` flag of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# whereas sqrt(MSE) yields the same RMSE on every version.
rmse = np.sqrt(mean_squared_error(y_val, y_pred))
print(f"Validation RMSE: {rmse:.4f}")

Validation RMSE: 0.8582


In [15]:
# Build one feature vector per test clip, following the row order of test_df
test_features = [
    extract_features(os.path.join(TEST_AUDIO_PATH, clip_name))
    for clip_name in tqdm(test_df['filename'])
]

X_test = np.array(test_features)

# 🔮 Predict with the fitted model
raw_preds = model.predict(X_test)

# Keep every prediction inside the [0, 5] range (as per competition rule)
test_preds = np.clip(raw_preds, 0, 5)

100%|█████████████████████████████████████████| 204/204 [01:05<00:00,  3.12it/s]


In [16]:
# Pair every test filename with its clipped prediction
submission_columns = {
    "filename": test_df["filename"],
    "label": test_preds,
}
submission = pd.DataFrame(submission_columns)

# Write the file without the index column, then preview the first rows
submission.to_csv("submission.csv", index=False)
print("Submission saved!")
submission.head()

Submission saved!


Unnamed: 0,filename,label
0,audio_804.wav,2.855
1,audio_1028.wav,3.915
2,audio_865.wav,3.525
3,audio_774.wav,3.035
4,audio_1138.wav,3.205
