In [24]:
!pip install pydub
!pip install ffmpeg-python

Collecting pydub
  Using cached pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Installing collected packages: pydub
Successfully installed pydub-0.25.1
Collecting ffmpeg-python
  Downloading ffmpeg_python-0.2.0-py3-none-any.whl.metadata (1.7 kB)
Collecting future (from ffmpeg-python)
  Using cached future-1.0.0-py3-none-any.whl.metadata (4.0 kB)
Downloading ffmpeg_python-0.2.0-py3-none-any.whl (25 kB)
Using cached future-1.0.0-py3-none-any.whl (491 kB)
Installing collected packages: future, ffmpeg-python
Successfully installed ffmpeg-python-0.2.0 future-1.0.0


In [26]:
!pip install librosa



In [14]:
#Importing Required Libraries

import os
import librosa
import numpy as np
import pandas as pd
from pydub import AudioSegment
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')

In [16]:
# Re-encode the training clips as 16-bit PCM WAV
#
# Every file named in train.csv is read from the raw audio folder and written,
# under the same name, into ./converted. Clips that are absent or cannot be
# decoded are reported and skipped so a single bad file does not stop the run.

train_df = pd.read_csv('./dataset/train.csv')

input_folder = './dataset/audios_train'
output_folder = './converted'
os.makedirs(output_folder, exist_ok=True)

for fname in train_df['filename']:
    input_path = os.path.join(input_folder, fname)
    output_path = os.path.join(output_folder, fname)

    # Guard clause: nothing to convert when the source clip is not on disk.
    if not os.path.exists(input_path):
        print(f"[MISSING] {fname}")
        continue

    try:
        audio = AudioSegment.from_file(input_path)
        audio.export(output_path, format='wav', codec='pcm_s16le')
    except Exception as e:
        # Best effort: log the failure and carry on with the next clip.
        print(f"[FAILED] {fname}, Reason: {e}")


In [18]:
# Defining Feature Extracting Function

# We define a function that loads an audio file and extracts MFCC features.
# For each audio file, we compute the MFCCs and then aggregate them by taking the mean of each coefficient over the time frames.

def extract_features(file_path, n_mfcc=13):
    """Summarise one audio file as a vector of time-averaged MFCCs.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute. Defaults to 13, the value
        this function previously hard-coded.

    Returns
    -------
    numpy.ndarray or None
        One mean value per MFCC coefficient, averaged over all time frames,
        or None if loading or feature extraction raised an exception (the
        error is printed, not re-raised).
    """
    try:
        # sr=None keeps the file's own sampling rate instead of resampling;
        # mono=True requests a single-channel signal.
        audio, sr = librosa.load(file_path, sr=None, mono=True, dtype=np.float32)
        mfcc = librosa.feature.mfcc(y=audio, sr=sr, n_mfcc=n_mfcc)
        # Transpose so frames run along axis 0, then average them away,
        # leaving one value per coefficient.
        mfcc_mean = np.mean(mfcc.T, axis=0)
        return mfcc_mean

    except Exception as e:
        # Best effort: callers treat None as "no features for this file".
        print(f"[ERROR] {file_path} - {e}")
        return None

In [22]:
# Loading Data and Extracting Features
#
# Run the feature extractor over every converted training clip. A clip's label
# is kept only when its features were extracted, so the feature rows and the
# labels stay aligned.

train_features = []
train_scores = []

for filename, label in zip(train_df['filename'], train_df['label']):
    file_path = os.path.join('converted', filename)
    features = extract_features(file_path)

    if features is None:
        continue  # extraction failed: drop the clip together with its label

    train_features.append(features)
    train_scores.append(label)

# Stack the per-clip lists into arrays
X = np.array(train_features)
y = np.array(train_scores)

print("Training features shape:", X.shape)
print("Training labels shape:", y.shape)

Training features shape: (444, 13)
Training labels shape: (444,)


In [24]:
# Training the Model
#
# Hold out 20% of the extracted clips for validation, then fit a
# 100-tree random forest regressor on the remaining 80%.

X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,   # fixed seed so the split is repeatable
)

model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,   # fixed seed so the forest is repeatable
)
model.fit(X=X_train, y=y_train)

In [26]:
# Evaluating the Model
#
# Score the fitted model on the held-out validation split.
y_pred = model.predict(X_val)

mse = mean_squared_error(y_val, y_pred)
rmse = np.sqrt(mse)   # root of the mean squared error
r2 = r2_score(y_val, y_pred)

print(f"Validation RMSE: {rmse: .4f}")
print(f"Validation R-square Score: {r2: .4f}")

Validation RMSE:  1.0857
Validation R-square Score:  0.1353


In [28]:
# Re-encode the test clips as 16-bit PCM WAV
#
# Every file named in test.csv is read from the raw test-audio folder and
# written, under the same name, into ./converted_test. Clips that are absent or
# cannot be decoded are reported and skipped so a single bad file does not stop
# the run.

test_df = pd.read_csv('./dataset/test.csv')

input_folder = './dataset/audios_test'
output_folder = './converted_test'
os.makedirs(output_folder, exist_ok=True)

for fname in test_df['filename']:
    input_path = os.path.join(input_folder, fname)
    output_path = os.path.join(output_folder, fname)

    # Guard clause: nothing to convert when the source clip is not on disk.
    if not os.path.exists(input_path):
        # Same "[MISSING] <name>" format as the training conversion cell, so
        # one search pattern finds missing files from both runs.
        print(f"[MISSING] {fname}")
        continue

    try:
        audio = AudioSegment.from_file(input_path)
        audio.export(output_path, format='wav', codec='pcm_s16le')
    except Exception as e:
        # Best effort: log the failure and carry on with the next clip.
        print(f"[FAILED] {fname}, Reason: {e}")

In [30]:
# Prediction on Test Data
#
# Each row of test.csv gets exactly one feature vector, and therefore one
# prediction. When features cannot be extracted for a clip, an all-zero vector
# stands in for it so the row is not dropped.

# Width of the placeholder vector comes from the fitted model rather than a
# hard-coded 13, so it cannot drift out of sync with the training features.
n_features = model.n_features_in_

test_features = []
n_fallback = 0

for filename in test_df['filename']:
    file_path = os.path.join('converted_test', filename)
    features = extract_features(file_path)   # None when extraction failed

    if features is None:
        n_fallback += 1
        features = np.zeros(n_features)

    test_features.append(features)

# Surface the fallbacks: these predictions are made from placeholder input.
if n_fallback:
    print(f"[WARN] {n_fallback} test file(s) had no features; predicted from zero vectors")

X_test = np.array(test_features)
test_preds = model.predict(X_test)    # Predict grammar scores on the test data using the trained model

In [34]:
# Generating Submissions
#
# Start from the provided sample submission, set its 'label' column to the
# model's test predictions, and save the result as submission.csv.

submission = (
    pd.read_csv('./dataset/sample_submission.csv')
    .assign(label=test_preds)
)
submission.to_csv('submission.csv', index=False)

print("Submission file created: submission.csv")

Submission file created: submission.csv
