In [38]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import joblib
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import GridSearchCV# imports_and_config.py
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import librosa
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix


In [39]:
from sklearn.preprocessing import LabelEncoder
import joblib

def preprocess_data(train_df, test_df):
    label_encoder = LabelEncoder()
    
    # Apply label encoding to the 'label' column in the train dataset
    train_df['label'] = label_encoder.fit_transform(train_df['label'])
    
    # Test dataset doesn't have 'label', so we add a placeholder value (-1 or any other placeholder)
    test_df['label'] = -1  # Placeholder for test set labels
    
    # Save the label encoder for future use (for prediction on new data)
    joblib.dump(label_encoder, 'label_encoder.pkl')  # Ensure this path is correct
    
    return train_df, test_df, label_encoder

In [40]:
def extract_features(file_path, n_mfcc=13):
    try:
        y, sr = librosa.load(file_path, sr=None)
        
        # MFCCs
        mfccs = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
        mfccs_mean = np.mean(mfccs, axis=1)
        mfccs_std = np.std(mfccs, axis=1)

        # Chroma
        chroma = librosa.feature.chroma_stft(y=y, sr=sr)
        chroma_mean = np.mean(chroma, axis=1)

        # Zero-crossing rate
        zcr = librosa.feature.zero_crossing_rate(y)
        zcr_mean = np.mean(zcr)

        # Spectral contrast
        spectral_contrast = librosa.feature.spectral_contrast(y=y, sr=sr)
        contrast_mean = np.mean(spectral_contrast, axis=1)

        # Combine all features
        features = np.hstack([
            mfccs_mean, mfccs_std, chroma_mean, [zcr_mean], contrast_mean
        ])
        return features
    except Exception as e:
        print(f"Error with {file_path}: {e}")
        return np.zeros(n_mfcc*2 + 12 + 1 + 7)  # Adjust based on feature lengths


In [41]:
def extract_all_features(df, base_path="data/audios/", n_mfcc=13, is_train=True):
    features = []
    # Select the correct folder: train or test
    folder = "train" if is_train else "test"
    for filename in df["filename"]:
        full_path = os.path.join(base_path, folder, filename)
        print(f"Checking file path: {full_path}")  # Debugging line
        mfcc = extract_features(full_path, n_mfcc)
        features.append(mfcc)
    return np.array(features)


In [42]:
def split_data(df, test_size=0.2, random_state=42):
    X = df.drop(columns=['label'])
    y = df['label']
    X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=test_size, random_state=random_state)
    return X_train, X_val, y_train, y_val

In [43]:
def normalize_features(X_train, X_val):
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_val_scaled = scaler.transform(X_val)
    return X_train_scaled, X_val_scaled, scaler

In [44]:
def train_and_tune_model(X_train, y_train):
    model = RandomForestClassifier(random_state=42)
    
    # Hyperparameter tuning using GridSearchCV
    param_grid = {
        'n_estimators': [50, 100, 200],
        'max_depth': [None, 10, 20, 30],
        'min_samples_split': [2, 5, 10],
        'min_samples_leaf': [1, 2, 4]
    }
    
    grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, n_jobs=-1, verbose=2)
    grid_search.fit(X_train, y_train)
    
    best_model = grid_search.best_estimator_
    print(f"Best parameters: {grid_search.best_params_}")
    print(f"Best cross-validation score: {grid_search.best_score_}")
    
    return best_model

In [45]:
def evaluate_model(model, X_test, y_test):
    y_pred = model.predict(X_test)
    print("Classification Report:\n", classification_report(y_test, y_pred))
    print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))

In [46]:
def save_model_and_scaler(model, scaler, model_path="model/random_forest_model.pkl", scaler_path="model/scaler.pkl"):
    if not os.path.exists("model"):
        os.makedirs("model")
    joblib.dump(model, model_path)
    joblib.dump(scaler, scaler_path)
    print(f"Model and scaler saved at {model_path} and {scaler_path}")

In [47]:
def load_model_and_scaler(model_path="model/random_forest_model.pkl", scaler_path="model/scaler.pkl"):
    model = joblib.load(model_path)
    scaler = joblib.load(scaler_path)
    return model, scaler

In [48]:
def score_audio(file_path, model, scaler, n_mfcc=13):
    features = extract_features(file_path, n_mfcc)
    features_scaled = scaler.transform([features])
    prediction = model.predict(features_scaled)
    return prediction

In [49]:
pip install --upgrade scikit-learn imbalanced-learn

Note: you may need to restart the kernel to use updated packages.


In [50]:
from imblearn.over_sampling import SMOTE
from sklearn.model_selection import StratifiedKFold

In [51]:
import pandas as pd

# Assuming your CSV files are in the 'data/' folder
train_df = pd.read_csv('data/train.csv')
test_df = pd.read_csv('data/test.csv')


In [52]:
# 1. Encode labels & extract features
train_df, test_df, label_encoder = preprocess_data(train_df, test_df)
X_features = extract_all_features(train_df, is_train=True)
y_labels = train_df['label'].values

Checking file path: data/audios/train\audio_710.wav
Checking file path: data/audios/train\audio_1265.wav
Checking file path: data/audios/train\audio_1114.wav
Checking file path: data/audios/train\audio_946.wav
Checking file path: data/audios/train\audio_1127.wav
Checking file path: data/audios/train\audio_669.wav
Checking file path: data/audios/train\audio_1029.wav
Checking file path: data/audios/train\audio_713.wav
Checking file path: data/audios/train\audio_845.wav
Checking file path: data/audios/train\audio_150.wav
Checking file path: data/audios/train\audio_764.wav
Checking file path: data/audios/train\audio_1253.wav
Checking file path: data/audios/train\audio_1271.wav
Checking file path: data/audios/train\audio_638.wav
Checking file path: data/audios/train\audio_755.wav
Checking file path: data/audios/train\audio_950.wav
Checking file path: data/audios/train\audio_1232.wav
Checking file path: data/audios/train\audio_346.wav
Checking file path: data/audios/train\audio_730.wav
Check

Checking file path: data/audios/train\audio_469.wav
Checking file path: data/audios/train\audio_851.wav
Checking file path: data/audios/train\audio_372.wav
Checking file path: data/audios/train\audio_174.wav
Checking file path: data/audios/train\audio_275.wav
Checking file path: data/audios/train\audio_899.wav
Checking file path: data/audios/train\audio_784.wav
Checking file path: data/audios/train\audio_1151.wav
Checking file path: data/audios/train\audio_491.wav
Checking file path: data/audios/train\audio_844.wav
Checking file path: data/audios/train\audio_1132.wav
Checking file path: data/audios/train\audio_157.wav
Checking file path: data/audios/train\audio_811.wav
Checking file path: data/audios/train\audio_237.wav
Checking file path: data/audios/train\audio_662.wav
Checking file path: data/audios/train\audio_593.wav
Checking file path: data/audios/train\audio_1239.wav
Checking file path: data/audios/train\audio_1277.wav
Checking file path: data/audios/train\audio_1161.wav
Checkin

Checking file path: data/audios/train\audio_1174.wav
Checking file path: data/audios/train\audio_118.wav
Checking file path: data/audios/train\audio_633.wav
Checking file path: data/audios/train\audio_1168.wav
Checking file path: data/audios/train\audio_523.wav
Checking file path: data/audios/train\audio_987.wav
Checking file path: data/audios/train\audio_533.wav
Checking file path: data/audios/train\audio_557.wav
Checking file path: data/audios/train\audio_356.wav
Checking file path: data/audios/train\audio_860.wav
Checking file path: data/audios/train\audio_522.wav
Checking file path: data/audios/train\audio_370.wav
Checking file path: data/audios/train\audio_148.wav
Checking file path: data/audios/train\audio_1096.wav
Checking file path: data/audios/train\audio_396.wav
Checking file path: data/audios/train\audio_480.wav
Checking file path: data/audios/train\audio_36.wav
Checking file path: data/audios/train\audio_1078.wav
Checking file path: data/audios/train\audio_802.wav
Checking 

In [53]:
# 2. Split
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
    X_features, y_labels, test_size=0.2, random_state=42, stratify=y_labels
)

In [54]:
# 3. Normalize
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_split)
X_val_scaled = scaler.transform(X_val_split)

In [55]:
from imblearn.over_sampling import SMOTE

smote = SMOTE(random_state=42, k_neighbors=1)  # use 1 if your smallest class has 2 samples
X_resampled, y_resampled = smote.fit_resample(X_train_scaled, y_train_split)

In [56]:
model = RandomForestClassifier(class_weight='balanced', random_state=42)
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 10, 20],
    'min_samples_split': [2, 5],
    'min_samples_leaf': [1, 2]
}
cv = StratifiedKFold(n_splits=3, shuffle=True, random_state=42)
grid_search = GridSearchCV(model, param_grid, cv=cv, n_jobs=-1, verbose=2)
grid_search.fit(X_resampled, y_resampled)

Fitting 3 folds for each of 36 candidates, totalling 108 fits


In [57]:
# 6. Evaluate
best_model = grid_search.best_estimator_
evaluate_model(best_model, X_val_scaled, y_val_split)

Classification Report:
               precision    recall  f1-score   support

           2       0.46      0.67      0.55         9
           3       0.25      0.20      0.22         5
           4       0.46      0.60      0.52        10
           5       0.00      0.00      0.00         4
           6       0.46      0.35      0.40        17
           7       0.14      0.07      0.10        14
           8       0.54      0.67      0.60        30

    accuracy                           0.45        89
   macro avg       0.33      0.37      0.34        89
weighted avg       0.41      0.45      0.42        89

Confusion Matrix:
 [[ 6  0  2  0  0  0  1]
 [ 2  1  0  0  1  1  0]
 [ 1  1  6  0  1  0  1]
 [ 0  0  2  0  1  0  1]
 [ 2  2  0  1  6  1  5]
 [ 1  0  1  0  2  1  9]
 [ 1  0  2  1  2  4 20]]


In [58]:
# 7. Save
save_model_and_scaler(best_model, scaler)

Model and scaler saved at model/random_forest_model.pkl and model/scaler.pkl


In [59]:
def preprocess_test_only(test_df, label_encoder_path='label_encoder.pkl'):
    # Load the label encoder (for encoding test labels if needed)
    label_encoder = joblib.load(label_encoder_path)
    
    # Apply label encoding to the 'label' column in the test dataset
    test_df['label'] = -1  # Placeholder, no label for test set
    
    return test_df

In [60]:
# 1. Load the trained model and scaler
model, scaler = load_model_and_scaler()

# 2. Preprocess the test data (same preprocessing as train data)
test_df = preprocess_test_only(test_df)

# 3. Extract features from test data
X_test = extract_all_features(test_df, is_train=False)

# 4. Scale the test features using the scaler from training
X_test_scaled = scaler.transform(X_test)

# 5. Predict using the trained model
y_pred = model.predict(X_test_scaled)

# 6. Optionally, decode the predictions if needed (e.g., if you need original labels)
decoded_predictions = label_encoder.inverse_transform(y_pred)

# 7. Save the predictions if needed (e.g., to a CSV file)
test_df['predictions'] = decoded_predictions
test_df.to_csv('predictions.csv', index=False)


Checking file path: data/audios/test\audio_804.wav
Checking file path: data/audios/test\audio_1028.wav
Checking file path: data/audios/test\audio_865.wav
Checking file path: data/audios/test\audio_774.wav
Checking file path: data/audios/test\audio_1138.wav
Checking file path: data/audios/test\audio_278.wav
Checking file path: data/audios/test\audio_1212.wav
Checking file path: data/audios/test\audio_178.wav
Checking file path: data/audios/test\audio_542.wav
Checking file path: data/audios/test\audio_248.wav
Checking file path: data/audios/test\audio_872.wav
Checking file path: data/audios/test\audio_954.wav
Checking file path: data/audios/test\audio_853.wav
Checking file path: data/audios/test\audio_171.wav
Checking file path: data/audios/test\audio_922.wav
Checking file path: data/audios/test\audio_915.wav
Checking file path: data/audios/test\audio_1220.wav
Checking file path: data/audios/test\audio_1225.wav
Checking file path: data/audios/test\audio_903.wav
Checking file path: data/a

Checking file path: data/audios/test\audio_855.wav
Checking file path: data/audios/test\audio_652.wav
Checking file path: data/audios/test\audio_1069.wav
Checking file path: data/audios/test\audio_415.wav
Checking file path: data/audios/test\audio_1090.wav
Checking file path: data/audios/test\audio_766.wav
Checking file path: data/audios/test\audio_568.wav
Checking file path: data/audios/test\audio_885.wav
Checking file path: data/audios/test\audio_108.wav
Checking file path: data/audios/test\audio_1179.wav
Checking file path: data/audios/test\audio_377.wav
Checking file path: data/audios/test\audio_390.wav
Checking file path: data/audios/test\audio_692.wav
Checking file path: data/audios/test\audio_508.wav
Checking file path: data/audios/test\audio_357.wav
Checking file path: data/audios/test\audio_273.wav
Checking file path: data/audios/test\audio_1180.wav
Checking file path: data/audios/test\audio_668.wav
Checking file path: data/audios/test\audio_1187.wav
Checking file path: data/a