In [4]:
# ===============================================
# 1Ô∏è‚É£ IMPORT LIBRARIES
# ===============================================
import os
import librosa
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import classification_report, accuracy_score
import joblib

print("‚úÖ Libraries loaded successfully.")

# ===============================================
# 2Ô∏è‚É£ SET DATASET PATHS (UPDATED FOR YOUR STRUCTURE)
# ===============================================
metadata_path = "../dataset_forest/UrbanSound8K.csv"
audio_dir = "../dataset_forest/audio"

metadata = pd.read_csv(metadata_path)
print("üîπ Total audio samples:", len(metadata))

# ===============================================
# 3Ô∏è‚É£ RELABEL DATA (ILLEGAL = 1, NATURAL = 0)
# ===============================================
illegal_classes = ['gun_shot', 'jackhammer', 'engine_idling', 'car_horn']

metadata['label'] = metadata['class'].apply(
    lambda x: 1 if x.strip().lower() in illegal_classes else 0
)

print("üîπ Illegal samples:", metadata['label'].sum())
print("üîπ Natural samples:", (metadata['label']==0).sum())

# ===============================================
# 4Ô∏è‚É£ FEATURE EXTRACTION FUNCTION
# ===============================================
def extract_features(file_path):
    try:
        y, sr = librosa.load(file_path, sr=22050, mono=True)  # Load audio
        mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13)    # Extract MFCCs
        mfcc_mean = np.mean(mfcc.T, axis=0)                   # Take mean over time
        return mfcc_mean
    except Exception as e:
        print("‚ö†Ô∏è Error loading:", file_path, "| Error:", e)
        return None

# ===============================================
# 5Ô∏è‚É£ LOOP THROUGH DATASET AND EXTRACT FEATURES
# ===============================================
features = []
labels = []

print("‚è≥ Extracting features from audio files... This may take several minutes.")

for idx, row in metadata.iterrows():
    file_path = os.path.join(audio_dir, 'fold'+str(row['fold']), row['slice_file_name'])
    if not os.path.exists(file_path):
        print("‚ö†Ô∏è Missing file:", file_path)
        continue
    mfccs = extract_features(file_path)
    if mfccs is not None:
        features.append(mfccs)
        labels.append(row['label'])

# Convert to numpy arrays
X = np.array(features)
y = np.array(labels)

print("‚úÖ Feature extraction completed.")
print("üîπ Feature dataset shape:", X.shape)

# ===============================================
# 6Ô∏è‚É£ SPLIT DATASET INTO TRAIN & TEST
# ===============================================
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
print("‚úÖ Data split: ", X_train.shape, "train samples,", X_test.shape, "test samples.")

# ===============================================
# 7Ô∏è‚É£ TRAIN THE SVM CLASSIFIER
# ===============================================
print("‚è≥ Training SVM model...")
model = SVC(kernel='rbf', probability=True)
model.fit(X_train, y_train)
print("‚úÖ Model training completed.")

# ===============================================
# 8Ô∏è‚É£ EVALUATE MODEL
# ===============================================
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print("\nüéØ Model Accuracy:", accuracy)
print("\nüìú Classification Report:\n", classification_report(y_test, y_pred))

# ===============================================
# 9Ô∏è‚É£ SAVE THE TRAINED MODEL
# ===============================================
model_save_path = "../code/chainsaw_detection_model.pkl"
os.makedirs("../code", exist_ok=True)
joblib.dump(model, model_save_path)
print("‚úÖ Trained model saved at:", model_save_path)


‚úÖ Libraries loaded successfully.
üîπ Total audio samples: 8732
üîπ Illegal samples: 2803
üîπ Natural samples: 5929
‚è≥ Extracting features from audio files... This may take several minutes.




‚úÖ Feature extraction completed.
üîπ Feature dataset shape: (8732, 13)
‚úÖ Data split:  (6985, 13) train samples, (1747, 13) test samples.
‚è≥ Training SVM model...
‚úÖ Model training completed.

üéØ Model Accuracy: 0.7595878649112765

üìú Classification Report:
               precision    recall  f1-score   support

           0       0.77      0.92      0.84      1186
           1       0.72      0.41      0.52       561

    accuracy                           0.76      1747
   macro avg       0.74      0.67      0.68      1747
weighted avg       0.75      0.76      0.74      1747

‚úÖ Trained model saved at: ../code/chainsaw_detection_model.pkl
