In [4]:
import os
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report
import librosa

# Function to extract features from sound files
def extract_features(file_path, sample_rate=16000, n_mfcc=13):
    """Summarize one audio file as a fixed-length feature vector.

    The file is loaded with ``librosa.load`` (resampled to ``sample_rate``
    using the ``'kaiser_fast'`` resampler), then three descriptors are
    computed and averaged.

    Args:
        file_path: Path of the audio file to load.
        sample_rate: Target sampling rate passed to ``librosa.load``.
        n_mfcc: Number of MFCC coefficients requested from librosa.

    Returns:
        A 1-D numpy array: the MFCC matrix averaged along axis 1 (one value
        per row of the MFCC matrix), followed by the mean RMS energy and the
        mean spectral centroid.
    """
    signal, rate = librosa.load(file_path, sr=sample_rate, res_type='kaiser_fast')

    # Frame-level descriptors, computed in the same order as before.
    mfcc_matrix = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=n_mfcc)
    rms_frames = librosa.feature.rms(y=signal)
    centroid_frames = librosa.feature.spectral_centroid(y=signal, sr=rate)

    # Collapse each descriptor: MFCCs keep one mean per row, the other two
    # reduce to a single scalar each.
    mfcc_means = mfcc_matrix.mean(axis=1)
    summary_scalars = ([rms_frames.mean()], [centroid_frames.mean()])

    return np.concatenate([mfcc_means, *summary_scalars])

# Function to read and process sound data
def read_and_process_sound_data(sound_dir, label_path):
    """Build a feature matrix and label vector from a directory of WAV files.

    Every ``.wav`` file in ``sound_dir`` is matched by file name against the
    ``file`` column of the CSV at ``label_path``; its label is read from the
    ``Label`` column. Files without a matching row are reported on stdout and
    skipped.

    Files are processed in sorted name order so the row order of the result
    is reproducible (``os.listdir`` itself returns entries in arbitrary
    order).

    Args:
        sound_dir: Directory containing the audio files.
        label_path: Path to a CSV file with ``file`` and ``Label`` columns.

    Returns:
        A tuple ``(features, labels)`` of numpy arrays with one entry per
        labelled ``.wav`` file, in sorted file-name order.
    """
    labels_df = pd.read_csv(label_path)

    # Build the file -> label lookup once instead of scanning the whole table
    # for every file. keep='first' mirrors the original `iloc[0]` behavior
    # when a file name appears in more than one row.
    label_by_file = (
        labels_df.drop_duplicates(subset='file', keep='first')
        .set_index('file')['Label']
        .to_dict()
    )

    features = []
    labels = []

    for filename in sorted(os.listdir(sound_dir)):
        if not filename.endswith('.wav'):
            continue

        if filename not in label_by_file:
            print(f"Label not found for file: {filename}")
            continue

        sound_path = os.path.join(sound_dir, filename)
        features.append(extract_features(sound_path))
        labels.append(label_by_file[filename])

    return np.array(features), np.array(labels)

# Dataset locations: audio clips and the CSV label tables for each split
train_sound_dir = 'suara/data/sound/train/'
train_label_path = 'suara/data/labels/train/feature_train.csv'
val_sound_dir = 'suara/data/sound/val/'
val_label_path = 'suara/data/labels/val/feature_val.csv'

# Build feature matrices and label vectors (training split first, then validation)
X_train, y_train = read_and_process_sound_data(
    sound_dir=train_sound_dir,
    label_path=train_label_path,
)
X_val, y_val = read_and_process_sound_data(
    sound_dir=val_sound_dir,
    label_path=val_label_path,
)

# Learn the scaling statistics from the training split only, then apply the
# same transform to both splits so validation data does not influence the fit
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_val_scaled = scaler.transform(X_val)

# Linear-kernel SVM; fit() returns the estimator itself, so it can be chained
svm_model = SVC(kernel='linear', C=1).fit(X_train_scaled, y_train)

# Predict on the validation split and report the metrics
y_pred = svm_model.predict(X_val_scaled)
accuracy = accuracy_score(y_val, y_pred)
print(f"Accuracy: {accuracy}")
print(classification_report(y_val, y_pred))


Accuracy: 0.9879275653923542
              precision    recall  f1-score   support

           0       0.98      1.00      0.99       250
           1       1.00      0.98      0.99       247

    accuracy                           0.99       497
   macro avg       0.99      0.99      0.99       497
weighted avg       0.99      0.99      0.99       497



In [5]:
import joblib

# Persist the fitted scaler alongside the model: the SVM was trained on
# standardized features, so inference code must apply the same scaling
# statistics before calling predict(). Without this file the saved model
# cannot be used correctly on new data.
scaler_filename = 'suara_scaler.pkl'
joblib.dump(scaler, scaler_filename)

# Save the trained SVM to a file (kept as the last expression so the cell
# still displays the model path as its output)
model_filename = 'suara_model_svm.pkl'
joblib.dump(svm_model, model_filename)


['suara_model_svm.pkl']