In [2]:
import os
import librosa
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical
from tensorflow.image import resize
from tensorflow.keras.models import load_model

#import ini

In [19]:
#buat data audio jadi digital trs simpan ke csv

data_dir = r'C:\kuliah\semester 6\ilmu data 1\Proyek deteksi teriak\data_paper\Raw Audio'
classes = ['scream', 'non_scream']


def load_and_preprocess_data(data_dir, classes, target_shape=(128, 128)):
    mel_data = []
    zcr_data = []
    labels = []

    for i, class_name in enumerate(classes):
        class_dir = os.path.join(data_dir, class_name)
        for filename in os.listdir(class_dir):
            if filename.endswith('.wav'):
                file_path = os.path.join(class_dir, filename)
                audio_data, sample_rate = librosa.load(file_path, sr=None)
                # Perform preprocessing (e.g., convert to Mel spectrogram and resize)
                mel_spectrogram = librosa.feature.melspectrogram(y=audio_data, sr=sample_rate)
                mel_spectrogram = resize(np.expand_dims(mel_spectrogram, axis=-1), target_shape)
                mel_data.append(mel_spectrogram)
                
                # Calculate ZCR
                zcr = librosa.feature.zero_crossing_rate(y=audio_data)
                zcr = resize(np.expand_dims(zcr, axis=-1), target_shape)
                zcr_data.append(zcr)

                labels.append(class_name)

    return np.array(mel_data), np.array(zcr_data), np.array(labels)


data, zcr_data, labels = load_and_preprocess_data(data_dir, classes)

# Save Mel spectrogram data to a file
np.save('mel_data.npy', data)

# Save ZCR data to a file
np.save('zcr_data.npy', zcr_data)

# Save labels to a file
np.savetxt('labels.csv', labels, fmt='%s')




In [20]:
#jalankan kode dari sini, tidak usah buat baca ulang data audio 600mb lebih
# Load data and labels from CSV files
#bisa dibuat jadi pipeline
from sklearn.preprocessing import LabelEncoder
data = np.load('mel_data.npy')
labels = np.loadtxt('labels.csv', dtype=str)
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

In [22]:
# Load data and labels from CSV files
from sklearn.preprocessing import LabelEncoder

# Print original labels before encoding
print("Original labels:")
print(labels[0], labels[-1])

# Convert labels to integers using LabelEncoder
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Print encoded labels
print("Encoded labels:")
print(labels_encoded[0], labels_encoded[-1])

# Split data into training and testing sets
X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(data, labels_encoded, test_size=0.2, random_state=42)

# Convert integer labels to one-hot encoding
num_classes = len(classes)
y_train = to_categorical(y_train_encoded, num_classes=num_classes)
y_test = to_categorical(y_test_encoded, num_classes=num_classes)


Original labels:
scream non_scream
Encoded labels:
1 0


In [23]:
print(data.shape)

(869, 128, 128, 1)


In [24]:
#Algoritma pertama, coba buat dengan cnn untuk baca grafik mel spectogram
input_shape = X_train[0].shape
input_layer = Input(shape=input_shape)
x = Conv2D(32, (3, 3), activation='relu')(input_layer)
x = MaxPooling2D((2, 2))(x)
x = Conv2D(64, (3, 3), activation='relu')(x)
x = MaxPooling2D((2, 2))(x)
x = Flatten()(x)
x = Dense(64, activation='relu')(x)
output_layer = Dense(len(classes), activation='softmax')(x)
model = Model(input_layer, output_layer)

# Step 5: Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='categorical_crossentropy', metrics=['accuracy'])

# Step 6: Train the model
model.fit(X_train, y_train, epochs=30, batch_size=32, validation_data=(X_test, y_test))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.callbacks.History at 0x1d1cb1d9a80>

In [26]:
from sklearn.metrics import classification_report

# Map class indices to class labels
class_labels = {0: 'non_scream', 1: 'scream'}

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Test Accuracy:", test_accuracy)

# Predict labels for the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Convert one-hot encoded labels back to original labels
y_test_classes = np.argmax(y_test, axis=1)

# Generate classification report with class labels
print("Classification Report:")
print(classification_report(y_test_classes, y_pred_classes, target_names=[class_labels[i] for i in range(len(class_labels))]))

# Save the model
model.save('cnn_model.h5')


Test Accuracy: 0.982758641242981
Classification Report:
              precision    recall  f1-score   support

  non_scream       0.99      0.98      0.99       112
      scream       0.97      0.98      0.98        62

    accuracy                           0.98       174
   macro avg       0.98      0.98      0.98       174
weighted avg       0.98      0.98      0.98       174



### Logistic regression


In [27]:
#jalankan kode dari sini, tidak usah buat baca ulang data audio 600mb lebih
# Load data and labels from CSV files
from sklearn.preprocessing import LabelEncoder
mel_data = np.load('mel_data.npy')
zcr_data = np.load('zcr_data.npy')
labels = np.loadtxt('labels.csv', dtype=str)
# Split data into training and testing sets

In [49]:
#bisa diubah jadi pipeline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
import numpy as np

# Load Mel spectrogram data, ZCR data, and labels
mel_data = np.load('mel_data.npy')
zcr_data = np.load('zcr_data.npy')
labels = np.loadtxt('labels.csv', dtype=str)

# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Flatten Mel spectrogram data and ZCR data
mel_data_flat = mel_data.reshape(mel_data.shape[0], -1)
zcr_data_flat = zcr_data.reshape(zcr_data.shape[0], -1)

# Combine flattened Mel spectrogram data and ZCR data
X_combined_flat = np.concatenate((mel_data_flat, zcr_data_flat), axis=1)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_combined_flat, labels_encoded, test_size=0.2, random_state=42)

In [36]:


# Create and train logistic regression model
log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)

# Predict labels for testing set
y_pred = log_reg.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

# Generate classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


Test Accuracy: 0.9712643678160919
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       112
           1       0.97      0.95      0.96        62

    accuracy                           0.97       174
   macro avg       0.97      0.97      0.97       174
weighted avg       0.97      0.97      0.97       174



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [40]:
from joblib import dump, load

# Define the filename for saving the model
logistic_model_filename = 'logistic_regression_model.joblib'

# Save the logistic regression model to a file
dump(log_reg, logistic_model_filename)


['logistic_regression_model.joblib']

In [None]:
'''
#Kalau mau load model biar tidak train ulang
from joblib import load

# Load the saved model from file
loaded_model = load('svm_model.joblib')

# Now you can use the loaded model for predictions
'''

### SVM

In [48]:
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

# Reshape the input data for SVM
# X_train_flattened = X_train.reshape(X_train.shape[0], -1)
# X_test_flattened = X_test.reshape(X_test.shape[0], -1)

# Define the parameter grid for grid search
param_grid = {
    'kernel': ['linear', 'poly', 'rbf', 'sigmoid'],
    'C': [0.1, 1, 10, 100]
}

# Initialize SVM model
svm_model = SVC()

# Perform grid search
grid_search = GridSearchCV(estimator=svm_model, param_grid=param_grid, cv=5)
grid_search.fit(X_train, y_train)

# Get the best parameters
best_params = grid_search.best_params_
print("Best parameters:", best_params)




Best parameters: {'C': 100, 'kernel': 'rbf'}


In [47]:
# Evaluate the model with best parameters
best_svm_model = SVC(**best_params)
best_svm_model.fit(X_train, y_train)
y_pred = best_svm_model.predict(X_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)
print(classification_report(y_test, y_pred, target_names=classes))

Test Accuracy: 0.9712643678160919
              precision    recall  f1-score   support

      scream       0.98      0.97      0.98       112
  non_scream       0.95      0.97      0.96        62

    accuracy                           0.97       174
   macro avg       0.97      0.97      0.97       174
weighted avg       0.97      0.97      0.97       174



In [None]:
from joblib import dump

# Define the filename for saving the model
model_filename = 'svm_model.joblib'

# Save the model to a file
dump(best_svm_model, model_filename)

### Bandingkan semua model

In [52]:
from sklearn.metrics import classification_report
from joblib import load
from keras.models import load_model
#jalankan kode dari sini, tidak usah buat baca ulang data audio 600mb lebih
# Load data and labels from CSV files
from sklearn.preprocessing import LabelEncoder
data = np.load('mel_data.npy')
labels = np.loadtxt('labels.csv', dtype=str)
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

# Load data and labels from CSV files
from sklearn.preprocessing import LabelEncoder

# Print original labels before encoding
print("Original labels:")
print(labels[0], labels[-1])

# Convert labels to integers using LabelEncoder
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Print encoded labels
print("Encoded labels:")
print(labels_encoded[0], labels_encoded[-1])

# Split data into training and testing sets
X_train, X_test, y_train_encoded, y_test_encoded = train_test_split(data, labels_encoded, test_size=0.2, random_state=42)

# Convert integer labels to one-hot encoding
num_classes = len(classes)
y_train = to_categorical(y_train_encoded, num_classes=num_classes)
y_test = to_categorical(y_test_encoded, num_classes=num_classes)





# Load the saved model from file
model = load_model('cnn_model.h5')
print('CNN model')
# Map class indices to class labels
class_labels = {0: 'non_scream', 1: 'scream'}

# Evaluate the model on the test set
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)
print("Test Accuracy:", test_accuracy)

# Predict labels for the test set
y_pred = model.predict(X_test)
y_pred_classes = np.argmax(y_pred, axis=1)

# Convert one-hot encoded labels back to original labels
y_test_classes = np.argmax(y_test, axis=1)

# Generate classification report with class labels
print("Classification Report:")
print(classification_report(y_test_classes, y_pred_classes, target_names=[class_labels[i] for i in range(len(class_labels))]))



Original labels:
scream non_scream
Encoded labels:
1 0
CNN model
Test Accuracy: 0.982758641242981
Classification Report:
              precision    recall  f1-score   support

  non_scream       0.99      0.98      0.99       112
      scream       0.97      0.98      0.98        62

    accuracy                           0.98       174
   macro avg       0.98      0.98      0.98       174
weighted avg       0.98      0.98      0.98       174



In [54]:

# Load Mel spectrogram data, ZCR data, and labels
mel_data = np.load('mel_data.npy')
zcr_data = np.load('zcr_data.npy')
labels = np.loadtxt('labels.csv', dtype=str)

# Encode labels
label_encoder = LabelEncoder()
labels_encoded = label_encoder.fit_transform(labels)

# Flatten Mel spectrogram data and ZCR data
mel_data_flat = mel_data.reshape(mel_data.shape[0], -1)
zcr_data_flat = zcr_data.reshape(zcr_data.shape[0], -1)

# Combine flattened Mel spectrogram data and ZCR data
X_combined_flat = np.concatenate((mel_data_flat, zcr_data_flat), axis=1)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_combined_flat, labels_encoded, test_size=0.2, random_state=42)
log_reg = load('logistic_regression_model.joblib')
print('Logistic regression model')

# Predict labels for testing set
y_pred = log_reg.predict(X_test)

# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)

# Generate classification report
print("Classification Report:")
print(classification_report(y_test, y_pred))


Logistic regression model
Test Accuracy: 0.9712643678160919
Classification Report:
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       112
           1       0.97      0.95      0.96        62

    accuracy                           0.97       174
   macro avg       0.97      0.97      0.97       174
weighted avg       0.97      0.97      0.97       174



In [56]:
# Evaluate the model with best parameters
best_svm_model = load('svm_model.joblib')
y_pred = best_svm_model.predict(X_test)
# Calculate accuracy
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)
print(classification_report(y_test, y_pred, target_names=classes))

Test Accuracy: 0.9712643678160919
              precision    recall  f1-score   support

      scream       0.98      0.97      0.98       112
  non_scream       0.95      0.97      0.96        62

    accuracy                           0.97       174
   macro avg       0.97      0.97      0.97       174
weighted avg       0.97      0.97      0.97       174

