In [None]:
import librosa
import numpy as np
import os
import pandas as pd
from google.colab import drive


import librosa
import numpy as np

def feature_extraction(file_path):
    """Extract a 39-row MFCC + energy feature matrix from one audio file.

    Pipeline: load -> pre-emphasis -> STFT (25 ms window, 10 ms hop) ->
    RMS energy and log-mel spectrogram -> MFCCs c1..c12 -> first- and
    second-order deltas of both MFCCs and energy.

    Parameters
    ----------
    file_path : str or path-like
        Audio file passed straight to ``librosa.load`` (library defaults,
        so the sampling rate is whatever ``librosa.load`` returns).

    Returns
    -------
    np.ndarray of shape (39, n_frames)
        Rows, in order: 12 MFCCs, energy, 12 delta-MFCCs, delta-energy,
        12 delta-delta-MFCCs, delta-delta-energy.

    Raises
    ------
    ValueError
        If the file decodes to zero samples.
    AssertionError
        If the stacked matrix does not have exactly 39 rows.
    """
    # Load your audio file
    y, sr = librosa.load(file_path)
    if y.size == 0:
        raise ValueError(f"Audio file contains no samples: {file_path}")

    # Pre-emphasis: y[t] - 0.97 * y[t-1]; the first sample is kept as is
    pre_emphasis = 0.97
    y = np.append(y[0], y[1:] - pre_emphasis * y[:-1])

    # Window and hop length converted from seconds to samples
    window_size = 0.025  # 25ms window size
    hop_size = 0.010  # 10ms hop size
    n_fft = int(window_size * sr)
    hop_length = int(hop_size * sr)

    # Short-Time Fourier Transform, split into magnitude (phase discarded)
    stft_matrix = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
    magnitude, _ = librosa.magphase(stft_matrix)

    # Frame-wise RMS energy computed from the magnitude spectrogram
    energy = librosa.feature.rms(S=magnitude, frame_length=n_fft, hop_length=hop_length)

    # melspectrogram() uses a supplied S unchanged, and power_to_db() expects
    # a *power* spectrogram, so the magnitude must be squared first.
    mel_power_spectrogram = librosa.feature.melspectrogram(
        S=magnitude ** 2, sr=sr, n_fft=n_fft, hop_length=hop_length
    )
    log_mel_spectrogram = librosa.power_to_db(mel_power_spectrogram)

    # Row 0 of the MFCC matrix is the 0th coefficient. Request 13 and drop it
    # so that exactly c1..c12 remain; the energy row stands in for c0.
    mfccs = librosa.feature.mfcc(S=log_mel_spectrogram, sr=sr, n_mfcc=13)[1:]

    # Delta and delta-delta features for MFCCs
    delta_mfccs = librosa.feature.delta(mfccs, order=1)
    double_delta_mfccs = librosa.feature.delta(mfccs, order=2)

    # Delta and delta-delta features for energy
    delta_energy = librosa.feature.delta(energy, order=1)
    double_delta_energy = librosa.feature.delta(energy, order=2)

    # Stack static, delta and delta-delta blocks row-wise
    combined_features = np.vstack([
        mfccs,
        energy,
        delta_mfccs,
        delta_energy,
        double_delta_mfccs,
        double_delta_energy
    ])

    # Ensure you have 39 total features: 13 (12 MFCC + 1 energy) * 3 = 39
    assert combined_features.shape[0] == 39, f"Number of features does not match 39, but {combined_features.shape[0]}"

    return combined_features

# Folders (under the Colab Google Drive path) that are searched for recordings
root_dir = ['/content/drive/My Drive/training data', '/content/drive/My Drive/testing data']
for root in root_dir:
    # os.walk() yields nothing for a path that does not exist, so a missing
    # folder (e.g. Drive not mounted) would otherwise pass silently.
    if not os.path.isdir(root):
        print(f'Skipping {root}: directory not found (is Google Drive mounted?)')
        continue

    # Iterate through all directories and files
    for subdir, dirs, files in os.walk(root):
        for file in files:
            # Match the extension case-insensitively so '.WAV' files are not skipped
            if not file.lower().endswith('.wav'):
                continue

            filepath = os.path.join(subdir, file)
            print(f'Processing {filepath}')
            features = feature_extraction(filepath)

            print(features.shape)

            # Save features to CSV next to the audio file,
            # transposed so that each row is one frame
            feature_df = pd.DataFrame(features).T
            csv_filename = os.path.splitext(filepath)[0] + '_features.csv'
            feature_df.to_csv(csv_filename, index=False)
            print(f'Saved features to {csv_filename}')

Processing /content/drive/My Drive/training data/Ramallah_Reef/ramallah-reef_train043.wav
(39, 12120)
Saved features to /content/drive/My Drive/training data/Ramallah_Reef/ramallah-reef_train043_features.csv
Processing /content/drive/My Drive/training data/Ramallah_Reef/ramallah-reef_train042.wav
(39, 1441)
Saved features to /content/drive/My Drive/training data/Ramallah_Reef/ramallah-reef_train042_features.csv
Processing /content/drive/My Drive/training data/Ramallah_Reef/ramallah-reef_train041.wav
(39, 4878)
Saved features to /content/drive/My Drive/training data/Ramallah_Reef/ramallah-reef_train041_features.csv
Processing /content/drive/My Drive/training data/Ramallah_Reef/ramallah-reef_test025.wav
(39, 3444)
Saved features to /content/drive/My Drive/training data/Ramallah_Reef/ramallah-reef_test025_features.csv
Processing /content/drive/My Drive/training data/Ramallah_Reef/ramallah-reef_test024.wav
(39, 2258)
Saved features to /content/drive/My Drive/training data/Ramallah_Reef/ram

In [None]:
import os
import pandas as pd
import numpy as np

def aggregate_feature_csvs(directory):
    """Collapse every per-file feature CSV under ``directory`` into one table.

    Each immediate sub-folder of ``directory`` is treated as one class: every
    ``.csv`` file inside it is read, averaged column-wise, and emitted as a
    single row labelled with the sub-folder name.

    Parameters
    ----------
    directory : str
        Folder whose sub-folders contain the feature CSV files.

    Returns
    -------
    pd.DataFrame
        Columns ``Feature_1 .. Feature_N`` (numeric) followed by ``Target``
        (the sub-folder name). Empty, with only a ``Target`` column, when no
        CSV file was found.
    """
    rows = []

    # Iterate through each folder in the directory
    for folder_name in os.listdir(directory):
        folder_path = os.path.join(directory, folder_name)
        if not os.path.isdir(folder_path):
            continue

        # Iterate through each file in the folder
        for filename in os.listdir(folder_path):
            if not filename.endswith(".csv"):
                continue

            df = pd.read_csv(os.path.join(folder_path, filename))

            # Aggregate features: mean of every column over all rows
            mean_features = df.mean(axis=0)

            # Keep the numbers as numbers; the folder name is the target.
            # (Appending the label to a NumPy float array would coerce every
            # feature to a string.)
            rows.append(list(mean_features) + [folder_name])

    if not rows:
        return pd.DataFrame(columns=['Target'])

    # Column names are derived from the collected rows themselves, never from
    # a variable left over from a previous directory.
    num_features = len(rows[0]) - 1
    columns = [f'Feature_{i+1}' for i in range(num_features)] + ['Target']
    return pd.DataFrame(rows, columns=columns)


# Directory containing folders
big_directorys = ['/content/drive/My Drive/training data/', '/content/drive/My Drive/testing data/']

for big_directory in big_directorys:
    if not os.path.isdir(big_directory):
        print(f'Skipping {big_directory}: directory not found')
        continue

    final_df = aggregate_feature_csvs(big_directory)
    if final_df.empty:
        print(f'No feature CSV files found under {big_directory}; nothing written')
        continue

    # Save aggregated features with targets into a single CSV file
    output_file_path = os.path.join(big_directory, 'aggregated_features_with_targets.csv')
    final_df.to_csv(output_file_path, index=False)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier


# Load the aggregated training table (one row per recording)
df = pd.read_csv('/content/drive/My Drive/training data/aggregated_features_with_targets.csv')

# Randomly permute the rows; the fixed seed keeps the permutation repeatable
dataSet = df.sample(frac=1, random_state=42)

# 'Target' holds the class label; every other column is a model input
training_target = dataSet['Target']
training_features = dataSet.drop(columns='Target')



In [None]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Load the aggregated testing table (rows stay in file order, no shuffling)
dataSet = pd.read_csv('/content/drive/My Drive/testing data/aggregated_features_with_targets.csv')

# 'Target' holds the class label; every other column is a model input
testing_target = dataSet['Target']
testing_features = dataSet.drop(columns='Target')


In [None]:


import pandas as pd

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.neighbors import KNeighborsClassifier

# Fit a k-nearest-neighbours classifier (k = 4) on the training set
knnClassifierWithK4 = KNeighborsClassifier(n_neighbors=4)
knnClassifierWithK4.fit(training_features, training_target)

# Predict the class of every test recording
knnClassifierWithK4Predictions = knnClassifierWithK4.predict(testing_features)

# Overall accuracy plus per-class precision / recall / F1
knnClassifierWithK4Accuracy = accuracy_score(testing_target, knnClassifierWithK4Predictions)
print(f'KNN Classifier with K=4 Accuracy: {knnClassifierWithK4Accuracy}')
print(classification_report(testing_target, knnClassifierWithK4Predictions))

# Print real and predicted labels, paired by position.
# predict() returns a positional array, while testing_target[i] is a lookup
# by index *label*; iterating both with zip keeps them aligned even if the
# Series index is not 0..n-1.
for real_label, predicted_label in zip(testing_target, knnClassifierWithK4Predictions):
    print(f'Real: {real_label}, Predicted: {predicted_label}')



KNN Classifier with K=4 Accuracy: 0.25
               precision    recall  f1-score   support

       Hebron       0.25      0.40      0.31         5
    Jerusalem       0.17      0.20      0.18         5
       Nablus       1.00      0.20      0.33         5
Ramallah_Reef       0.20      0.20      0.20         5

     accuracy                           0.25        20
    macro avg       0.40      0.25      0.26        20
 weighted avg       0.40      0.25      0.26        20

Real: Ramallah_Reef, Predicted: Hebron
Real: Ramallah_Reef, Predicted: Hebron
Real: Ramallah_Reef, Predicted: Jerusalem
Real: Ramallah_Reef, Predicted: Ramallah_Reef
Real: Ramallah_Reef, Predicted: Hebron
Real: Nablus, Predicted: Nablus
Real: Nablus, Predicted: Jerusalem
Real: Nablus, Predicted: Hebron
Real: Nablus, Predicted: Jerusalem
Real: Nablus, Predicted: Ramallah_Reef
Real: Jerusalem, Predicted: Jerusalem
Real: Jerusalem, Predicted: Hebron
Real: Jerusalem, Predicted: Hebron
Real: Jerusalem, Predicted: Rama

In [None]:

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
# Support Vector Machine (SVM)
SVM = SVC()

# Hyperparameters to test.
# 'degree' only affects the polynomial kernel, so it is searched for 'poly'
# alone. Crossing it with every kernel would refit identical linear / rbf /
# sigmoid models four times each and report a meaningless 'degree' in
# best_params_.
svm_parameters = [
    {'kernel': ['linear', 'rbf', 'sigmoid']},
    {'kernel': ['poly'], 'degree': [2, 3, 4, 5]},
]

# Grid Search with 5-fold cross-validation, scored by accuracy
svm_grid_search = GridSearchCV(estimator=SVM, param_grid=svm_parameters, cv=5, scoring='accuracy', n_jobs=-1)
svm_grid_search.fit(training_features, training_target)

# Best parameters and score
svm_best_params = svm_grid_search.best_params_
svm_best_score = svm_grid_search.best_score_

print("Best Parameters for SVM:", svm_best_params)
print("Best Cross-Validation Score:", svm_best_score)

# Evaluate the refitted best model on the held-out test set
best_svm_model = svm_grid_search.best_estimator_
predictedTargetSVM = best_svm_model.predict(testing_features)
accuracySVM = accuracy_score(testing_target, predictedTargetSVM)
print("Accuracy of SVM :", accuracySVM)
print(classification_report(testing_target, predictedTargetSVM))

# Print real and predicted labels, paired by position.
# testing_target[i] would be a lookup by index label, which only matches the
# positional prediction array while the Series index is exactly 0..n-1.
for real_label, predicted_label in zip(testing_target, predictedTargetSVM):
    print(f'Real: {real_label}, Predicted: {predicted_label}')


Best Parameters for SVM: {'degree': 2, 'kernel': 'linear'}
Best Cross-Validation Score: 0.6
Accuracy of SVM : 0.7
               precision    recall  f1-score   support

       Hebron       0.80      0.80      0.80         5
    Jerusalem       0.83      1.00      0.91         5
       Nablus       0.67      0.40      0.50         5
Ramallah_Reef       0.50      0.60      0.55         5

     accuracy                           0.70        20
    macro avg       0.70      0.70      0.69        20
 weighted avg       0.70      0.70      0.69        20

Real: Ramallah_Reef, Predicted: Ramallah_Reef
Real: Ramallah_Reef, Predicted: Ramallah_Reef
Real: Ramallah_Reef, Predicted: Ramallah_Reef
Real: Ramallah_Reef, Predicted: Nablus
Real: Ramallah_Reef, Predicted: Hebron
Real: Nablus, Predicted: Nablus
Real: Nablus, Predicted: Ramallah_Reef
Real: Nablus, Predicted: Jerusalem
Real: Nablus, Predicted: Nablus
Real: Nablus, Predicted: Ramallah_Reef
Real: Jerusalem, Predicted: Jerusalem
Real: Jerusa

In [None]:

# Random Forest
# A fixed random_state makes tree construction, and therefore the reported
# cross-validation and test scores, repeatable from run to run. The seed
# matches the one used when shuffling the training data.
RFC = RandomForestClassifier(random_state=42)

# Hyperparameters to test
rfc_parameters = {"n_estimators": [10, 50, 100, 200], "max_depth": [5, 10, 20, 50]}

# Grid Search with 5-fold cross-validation, scored by accuracy
RFC_classifier_grid_search = GridSearchCV(estimator=RFC, param_grid=rfc_parameters, cv=5, scoring='accuracy', n_jobs=-1)
RFC_classifier_grid_search.fit(training_features, training_target)

# Best parameters and score
RFC_best_params = RFC_classifier_grid_search.best_params_
RFC_best_score = RFC_classifier_grid_search.best_score_

print("Best Parameters for RFC:", RFC_best_params)
print("Best Cross-Validation Score:", RFC_best_score)

# Evaluate the refitted best model on the held-out test set
best_rfc_model = RFC_classifier_grid_search.best_estimator_
predictedTargetRFC = best_rfc_model.predict(testing_features)
accuracyRFC = accuracy_score(testing_target, predictedTargetRFC)

print("Accuracy of RFC :", accuracyRFC)
print(classification_report(testing_target, predictedTargetRFC))

# Print real and predicted labels, paired by position.
# testing_target[i] would be a lookup by index label, which only matches the
# positional prediction array while the Series index is exactly 0..n-1.
for real_label, predicted_label in zip(testing_target, predictedTargetRFC):
    print(f'Real: {real_label}, Predicted: {predicted_label}')

Best Parameters for RFC: {'max_depth': 5, 'n_estimators': 200}
Best Cross-Validation Score: 0.65
Accuracy of RFC : 0.3
               precision    recall  f1-score   support

       Hebron       0.30      0.60      0.40         5
    Jerusalem       0.40      0.40      0.40         5
       Nablus       0.50      0.20      0.29         5
Ramallah_Reef       0.00      0.00      0.00         5

     accuracy                           0.30        20
    macro avg       0.30      0.30      0.27        20
 weighted avg       0.30      0.30      0.27        20

Real: Ramallah_Reef, Predicted: Hebron
Real: Ramallah_Reef, Predicted: Hebron
Real: Ramallah_Reef, Predicted: Jerusalem
Real: Ramallah_Reef, Predicted: Nablus
Real: Ramallah_Reef, Predicted: Hebron
Real: Nablus, Predicted: Nablus
Real: Nablus, Predicted: Jerusalem
Real: Nablus, Predicted: Ramallah_Reef
Real: Nablus, Predicted: Hebron
Real: Nablus, Predicted: Hebron
Real: Jerusalem, Predicted: Hebron
Real: Jerusalem, Predicted: Jerusal