In [13]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [14]:
# Load necessary libraries
import os
import numpy as np
import pandas as pd
import librosa
from tqdm import tqdm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout
from keras.callbacks import EarlyStopping, Callback
from sklearn.metrics import classification_report, accuracy_score

In [15]:
audio_dataset_path ='/content/drive/MyDrive/Colab Notebooks/Poultry Vocalization Signal Dataset for Early Disease Detection/Chicken_Audio_Dataset'

In [16]:
file_paths=[]
classes=[]

In [17]:
for folder_name in os.listdir(audio_dataset_path):
    folder_path = os.path.join(audio_dataset_path, folder_name)

    # Check if it's a directory
    if os.path.isdir(folder_path):
        # Iterate through each file in the folder
        for file_name in os.listdir(folder_path):
            if file_name.endswith('.wav'):
                file_path = os.path.join(folder_path, file_name)
                file_paths.append(file_path)
                classes.append(folder_name)

In [18]:
# Create DataFrame from the collected file paths and classes
metadata = pd.DataFrame({'file_path': file_paths, 'class': classes})
print(metadata.head())

                                           file_path  class
0  /content/drive/MyDrive/Colab Notebooks/Poultry...  Noise
1  /content/drive/MyDrive/Colab Notebooks/Poultry...  Noise
2  /content/drive/MyDrive/Colab Notebooks/Poultry...  Noise
3  /content/drive/MyDrive/Colab Notebooks/Poultry...  Noise
4  /content/drive/MyDrive/Colab Notebooks/Poultry...  Noise


In [19]:
# Function to extract MFCC features
def features_extractor(file):
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    return mfccs_scaled_features

In [20]:
%pip install resampy



In [21]:
extracted_features = []
for _, row in tqdm(metadata.iterrows()):
    file_name = row['file_path']
    final_class_labels = row['class']
    data = features_extractor(file_name)
    extracted_features.append([data, final_class_labels])

346it [02:01,  2.84it/s]


In [22]:
extracted_features_df = pd.DataFrame(extracted_features, columns=['feature', 'class'])
extracted_features_df.head()

Unnamed: 0,feature,class
0,"[-205.92746, 134.27495, -38.85105, 32.43706, 4...",Noise
1,"[-196.6323, 126.03945, -37.07014, 25.206053, 4...",Noise
2,"[-160.9021, 126.01377, -54.62678, 15.125531, 3...",Noise
3,"[-185.45374, 122.36359, -53.871494, 21.235067,...",Noise
4,"[-252.11371, 123.52367, 1.0157044, 15.540068, ...",Noise


In [23]:
# Calculate Correlation Matrix
correlation_matrix = extracted_features_df['feature'].apply(pd.Series).corr()
print("Correlation Matrix:")
print(correlation_matrix)

Correlation Matrix:
          0         1         2         3         4         5         6   \
0   1.000000 -0.084022 -0.472368  0.082619 -0.016616 -0.193276 -0.109448   
1  -0.084022  1.000000 -0.392929 -0.692003  0.442662  0.315838  0.346728   
2  -0.472368 -0.392929  1.000000  0.198509 -0.404642  0.126406  0.126119   
3   0.082619 -0.692003  0.198509  1.000000 -0.267005 -0.288090 -0.356705   
4  -0.016616  0.442662 -0.404642 -0.267005  1.000000 -0.210011  0.114038   
5  -0.193276  0.315838  0.126406 -0.288090 -0.210011  1.000000  0.324009   
6  -0.109448  0.346728  0.126119 -0.356705  0.114038  0.324009  1.000000   
7   0.228053 -0.755363  0.315047  0.688059 -0.288505 -0.215461 -0.382201   
8   0.204014  0.133652 -0.370279  0.018329  0.381572 -0.325044  0.146928   
9  -0.058614 -0.319336  0.594330  0.167913 -0.596767  0.445688  0.120702   
10  0.161213  0.224384 -0.162254 -0.257884  0.326724 -0.088769  0.434915   
11  0.120999 -0.570800  0.265931  0.560949 -0.550182  0.042563 -0.17

In [27]:
# Binning (Equal Width Bins)
num_bins = 10  # Number of bins
bin_labels = range(num_bins)

In [28]:
# Apply binning to each feature separately
binned_features = []
for feature in extracted_features_df['feature']:
    binned_feature = pd.cut(feature, bins=num_bins, labels=bin_labels)
    binned_features.append(binned_feature)

extracted_features_df['binned_feature'] = binned_features
print("Binned Features:")
print(extracted_features_df[['feature', 'binned_feature']].head())

Binned Features:
                                             feature  \
0  [-205.92746, 134.27495, -38.85105, 32.43706, 4...   
1  [-196.6323, 126.03945, -37.07014, 25.206053, 4...   
2  [-160.9021, 126.01377, -54.62678, 15.125531, 3...   
3  [-185.45374, 122.36359, -53.871494, 21.235067,...   
4  [-252.11371, 123.52367, 1.0157044, 15.540068, ...   

                                      binned_feature  
0  [0, 9, 4, 7, 7, ..., 6, 6, 6, 6, 6]
Length: 40...  
1  [0, 9, 4, 6, 7, ..., 6, 6, 6, 6, 6]
Length: 40...  
2  [0, 9, 3, 6, 6, ..., 5, 5, 5, 5, 5]
Length: 40...  
3  [0, 9, 4, 6, 7, ..., 6, 6, 5, 6, 6]
Length: 40...  
4  [0, 9, 6, 7, 7, ..., 6, 6, 6, 6, 6]
Length: 40...  


In [29]:
# Normalization
scaler = StandardScaler()
scaled_features = scaler.fit_transform(np.vstack(extracted_features_df['feature']))
for i in range(scaled_features.shape[1]):
    extracted_features_df[f'scaled_feature_{i}'] = scaled_features[:, i]

In [30]:
# Data Preprocessing
X = np.array(extracted_features_df[[f'scaled_feature_{i}' for i in range(scaled_features.shape[1])]])
y = np.array(extracted_features_df['class'].tolist())
label_encoder = LabelEncoder()
y = to_categorical(label_encoder.fit_transform(y))

In [31]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=0)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=0)

In [32]:
# Model Definition
model = Sequential()
model.add(Flatten(input_shape=(X_train.shape[1],)))
model.add(Dense(256, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(len(label_encoder.classes_), activation='softmax'))
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten (Flatten)           (None, 40)                0         
                                                                 
 dense (Dense)               (None, 256)               10496     
                                                                 
 dropout (Dropout)           (None, 256)               0         
                                                                 
 dense_1 (Dense)             (None, 3)                 771       
                                                                 
Total params: 11267 (44.01 KB)
Trainable params: 11267 (44.01 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [33]:
# Custom Callback for Metrics Tracking
class MetricsCallback(Callback):
    def on_epoch_end(self, epoch, logs=None):
        y_pred = self.model.predict(X_test)
        y_pred_binary = np.argmax(y_pred, axis=1)
        print("Classification Report:")
        print(classification_report(np.argmax(y_test, axis=1), y_pred_binary, target_names=label_encoder.classes_))
        accuracy = accuracy_score(np.argmax(y_test, axis=1), y_pred_binary)
        print(f"Test Accuracy: {accuracy}")

metrics_callback = MetricsCallback()

In [34]:
# Training the Model
num_epochs = 100
batch_size = 32
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

history = model.fit(X_train, y_train, batch_size=batch_size, epochs=num_epochs,
                    validation_data=(X_val, y_val), callbacks=[early_stopping, metrics_callback])

Epoch 1/100
Classification Report:
              precision    recall  f1-score   support

     Healthy       0.88      0.67      0.76        21
       Noise       0.70      0.70      0.70        10
   Unhealthy       0.81      1.00      0.89        21

    accuracy                           0.81        52
   macro avg       0.79      0.79      0.78        52
weighted avg       0.81      0.81      0.80        52

Test Accuracy: 0.8076923076923077
Epoch 2/100
Classification Report:
              precision    recall  f1-score   support

     Healthy       0.86      0.90      0.88        21
       Noise       0.75      0.60      0.67        10
   Unhealthy       0.95      1.00      0.98        21

    accuracy                           0.88        52
   macro avg       0.86      0.83      0.84        52
weighted avg       0.88      0.88      0.88        52

Test Accuracy: 0.8846153846153846
Epoch 3/100
Classification Report:
              precision    recall  f1-score   support

     Healt

In [35]:
# Evaluate the Model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy}")

Test Accuracy: 0.9230769276618958


In [36]:
# Prediction
y_pred = model.predict(X_test)
y_pred_binary = (y_pred > 0.5).astype(int)



In [37]:
# Classification Report
print("Classification Report:")
print(classification_report(np.argmax(y_test, axis=1), np.argmax(y_pred_binary, axis=1), target_names=label_encoder.classes_))

Classification Report:
              precision    recall  f1-score   support

     Healthy       0.90      0.90      0.90        21
       Noise       0.80      0.80      0.80        10
   Unhealthy       1.00      1.00      1.00        21

    accuracy                           0.92        52
   macro avg       0.90      0.90      0.90        52
weighted avg       0.92      0.92      0.92        52

