In [1]:
import os
import numpy as np
from PIL import Image
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, confusion_matrix, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import joblib

# Function to load images from a folder and assign a label
def load_images_from_folder(folder, label, size=(32, 32)):
    """Load every ``.png`` file in ``folder`` as a flattened pixel vector.

    Each image is opened in the mode it is stored in (no grayscale or RGB
    conversion is applied), resized to ``size`` and flattened to a 1-D array.

    Args:
        folder: Directory to scan (non-recursively) for file names ending
            in ".png".
        label: Label attached to every image loaded from this folder.
        size: ``(width, height)`` passed to ``Image.resize``. Defaults to
            ``(32, 32)``, the size this function previously hard-coded.

    Returns:
        A tuple ``(images, labels)`` of two equal-length lists: the flattened
        image arrays and one copy of ``label`` per image.
    """
    # Experiment note kept from the original cell: out-of-sample validation
    # accuracy was 87.65% without LDA (32x32 resize, MFCC=25, RF=400, RGB).
    images = []
    labels = []
    # os.listdir() returns entries in arbitrary order; sorting makes the
    # order of the loaded samples deterministic from run to run.
    for filename in sorted(os.listdir(folder)):
        if not filename.endswith(".png"):  # only PNG files are loaded
            continue
        img_path = os.path.join(folder, filename)
        # The context manager releases the file handle as soon as the pixel
        # data has been copied into the NumPy array.
        with Image.open(img_path) as img:
            img_array = np.array(img.resize(size)).flatten()
        images.append(img_array)
        labels.append(label)
    return images, labels

# Function to load dataset from multiple folders
def load_dataset(folders):
    """Combine the images of several labelled folders into two arrays.

    Args:
        folders: Iterable of ``(folder, label)`` pairs; each folder is read
            with ``load_images_from_folder``.

    Returns:
        A tuple ``(data, labels)`` of NumPy arrays built from the
        concatenated per-folder image vectors and labels.
    """
    all_vectors, all_labels = [], []
    for folder_path, class_label in folders:
        print("Loading dataset from folder ", folder_path)
        folder_vectors, folder_labels = load_images_from_folder(folder_path, class_label)
        all_vectors += folder_vectors
        all_labels += folder_labels
    return np.array(all_vectors), np.array(all_labels)

# Root of the BUZZ1 dataset and of its held-out validation split.
BUZZ1_ROOT = "D:\\Study\\Research\\Sem02_AppliedProject\\Dataset\\BUZZ Dataset\\BUZZ1_FULL\\BUZZ1"
VALIDATION_ROOT = BUZZ1_ROOT + "\\out_of_sample_data_for_validation"

# Training image folders, one per class (bee, cricket, noise).
train_MFCC_folder_1 = BUZZ1_ROOT + "\\bee\\train_MFCC25_NZ_folder_1"
train_MFCC_folder_2 = BUZZ1_ROOT + "\\cricket\\train_MFCC25_NZ_folder_2"
train_MFCC_folder_3 = BUZZ1_ROOT + "\\noise\\train_MFCC25_NZ_folder_3"

# Out-of-sample image folders, in the same class order.
test_MFCC_folder_1 = VALIDATION_ROOT + "\\bee_test\\train_MFCC25_NZ_folder_1"
test_MFCC_folder_2 = VALIDATION_ROOT + "\\cricket_test\\train_MFCC25_NZ_folder_2"
test_MFCC_folder_3 = VALIDATION_ROOT + "\\noise_test\\train_MFCC25_NZ_folder_3"

# (folder, label) pairs consumed by load_dataset; the same label is used for
# a class in both the training and the test list.
train_folders = [
    (train_MFCC_folder_1, "class_1"),
    (train_MFCC_folder_2, "class_2"),
    (train_MFCC_folder_3, "class_3"),
]
test_folders = [
    (test_MFCC_folder_1, "class_1"),
    (test_MFCC_folder_2, "class_2"),
    (test_MFCC_folder_3, "class_3"),
]

# Load training and test datasets
X_train, y_train = load_dataset(train_folders)
X_test, y_test = load_dataset(test_folders)

# Fail early with a readable message when a split is empty (no .png files
# found) or ragged (images flattened to different lengths), instead of
# letting the problem surface later as an opaque scikit-learn error.
for _split_name, _features in (("training", X_train), ("test", X_test)):
    if _features.ndim != 2:
        raise ValueError(
            f"The {_split_name} set is not a 2-D feature matrix "
            f"(shape {_features.shape}); check that the folders contain .png "
            "files and that every image flattens to the same length."
        )

# Label encoding the target labels (strings to integers)
le = LabelEncoder()
y_train_encoded = le.fit_transform(y_train)
y_test_encoded = le.transform(y_test)

# Linear Discriminant Analysis (LDA) is currently disabled; the classifier is
# trained directly on the flattened pixels. To re-enable it, uncomment the
# lines below and fit/predict on X_train_lda / X_test_lda instead.
# print("Performing LDA")
# lda = LDA(n_components=2)
# X_train_lda = lda.fit_transform(X_train, y_train_encoded)
# X_test_lda = lda.transform(X_test)

# Train a Random Forest Classifier
print("Performing RF Training")
rf_model = RandomForestClassifier(n_estimators=400, random_state=42)
# rf_model.fit(X_train_lda, y_train_encoded)
rf_model.fit(X_train, y_train_encoded)

# Save the trained Random Forest model
model_filename = "trained_rf_model.pkl"
joblib.dump(rf_model, model_filename)

# Evaluate the model on test data
print("Performing predictions using RF model")
# y_pred = rf_model.predict(X_test_lda)
y_pred = rf_model.predict(X_test)

# Accuracy score
accuracy = accuracy_score(y_test_encoded, y_pred)
print(f"Accuracy: {accuracy * 100:.2f}%")

# Classification report
print("\nClassification Report:")
print(classification_report(y_test_encoded, y_pred, target_names=le.classes_))

# Confusion matrix
print("\nConfusion Matrix:")
print(confusion_matrix(y_test_encoded, y_pred))

# ROC-AUC score. It is computed from class probabilities rather than hard
# label predictions, and the multiclass case (this notebook has three classes)
# is handled with one-vs-rest averaging instead of being skipped.
y_proba = rf_model.predict_proba(X_test)
n_test_classes = len(np.unique(y_test_encoded))
if n_test_classes == 2 and y_proba.shape[1] == 2:
    # Binary case: score with the probability of the class with the larger label.
    print(f"ROC-AUC Score: {roc_auc_score(y_test_encoded, y_proba[:, 1]):.2f}")
elif n_test_classes > 2 and n_test_classes == y_proba.shape[1]:
    roc_auc = roc_auc_score(y_test_encoded, y_proba, multi_class="ovr")
    print(f"ROC-AUC Score (one-vs-rest, macro average): {roc_auc:.2f}")



Loading dataset from folder  D:\Study\Research\Sem02_AppliedProject\Dataset\BUZZ Dataset\BUZZ1_FULL\BUZZ1\bee\train_MFCC25_NZ_folder_1
Loading dataset from folder  D:\Study\Research\Sem02_AppliedProject\Dataset\BUZZ Dataset\BUZZ1_FULL\BUZZ1\cricket\train_MFCC25_NZ_folder_2
Loading dataset from folder  D:\Study\Research\Sem02_AppliedProject\Dataset\BUZZ Dataset\BUZZ1_FULL\BUZZ1\noise\train_MFCC25_NZ_folder_3
Loading dataset from folder  D:\Study\Research\Sem02_AppliedProject\Dataset\BUZZ Dataset\BUZZ1_FULL\BUZZ1\out_of_sample_data_for_validation\bee_test\train_MFCC25_NZ_folder_1
Loading dataset from folder  D:\Study\Research\Sem02_AppliedProject\Dataset\BUZZ Dataset\BUZZ1_FULL\BUZZ1\out_of_sample_data_for_validation\cricket_test\train_MFCC25_NZ_folder_2
Loading dataset from folder  D:\Study\Research\Sem02_AppliedProject\Dataset\BUZZ Dataset\BUZZ1_FULL\BUZZ1\out_of_sample_data_for_validation\noise_test\train_MFCC25_NZ_folder_3
Performing RF Training
Performing predictions using RF model


In [12]:


# Function to classify a new image using the trained model
def classify_image(image_path, model_filename, label_encoder):
    """Classify a single image file with a saved model.

    The image is opened in the mode it is stored in (no grayscale or RGB
    conversion), resized to 32x32, flattened and reshaped to one row before
    being passed to the model's ``predict``.

    Args:
        image_path: Path of the image to classify.
        model_filename: Path of the model file to read with ``joblib.load``.
        label_encoder: Fitted encoder whose ``inverse_transform`` maps the
            model's prediction back to the original label.

    Returns:
        The predicted label for the image.

    Raises:
        ValueError: If the flattened image does not have the number of
            features the loaded model reports in ``n_features_in_``.
    """
    # NOTE: joblib.load unpickles the file, which can execute arbitrary code;
    # only load model files that come from a trusted source.
    trained_model = joblib.load(model_filename)

    # The context manager releases the file handle once the pixels are copied.
    with Image.open(image_path) as img:
        img_array = np.array(img.resize((32, 32))).flatten().reshape(1, -1)

    # Give a clear diagnostic when the image yields a different vector length
    # than the model was fitted on (for example a different number of channels).
    expected_features = getattr(trained_model, "n_features_in_", None)
    if expected_features is not None and img_array.shape[1] != expected_features:
        raise ValueError(
            f"Image {image_path!r} yields {img_array.shape[1]} features but the "
            f"model expects {expected_features}; check the image mode and size."
        )

    # LDA is not applied here; if an LDA step is added before the model is
    # fitted, the same fitted transform must be applied to img_array first.
    prediction = trained_model.predict(img_array)
    predicted_label = label_encoder.inverse_transform(prediction)

    return predicted_label[0]


# Example: classify one image from the out-of-sample validation data.
# Despite their names, the test_folder_* variables hold paths of individual
# image files, one per class directory.
validation_root = "D:\\Study\\Research\\Sem02_AppliedProject\\Dataset\\BUZZ Dataset\\BUZZ1_FULL\\BUZZ1\\out_of_sample_data_for_validation"
test_folder_1 = validation_root + "\\bee_test\\train_MFCC25_NZ_folder_1\\MFCC_192_168_4_6-2017-08-09_14-15-01_0.png"
test_folder_2 = validation_root + "\\cricket_test\\train_MFCC25_NZ_folder_2\\MFCC_cricket113_192_168_4_10-2017-08-29_03-45-01.png"
test_folder_3 = validation_root + "\\noise_test\\train_MFCC25_NZ_folder_3\\MFCC_noise20_192_168_4_10-2017-07-09_04-45-01_10.png"
# Placeholder path; it is not passed to classify_image below.
sample_image_path = "sample_image.png"
predicted_label = classify_image(
    test_folder_3,
    model_filename,
    le,
)
print(f"Predicted Label for the sample image: {predicted_label}")


Predicted Label for the sample image: class_3
