In [None]:
import os   #to acess listing files in folder
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder  # # Encoding categorical labels
from tensorflow.keras import layers, models   # build N/N
from tensorflow.keras.applications import MobileNet
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # Data Aug
from tensorflow.keras.regularizers import l2  # l2 reg.
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau #prevent overfitting & adjust lr
from sklearn.metrics import classification_report  #evaluate model pred.

# Function to load images from folder structure
def load_images_from_folder(folder_path, label, image_size=(224, 224)):
    """Load every readable image in `folder_path` and tag it with `label`.

    Each image is read with OpenCV, converted from OpenCV's BGR channel order
    to RGB, resized to `image_size` and scaled to float32 values in [0, 1].
    Entries that OpenCV cannot decode (sub-folders, non-image files) are skipped.

    Args:
        folder_path: directory containing the images of a single class.
        label: class label attached to every image loaded from the folder.
        image_size: target size as (width, height), the order `cv2.resize`
            expects. Defaults to (224, 224).

    Returns:
        Tuple `(X, y)` of NumPy arrays. `X` has shape
        (n_images, height, width, 3) and dtype float32; `y` has shape
        (n_images,) and holds `label` repeated. When nothing could be loaded,
        `X` is an empty array of shape (0, height, width, 3).

    Raises:
        OSError: propagated from `os.listdir` if `folder_path` cannot be listed.
    """
    width, height = image_size
    images, labels = [], []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order stable so a seeded train/validation split is reproducible.
    for filename in sorted(os.listdir(folder_path)):
        img = cv2.imread(os.path.join(folder_path, filename))
        if img is None:  # not a decodable image
            continue
        # imread yields BGR; ImageNet-pretrained Keras models are trained on RGB.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        img = cv2.resize(img, (width, height))
        images.append(img.astype('float32') / 255.0)  # Normalize pixel values to range [0,1]
        labels.append(label)

    if not images:
        # Keep the image dimensions so callers can still concatenate the result.
        return np.empty((0, height, width, 3), dtype='float32'), np.array(labels)
    return np.array(images), np.array(labels)

# Brightness jitter that keeps pixel values inside [0, 1]
def _random_brightness(image, low=0.8, high=1.2):
    """Multiply `image` by a random factor drawn from [low, high] and clip to [0, 1]."""
    factor = float(np.random.uniform(low, high))
    return np.clip(image * factor, 0.0, 1.0)


# Function to train and evaluate the model using CNN (MobileNet)
def train_and_evaluate_cnn(X_train, y_train, X_val, y_val):
    """Train a small CNN head on top of a frozen MobileNet and evaluate it.

    Args:
        X_train: training images, array of shape (n, 224, 224, 3) with pixel
            values already scaled to [0, 1].
        y_train: training labels (any values `LabelEncoder` can encode).
        X_val: validation images, same layout and scaling as `X_train`.
        y_val: validation labels; every label must also occur in `y_train`
            because the encoder is fitted on the training labels only.

    Returns:
        Tuple `(history, val_accuracy)`: the Keras `History` object returned
        by `model.fit` and the accuracy reported by `model.evaluate` on the
        validation set.

    Raises:
        ValueError: if the training or validation set is empty.
    """
    if len(X_train) == 0 or len(X_val) == 0:
        raise ValueError("X_train and X_val must each contain at least one image")

    # Convert categorical labels to numerical values
    label_encoder = LabelEncoder()
    y_train = label_encoder.fit_transform(y_train)
    y_val = label_encoder.transform(y_val)
    num_classes = len(label_encoder.classes_)

    # MobileNet pre-trained on ImageNet, used as a frozen feature extractor.
    # include_top=False removes the original classification layer.
    base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
    base_model.trainable = False

    # Create the CNN model
    inputs = layers.Input(shape=(224, 224, 3))
    # MobileNet's ImageNet weights expect pixels in [-1, 1] (what
    # mobilenet.preprocess_input produces); the data arrives in [0, 1].
    x = layers.Rescaling(scale=2.0, offset=-1.0)(inputs)
    x = base_model(x, training=False)  # keep MobileNet's BatchNorm in inference mode
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.01))(x)
    x = layers.BatchNormalization()(x)  # Stabilizes training
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.Conv2D(128, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(0.01))(x)
    x = layers.BatchNormalization()(x)
    x = layers.MaxPooling2D((2, 2))(x)
    x = layers.GlobalAveragePooling2D()(x)  # Convert feature maps to a single vector
    x = layers.Dense(256, activation='relu')(x)  # Fully connected layer
    x = layers.Dropout(0.6)(x)  # to reduce overfitting
    outputs = layers.Dense(num_classes, activation='softmax')(x)  # output layer

    model = models.Model(inputs, outputs)
    # Integer-encoded labels -> sparse categorical cross-entropy
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    # Data augmentation (training batches only).
    # brightness_range is deliberately not used: it is implemented through a
    # PIL round-trip that works in 0-255 pixel space, so it does not preserve
    # inputs pre-scaled to [0, 1] and the training batches would stop matching
    # the validation data. _random_brightness applies the same 0.8-1.2 jitter
    # while staying in [0, 1].
    datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0.4,
        height_shift_range=0.4,
        shear_range=0.2,
        zoom_range=0.3,
        horizontal_flip=True,
        fill_mode='nearest',
        preprocessing_function=_random_brightness
    )

    # Early Stopping & Learning Rate Scheduler
    # Stop when validation loss has not improved for 5 epochs and roll back to the best weights
    early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    # Halve the learning rate after 3 epochs without validation-loss improvement
    lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3)

    # Train the model
    history = model.fit(datagen.flow(X_train, y_train, batch_size=32),
                        epochs=30,  # upper bound; early stopping usually ends training sooner
                        validation_data=(X_val, y_val),
                        callbacks=[early_stopping, lr_scheduler])

    # Evaluate the model
    val_loss, val_accuracy = model.evaluate(X_val, y_val)
    print(f"Validation Accuracy: {val_accuracy * 100:.2f}%")

    # Classification report, labelled with the original class names
    y_val_pred = np.argmax(model.predict(X_val), axis=1)
    print("Classification Report:")
    print(classification_report(
        y_val,
        y_val_pred,
        labels=np.arange(num_classes),  # keep rows aligned with target_names even if a class is absent
        target_names=[str(name) for name in label_encoder.classes_],
    ))

    return history, val_accuracy


# Dataset location (Google Drive mounted in Colab) and split settings
DRIVE_MOUNT_POINT = '/content/drive'
DATASET_DIR = os.path.join(DRIVE_MOUNT_POINT, 'MyDrive', 'btp_dataset_folder', 'btp_dataset')
VAL_FRACTION = 0.3   # share of each class held out for validation
SPLIT_SEED = 42      # random_state for train_test_split

# The cell that mounts Drive comes after this one in the notebook, so on a
# fresh kernel ("Restart & Run All") the dataset would not be reachable yet.
# Mount on demand when the dataset directory is missing.
if not os.path.isdir(DATASET_DIR):
    from google.colab import drive
    drive.mount(DRIVE_MOUNT_POINT)

# Define folder paths for each class
folder_paths = {
    'fault_A': os.path.join(DATASET_DIR, 'faulta_noise_15dB'),
    'fault_B': os.path.join(DATASET_DIR, 'faultb_noise_15dB'),
    'fault_C': os.path.join(DATASET_DIR, 'faultc_noise_15dB'),
    'healthy': os.path.join(DATASET_DIR, 'healthy_noise_15dB'),
}

# Initialize lists for training and validation data
X_train_total, y_train_total, X_val_total, y_val_total = [], [], [], []

# Loop to perform 70/30 split for each folder (splitting per class keeps the
# class proportions identical in the training and validation sets)
for class_label, folder_path in folder_paths.items():
    X, y = load_images_from_folder(folder_path, class_label)
    if len(X) < 2:
        # train_test_split needs at least one sample on each side of the split
        raise ValueError(
            f"Class {class_label!r}: found {len(X)} readable image(s) in {folder_path}; "
            "at least 2 are required for a train/validation split"
        )
    X_train, X_val, y_train, y_val = train_test_split(
        X, y, test_size=VAL_FRACTION, random_state=SPLIT_SEED
    )

    X_train_total.append(X_train)
    y_train_total.append(y_train)
    X_val_total.append(X_val)
    y_val_total.append(y_val)

# Combine all 30% validation data from all classes
X_val_total = np.concatenate(X_val_total)
y_val_total = np.concatenate(y_val_total)

# Now train using 70% data from each class and 30% mixed validation data
X_train_total = np.concatenate(X_train_total)
y_train_total = np.concatenate(y_train_total)

# Train and evaluate the CNN model
history, val_accuracy = train_and_evaluate_cnn(X_train_total, y_train_total, X_val_total, y_val_total)

print(f"\nFinal validation accuracy: {val_accuracy * 100:.2f}%")


In [None]:
# Mount Google Drive at /content/drive (Colab only).
# The dataset folders read by the training cell live under
# /content/drive/MyDrive, so on a fresh kernel this mount has to happen
# before that cell is executed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive
