In [7]:
import os
import numpy as np
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report, confusion_matrix
from tensorflow.keras.utils import to_categorical

Resnet And Feature Extraction

In [8]:
# Directory containing your images
image_dir = 'D:/Railway Track Damage Detection/Rail-damage-detection/dataset'

# Initialize ResNet50 model, excluding the top layers
resnet_model = ResNet50(weights='imagenet', include_top=False, pooling='avg')

# Function to extract features from a batch of images
def extract_features_from_batch(img_paths):
    batch_images = []
    for img_path in img_paths:
        img = image.load_img(img_path, target_size=(224, 224))
        img_data = image.img_to_array(img)
        img_data = preprocess_input(img_data)
        batch_images.append(img_data)
    batch_images = np.array(batch_images)
    features = resnet_model.predict(batch_images)
    return features

# Get image paths and corresponding labels
image_paths = []
labels = []
class_names = {'Non Defective': 0, 'Defective': 1}

for class_name, label in class_names.items():
    class_dir = os.path.join(image_dir, class_name)
    if os.path.isdir(class_dir):
        for image_name in os.listdir(class_dir):
            image_path = os.path.join(class_dir, image_name)
            image_paths.append(image_path)
            labels.append(label)

# Convert labels to numpy array
labels = np.array(labels)

# Print class distribution
unique, counts = np.unique(labels, return_counts=True)
class_distribution = dict(zip(unique, counts))
print(f"Class distribution: {class_distribution}")

Class distribution: {0: 6953, 1: 6730}


Load Images

In [9]:
# Split the dataset into training+validation and test sets (60% training, 20% validation, 20% test)
X_train_val_paths, X_test_paths, y_train_val, y_test = train_test_split(image_paths, labels, test_size=0.2, random_state=42, stratify=labels)
X_train_paths, X_val_paths, y_train, y_val = train_test_split(X_train_val_paths, y_train_val, test_size=0.25, random_state=42, stratify=y_train_val)

# Verify the sizes of the splits
print(f"Training set size: {len(X_train_paths)}")
print(f"Validation set size: {len(X_val_paths)}")
print(f"Test set size: {len(X_test_paths)}")

Training set size: 8209
Validation set size: 2737
Test set size: 2737


In [10]:
# Function to process images in batches
def process_in_batches(image_paths, batch_size=32):
    features = []
    for i in range(0, len(image_paths), batch_size):
        batch_paths = image_paths[i:i+batch_size]
        batch_features = extract_features_from_batch(batch_paths)
        features.append(batch_features)
    return np.vstack(features)

In [11]:
# Extract features for training, validation, and test sets
X_train = process_in_batches(X_train_paths)
X_val = process_in_batches(X_val_paths)
X_test = process_in_batches(X_test_paths)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━

In [12]:
# Use Stratified K-Fold Cross-Validation
skf = StratifiedKFold(n_splits=5)
fold_num = 1
validation_accuracies = []

for train_index, val_index in skf.split(X_train_val_paths, y_train_val):
    X_train_fold_paths = np.array(X_train_val_paths)[train_index]
    X_val_fold_paths = np.array(X_train_val_paths)[val_index]
    y_train_fold = y_train_val[train_index]
    y_val_fold = y_train_val[val_index]
    
    # Process the folds in batches
    X_train_fold = process_in_batches(X_train_fold_paths)
    X_val_fold = process_in_batches(X_val_fold_paths)
    
    # Train and evaluate the model
    knn = KNeighborsClassifier(n_neighbors=5)
    knn.fit(X_train_fold, y_train_fold)
    y_val_pred = knn.predict(X_val_fold)
    
    val_accuracy = np.mean(y_val_pred == y_val_fold)
    validation_accuracies.append(val_accuracy)
    
    print(f'Fold {fold_num}, Validation Fold Accuracy: {val_accuracy * 100:.2f}%')
    print("Validation Fold Classification Report:")
    print(classification_report(y_val_fold, y_val_pred, target_names=class_names.keys()))
    print("Validation Fold Confusion Matrix:")
    print(confusion_matrix(y_val_fold, y_val_pred))
    fold_num += 1

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1s/step
[1m1/1[0m [32m━━━

In [13]:
# Evaluate on the test set
knn.fit(X_train, y_train)
y_test_pred = knn.predict(X_test)

test_accuracy = np.mean(y_test_pred == y_test)
print(f'Test Accuracy: {test_accuracy * 100:.2f}%')
print("Test Classification Report:")
print(classification_report(y_test, y_test_pred, target_names=class_names.keys()))
print("Test Confusion Matrix:")
print(confusion_matrix(y_test, y_test_pred))

Test Accuracy: 99.34%
Test Classification Report:
               precision    recall  f1-score   support

Non Defective       0.99      1.00      0.99      1391
    Defective       1.00      0.99      0.99      1346

     accuracy                           0.99      2737
    macro avg       0.99      0.99      0.99      2737
 weighted avg       0.99      0.99      0.99      2737

Test Confusion Matrix:
[[1386    5]
 [  13 1333]]
