In [1]:
import numpy as np
import os
import pathlib
import cv2
from sklearn.model_selection import train_test_split, StratifiedKFold
from sklearn.metrics import accuracy_score, classification_report, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from keras.applications.mobilenet_v2 import MobileNetV2
from keras.layers import GlobalAveragePooling2D
from keras.models import Sequential
import matplotlib.pyplot as plt  # For displaying images

In [2]:
# Define constants
IMAGE_SHAPE = (200, 200)  # Resize all images to this shape for MobileNetV2
BATCH_SIZE = 100
TRAIN_DIR = 'F:/indhu/Datasets/bone fracture_ x-rays/train'  # Path to training dataset
VAL_DIR = 'F:/indhu/Datasets/bone fracture_ x-rays/val'  # Path to validation dataset

In [5]:
# Use TensorFlow Dataset to load images efficiently
def load_data_from_directory(data_dir, batch_size=BATCH_SIZE):
    data_dir = pathlib.Path(data_dir)
    categories = os.listdir(data_dir)
    label_dict = {category: index for index, category in enumerate(categories)}
    
    image_paths = []
    labels = []
    for category in categories:
        category_path = data_dir / category
        for image_path in pathlib.Path(category_path).glob('*'):
            image_paths.append(str(image_path))
            labels.append(label_dict[category])
    
    return image_paths, np.array(labels)

In [6]:
# Load image paths and labels
train_image_paths, y_train = load_data_from_directory(TRAIN_DIR)
val_image_paths, y_val = load_data_from_directory(VAL_DIR)

In [7]:
# Function to load and preprocess images
def load_and_preprocess_images(image_paths, batch_size=BATCH_SIZE):
    image_batch = []
    for i, image_path in enumerate(image_paths):
        img = cv2.imread(image_path)
        resized_img = cv2.resize(img, IMAGE_SHAPE)
        normalized_img = resized_img / 255.0  # Normalize images
        image_batch.append(normalized_img)
        
        if len(image_batch) == batch_size or i == len(image_paths) - 1:
            yield np.array(image_batch)
            image_batch = []  # Reset batch

In [8]:
# Create MobileNetV2 feature extractor
base_model_mobilenet = MobileNetV2(input_shape=IMAGE_SHAPE + (3,), include_top=False, weights='imagenet')
base_model_mobilenet.trainable = False  # Freeze the convolutional base

feature_extractor_mobilenet = Sequential([
    base_model_mobilenet,
    GlobalAveragePooling2D()
])

  base_model_mobilenet = MobileNetV2(input_shape=IMAGE_SHAPE + (3,), include_top=False, weights='imagenet')


In [9]:
# Extract features using MobileNetV2 (batch by batch)
train_features = []
for batch in load_and_preprocess_images(train_image_paths, batch_size=BATCH_SIZE):
    features = feature_extractor_mobilenet.predict(batch)
    train_features.append(features)

train_features = np.concatenate(train_features, axis=0)  # Combine all batches

val_features = []
for batch in load_and_preprocess_images(val_image_paths, batch_size=BATCH_SIZE):
    features = feature_extractor_mobilenet.predict(batch)
    val_features.append(features)

val_features = np.concatenate(val_features, axis=0)  # Combine all batches

[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 565ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 155ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 157ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 154ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 161ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 154ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 149ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 158ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 158ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 163ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 155ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 151ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 158ms/step
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [10]:
# Initialize classifiers
classifiers = {
    "Random Forest": RandomForestClassifier(n_estimators=100, random_state=42),
    "Logistic Regression": LogisticRegression(max_iter=200, random_state=42),
    "K-Nearest Neighbors": KNeighborsClassifier(n_neighbors=5),
    "Support Vector Machine": SVC(kernel='rbf', probability=True, random_state=42),
    "Decision Tree": DecisionTreeClassifier(random_state=42),
    "Naive Bayes": GaussianNB()
}

In [11]:
# Evaluate classifiers using validation set
for model_name, model in classifiers.items():
    print(f"Evaluating {model_name}...")
    model.fit(train_features, y_train)
    y_pred = model.predict(val_features)
    print(f"{model_name} Accuracy: {accuracy_score(y_val, y_pred):.4f}")
    print(classification_report(y_val, y_pred))

Evaluating Random Forest...
Random Forest Accuracy: 0.7217
              precision    recall  f1-score   support

           0       0.81      0.71      0.75       360
           1       0.63      0.75      0.68       240

    accuracy                           0.72       600
   macro avg       0.72      0.73      0.72       600
weighted avg       0.74      0.72      0.72       600

Evaluating Logistic Regression...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


Logistic Regression Accuracy: 0.6583
              precision    recall  f1-score   support

           0       0.74      0.66      0.70       360
           1       0.56      0.66      0.61       240

    accuracy                           0.66       600
   macro avg       0.65      0.66      0.65       600
weighted avg       0.67      0.66      0.66       600

Evaluating K-Nearest Neighbors...
K-Nearest Neighbors Accuracy: 0.7050
              precision    recall  f1-score   support

           0       0.87      0.60      0.71       360
           1       0.59      0.86      0.70       240

    accuracy                           0.70       600
   macro avg       0.73      0.73      0.70       600
weighted avg       0.76      0.70      0.71       600

Evaluating Support Vector Machine...
Support Vector Machine Accuracy: 0.7183
              precision    recall  f1-score   support

           0       0.77      0.75      0.76       360
           1       0.64      0.67      0.66       24

In [12]:
# Evaluate classifiers using k-fold cross-validation
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)
for clf_name, clf in classifiers.items():
    print(f"K-Fold Cross Validation for {clf_name}...")
    precision_scores = []
    recall_scores = []
    f1_scores = []
    accuracy_scores = []

    for train_index, val_index in kf.split(train_features, y_train):
        X_train_fold, X_val_fold = train_features[train_index], train_features[val_index]
        y_train_fold, y_val_fold = y_train[train_index], y_train[val_index]

        clf.fit(X_train_fold, y_train_fold)
        y_val_pred = clf.predict(X_val_fold)

        accuracy_scores.append(accuracy_score(y_val_fold, y_val_pred))
        precision_scores.append(precision_score(y_val_fold, y_val_pred, average='weighted'))
        recall_scores.append(recall_score(y_val_fold, y_val_pred, average='weighted'))
        f1_scores.append(f1_score(y_val_fold, y_val_pred, average='weighted'))

    print(f"Average Accuracy: {np.mean(accuracy_scores):.4f}")
    print(f"Average Precision: {np.mean(precision_scores):.4f}")
    print(f"Average Recall: {np.mean(recall_scores):.4f}")
    print(f"Average F1 Score: {np.mean(f1_scores):.4f}\n")

K-Fold Cross Validation for Random Forest...
Average Accuracy: 0.9753
Average Precision: 0.9753
Average Recall: 0.9753
Average F1 Score: 0.9753

K-Fold Cross Validation for Logistic Regression...


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver opt

Average Accuracy: 0.9694
Average Precision: 0.9695
Average Recall: 0.9694
Average F1 Score: 0.9694

K-Fold Cross Validation for K-Nearest Neighbors...
Average Accuracy: 0.9927
Average Precision: 0.9927
Average Recall: 0.9927
Average F1 Score: 0.9927

K-Fold Cross Validation for Support Vector Machine...
Average Accuracy: 0.9903
Average Precision: 0.9904
Average Recall: 0.9903
Average F1 Score: 0.9903

K-Fold Cross Validation for Decision Tree...
Average Accuracy: 0.8139
Average Precision: 0.8144
Average Recall: 0.8139
Average F1 Score: 0.8139

K-Fold Cross Validation for Naive Bayes...
Average Accuracy: 0.7391
Average Precision: 0.7575
Average Recall: 0.7391
Average F1 Score: 0.7350

