In [None]:
import os
import numpy as np
from tensorflow.keras.applications.vgg19 import VGG19, preprocess_input
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import xgboost as xgb

In [None]:
# VGG-19 expects 224x224 input images
image_size = (224, 224)
# <-- replace with the correct path to your dataset
dataset_path = '/kaggle/input/palm-dataset-anemia/Palm'

In [None]:
def load_images_and_labels(dataset_path, target_size=None):
    """
    Load every '.png' image in a folder and derive a binary label from its file name.

    Labeling rule: a file whose name starts with 'Non' gets label 0; every other
    '.png' file gets label 1 (i.e. it is treated as anemic). Files with any other
    extension are ignored. The number of label-1 files and then the number of
    label-0 files are printed, one per line.

    Args:
        dataset_path: Folder containing the image files.
        target_size: Size passed to ``image.load_img`` for resizing. When omitted,
            the module-level ``image_size`` from the configuration cell is used.

    Returns:
        A ``(images, labels)`` pair of NumPy arrays; ``images[i]`` is the array for
        the i-th file in sorted file-name order and ``labels[i]`` is its 0/1 label.
    """
    if target_size is None:
        # Fall back to the notebook-wide setting so existing one-argument calls keep working.
        target_size = image_size

    images = []
    labels = []
    anemic_count = 0
    non_anemic_count = 0

    # os.listdir returns entries in arbitrary order; sorting keeps the sample order
    # (and therefore any seeded train/test split built on it) stable between runs.
    for file_name in sorted(os.listdir(dataset_path)):
        if not file_name.endswith('.png'):
            continue

        # Check the file name prefix to determine the label.
        if file_name.startswith('Non'):
            labels.append(0)
            non_anemic_count += 1
        else:
            labels.append(1)
            anemic_count += 1

        # Load image and resize it to the requested input size.
        file_path = os.path.join(dataset_path, file_name)
        img = image.load_img(file_path, target_size=target_size)
        images.append(image.img_to_array(img))

    print(anemic_count)
    print(non_anemic_count)
    return np.array(images), np.array(labels)


In [None]:
# Read from the configured dataset folder instead of repeating the path literal,
# so changing `dataset_path` in the configuration cell is enough to retarget the run.
X, y = load_images_and_labels(dataset_path)
X = preprocess_input(X)  # Apply VGG-19 specific preprocessing

In [None]:
# Sanity check: image tensor shape first, label vector shape second.
print(X.shape, y.shape, sep="\n")

In [None]:
# Display the label vector to check the encoding: 0 for files whose name starts with 'Non', 1 for all others.
y

In [None]:
# Pretrained convolutional backbone without the ImageNet classification head.
# The input shape is derived from `image_size` (plus 3 colour channels) so it always
# matches the size the images were resized to when they were loaded.
base_model = VGG19(weights='imagenet', include_top=False, input_shape=(*image_size, 3))

In [None]:
# Average each convolutional feature map over its spatial dimensions so every image
# is reduced to a single flat feature vector, then wrap input -> pooled output as a model.
pooling_layer = GlobalAveragePooling2D()
gap_output = pooling_layer(base_model.output)
feature_extractor = Model(inputs=base_model.input, outputs=gap_output)

In [None]:
# Push all preprocessed images through the pooled VGG-19 model in batches of 32;
# the resulting rows are the inputs for the XGBoost classifier.
print("Extracting features using pretrained VGG-19...")
features = feature_extractor.predict(x=X, batch_size=32, verbose=1)

In [None]:
# Hold out 20% of the samples for testing. `stratify=y` keeps the anemic / non-anemic
# ratio the same in both splits, so the test accuracy is not skewed by an unlucky draw.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

In [None]:
# Wrap each split (features + labels) in XGBoost's DMatrix container: train first, test second.
dtrain, dtest = (
    xgb.DMatrix(split_features, label=split_labels)
    for split_features, split_labels in ((X_train, y_train), (X_test, y_test))
)

In [None]:
# Booster configuration:
#   objective   - logistic regression for binary classification
#   eval_metric - classification error rate
#   seed        - fixed for repeatable training
params = dict(
    objective='binary:logistic',
    eval_metric='error',
    seed=42,
)

In [None]:
# Fit the booster on the training split for 100 boosting rounds.
print("Training the XGBoost classifier...")
xgb_classifier = xgb.train(params=params, dtrain=dtrain, num_boost_round=100)

In [None]:
# Score the held-out split. With the 'binary:logistic' objective set in `params`, these
# are presumably per-sample probabilities of label 1; they are thresholded at 0.5 below.
y_pred_prob = xgb_classifier.predict(dtest)

In [None]:
# Turn scores into hard labels: strictly above 0.5 -> 1, otherwise 0.
y_pred = [int(prob > 0.5) for prob in y_pred_prob]

In [None]:
# Fraction of held-out samples whose predicted label equals the file-name label.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test Accuracy: {accuracy:.4f}")

# Test accuracy recorded from an earlier run of this notebook: 93.54%.

In [None]:
import matplotlib.pyplot as plt
import random

def plot_random_predictions(feature_extractor, dataset_path='/kaggle/input/palm-dataset-anemia/Palm', model=xgb_classifier, image_size=(224, 224), num_samples=5):
    """
    Plot randomly chosen images of each class with their actual and predicted labels.

    Up to ``num_samples`` '.png' files are drawn per class. The class comes from the
    file-name prefix: names starting with 'Anemic' are the anemic pool and names
    starting with 'Non' the non-anemic pool. Anemic samples fill the top row of the
    figure and non-anemic samples the bottom row. A title is green when the
    prediction agrees with the file-name label and red otherwise.

    Args:
        feature_extractor: Object whose ``predict`` turns a preprocessed image batch
            into one feature row per image.
        dataset_path: Folder containing the images.
        model: Trained booster whose ``predict`` accepts an ``xgb.DMatrix``. The
            default is whatever ``xgb_classifier`` was when this cell was executed;
            pass ``model=`` explicitly after retraining.
        image_size: Size handed to ``image.load_img`` as ``target_size``.
        num_samples: Maximum number of images to draw from each class.

    Raises:
        ValueError: If no image ends up selected (nothing to plot), or if
            ``num_samples`` is negative (raised by ``random.sample``).
    """
    png_files = [f for f in os.listdir(dataset_path) if f.endswith('.png')]
    anemic_files = [f for f in png_files if f.startswith('Anemic')]
    non_anemic_files = [f for f in png_files if f.startswith('Non')]

    # random.sample raises if asked for more items than the pool holds, so clamp the
    # request per class instead of failing on small folders.
    rows = [
        random.sample(pool, min(num_samples, len(pool)))
        for pool in (anemic_files, non_anemic_files)
    ]
    n_cols = max(len(row) for row in rows)
    if n_cols == 0:
        raise ValueError(
            f"Nothing to plot: no images were selected from {dataset_path!r} "
            f"(num_samples={num_samples})."
        )

    selected_files = rows[0] + rows[1]
    # (row, column) grid cell for each selected file, in the same order as selected_files.
    positions = [(r, c) for r, row in enumerate(rows) for c in range(len(row))]

    # Load once, keeping the PIL images for display and a stacked array for the model.
    pil_images = [
        image.load_img(os.path.join(dataset_path, f), target_size=image_size)
        for f in selected_files
    ]
    batch = preprocess_input(np.stack([image.img_to_array(img) for img in pil_images]))

    # One batched pass through both models instead of one call per image.
    features = feature_extractor.predict(batch)
    pred_probs = model.predict(xgb.DMatrix(features))

    # Size the grid from the actual sample count rather than a fixed 5 columns.
    fig, axes = plt.subplots(2, n_cols, figsize=(4 * n_cols, 6), squeeze=False)
    for ax in axes.ravel():
        ax.axis('off')

    for (r, c), file_name, img, pred_prob in zip(positions, selected_files, pil_images, pred_probs):
        actual_label = 1 if file_name.startswith('Anemic') else 0
        pred_label = 1 if pred_prob > 0.5 else 0

        ax = axes[r][c]
        ax.imshow(img)
        ax.set_title(f"Actual: {'Anemic' if actual_label else 'Non-anemic'}\nPred: {'Anemic' if pred_label else 'Non-anemic'}",
                     color='green' if actual_label == pred_label else 'red')

    plt.tight_layout()
    plt.show()


In [None]:
# Pass the current notebook objects explicitly: the function's defaults repeat the path
# literal and capture `xgb_classifier` as it was when the function was defined, which
# goes stale if the model is retrained without re-running the definition cell.
plot_random_predictions(
    feature_extractor,
    dataset_path=dataset_path,
    model=xgb_classifier,
    image_size=image_size,
)

In [None]:
import pickle

file_name = "final_xgboost_palm_anemia_model.pkl"

# save
# Use a context manager so the file handle is flushed and closed even if pickling fails.
# NOTE(review): XGBoost also provides Booster.save_model for persisting models; consider
# it if this file must be loaded under a different library version.
with open(file_name, "wb") as model_file:
    pickle.dump(xgb_classifier, model_file)

In [None]:
def predict_anemia(image_path, feature_extractor, model, image_size=(224, 224)):
    """
    Classify a single image file as "Anemic" or "Non-anemic".

    The function imports everything it needs locally, so it can be copied into
    another script without relying on notebook-level imports.

    Args:
        image_path: Path to the image file to classify.
        feature_extractor: Object whose ``predict`` maps a preprocessed image batch
            to feature rows.
        model: Trained booster whose ``predict`` accepts an ``xgb.DMatrix``.
        image_size: Size handed to ``image.load_img`` as ``target_size``.

    Returns:
        "Anemic" when the predicted score is strictly above 0.5, "Non-anemic"
        otherwise. If ``image_path`` does not exist, the string
        "Error: Image path does not exist." is returned instead of raising.
    """
    import os

    # Validate the path first so a bad path is reported without loading the heavy
    # TensorFlow / XGBoost modules.
    if not os.path.exists(image_path):
        return "Error: Image path does not exist."

    import numpy as np
    import xgboost as xgb
    # Imported locally like the other dependencies; previously this name was only
    # available through the notebook's top-level import.
    from tensorflow.keras.applications.vgg19 import preprocess_input
    from tensorflow.keras.preprocessing import image

    # Load and preprocess the image as a batch of one.
    img = image.load_img(image_path, target_size=image_size)
    img_array = np.expand_dims(image.img_to_array(img), axis=0)
    img_array = preprocess_input(img_array)

    # Extract features using the CNN
    features = feature_extractor.predict(img_array)

    # Predict using the trained XGBoost model
    pred_prob = model.predict(xgb.DMatrix(features))[0]

    return "Anemic" if pred_prob > 0.5 else "Non-anemic"


In [None]:
# Smoke test: classify one known image from the dataset and print the verdict.
img_path = "/kaggle/input/palm-dataset-anemia/Palm/Non-AnemicP-001.png"
result = predict_anemia(
    image_path=img_path,
    feature_extractor=feature_extractor,
    model=xgb_classifier,
)
print(f"Prediction: {result}")
