In [18]:
import os

import cv2
import numpy as np
from skimage.feature import hog
from sklearn.ensemble import RandomForestClassifier, StackingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, recall_score, log_loss
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from tensorflow.keras.applications import VGG16
from tensorflow.keras.applications.vgg16 import preprocess_input
from tensorflow.keras.layers import GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.image import img_to_array, load_img


In [19]:
# Define paths
# Raw string: the Windows path contains backslashes, and sequences such as
# "\P" / "\C" are invalid escapes in a normal literal (SyntaxWarning on
# Python 3.12+). The resulting value is unchanged.
base_dir = r'D:\PKG - C-NMC 2019\C-NMC_training_data'
folds = ['fold_0', 'fold_1', 'fold_2']
categories = ['all', 'hem']

# Parameters
# Target size (in pixels) every image is resized to before feature extraction.
img_width, img_height = 150, 150


In [20]:
def load_images_and_labels(base_dir, folds, categories, img_width, img_height):
    """Read every image under ``base_dir/<fold>/<category>`` and label it.

    Parameters
    ----------
    base_dir : str
        Root directory that contains one sub-directory per fold.
    folds : iterable of str
        Fold directory names to visit, in order.
    categories : iterable of str
        Category directory names inside each fold. ``'all'`` is labelled 0,
        any other category is labelled 1.
    img_width, img_height : int
        Size every image is resized to.

    Returns
    -------
    tuple of numpy.ndarray
        ``(images, labels)``: the resized images as returned by ``cv2`` and
        one integer label per image, in matching order.

    Notes
    -----
    Entries for which ``cv2.imread`` returns ``None`` are skipped.
    """
    images = []
    labels = []
    for fold in folds:
        for category in categories:
            category_dir = os.path.join(base_dir, fold, category)
            label = 0 if category == 'all' else 1
            # os.listdir returns entries in arbitrary order; sorting makes the
            # sample order (and so any seeded split downstream) repeatable.
            for filename in sorted(os.listdir(category_dir)):
                img_path = os.path.join(category_dir, filename)
                img = cv2.imread(img_path)
                if img is None:
                    # Not a readable image: skip it
                    continue
                # cv2.resize takes the target size as (width, height)
                images.append(cv2.resize(img, (img_width, img_height)))
                labels.append(label)
    return np.array(images), np.array(labels)

# Build the image and label arrays from the configured folds and categories
images, labels = load_images_and_labels(
    base_dir=base_dir,
    folds=folds,
    categories=categories,
    img_width=img_width,
    img_height=img_height,
)


In [21]:
def extract_cnn_features(images, img_width, img_height, batch_size=32):
    """Extract globally average-pooled VGG16 features for a stack of images.

    Parameters
    ----------
    images : numpy.ndarray
        Image stack indexed by sample on the first axis. Assumed to be in
        OpenCV's BGR channel order, as produced by ``cv2.imread`` in this
        notebook's loading step.
    img_width, img_height : int
        Size the images were resized to.
    batch_size : int, optional
        Number of images pushed through the network per forward pass.

    Returns
    -------
    numpy.ndarray
        One feature vector per input image (2-D, samples on the first axis).
    """
    # Load VGG16 (ImageNet weights) without its classifier head. Keras expects
    # input_shape as (height, width, channels), which is also the layout of
    # arrays produced by cv2.resize(img, (width, height)).
    base_model = VGG16(weights='imagenet', include_top=False,
                       input_shape=(img_height, img_width, 3))
    pooled = GlobalAveragePooling2D()(base_model.output)
    model = Model(inputs=base_model.input, outputs=pooled)

    images = np.asarray(images)
    if len(images) == 0:
        # Nothing to extract: keep the result 2-D so downstream code sees
        # (0, n_features) instead of failing inside the network.
        return np.empty((0, model.output_shape[-1]), dtype=np.float32)

    cnn_features = []
    for start in range(0, len(images), batch_size):
        batch = images[start:start + batch_size]
        # preprocess_input expects RGB and applies the channel reordering and
        # mean subtraction the ImageNet weights were trained with, so the BGR
        # input is flipped to RGB first.
        batch = preprocess_input(batch[..., ::-1].astype('float32'))
        # One forward pass per batch rather than one model.predict call per
        # image, which is dominated by per-call overhead.
        cnn_features.append(model.predict_on_batch(batch))

    return np.concatenate(cnn_features, axis=0)

# Run the VGG16 feature extractor over the loaded images
features = extract_cnn_features(
    images=images,
    img_width=img_width,
    img_height=img_height,
)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5


KeyboardInterrupt: 

In [None]:
# Split data into training and validation sets (80/20).
# stratify=labels keeps the class proportions the same in both subsets, so
# validation metrics are not skewed by an unlucky draw of the two classes.
X_train, X_val, y_train, y_val = train_test_split(
    features, labels, test_size=0.2, random_state=42, stratify=labels
)


In [None]:
# Define base models
random_forest = RandomForestClassifier(n_estimators=100, random_state=42)
knn = KNeighborsClassifier(n_neighbors=5)
svm = SVC(probability=True, kernel='rbf', random_state=42)

# Define meta-model
meta_model = LogisticRegression()

# Define stacking classifier
# The RBF SVM and KNN are distance based and therefore sensitive to feature
# scale, so each is wrapped in a pipeline with its own StandardScaler. Because
# the scaler is part of the estimator, it is re-fit on the training portion of
# every CV fold and no held-out statistics leak into it. The random forest is
# scale invariant and is used as-is.
stacking_classifier = StackingClassifier(
    estimators=[
        ('svm', make_pipeline(StandardScaler(), svm)),
        ('knn', make_pipeline(StandardScaler(), knn)),
        ('rf', random_forest)
    ],
    final_estimator=meta_model,
    cv=5,
    # Fit the base estimators in parallel; results are unaffected
    n_jobs=-1
)

# Train the stacking classifier
stacking_classifier.fit(X_train, y_train)
