In [11]:
import os
import cv2
import xml.etree.ElementTree as ET
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score

# Collect every XML annotation file found anywhere under the dataset root.
path = '/content/drive/MyDrive/BCCD_Dataset'
# os.walk yields entries in an arbitrary, filesystem-dependent order. Sorting
# pins the sample order so the seeded train/test split performed later in this
# script selects the same samples on every run and on every machine.
xml_files = sorted(
    os.path.join(root, name)
    for root, _dirs, files in os.walk(path)
    for name in files
    if name.endswith(".xml")
)

# HOG descriptor configuration.
# Geometry: a 64x64 window, 16x16 blocks moved in 4-pixel steps, 8x8 cells and
# 9 orientation bins give 13 * 13 blocks * 4 cells * 9 bins = 6084 values per
# window.
winSize, blockSize, blockStride, cellSize = (64, 64), (16, 16), (4, 4), (8, 8)
nbins = 9

# Remaining constructor arguments, kept as named module-level values.
derivAperture = 1
winSigma = -1.0
histogramNormType = 0
L2HysThreshold = 0.2
gammaCorrection = 1
nlevels = 64
useSignedGradients = True

# The constructor is called positionally, so the argument order below matters.
hog = cv2.HOGDescriptor(
    winSize,
    blockSize,
    blockStride,
    cellSize,
    nbins,
    derivAperture,
    winSigma,
    histogramNormType,
    L2HysThreshold,
    gammaCorrection,
    nlevels,
    useSignedGradients,
)

# Accumulators: one HOG descriptor and one integer class id per annotated cell.
features = []
labels = []

# Integer class id for each annotated cell type; any other name is ignored.
class_ids = {'RBC': 0, 'WBC': 1, 'Platelets': 2}

# Retrieve image patches from XML info and images
for t in xml_files:
    root = ET.parse(t).getroot()
    img_name = root.find('filename').text
    img = cv2.imread(os.path.join(path, img_name))
    if img is None:
        # cv2.imread signals a missing or undecodable file by returning None
        # rather than raising; slicing None below would crash the whole run.
        print('Skipping', t, '- could not read image', img_name)
        continue
    img_h, img_w = img.shape[:2]

    for elem in root.findall('object'):
        label = class_ids.get(elem.find('name').text)
        if label is None:
            continue

        # Clamp the box to the image. A negative coordinate would otherwise
        # wrap around as a NumPy index, and a box lying entirely outside the
        # image would produce an empty crop that cv2.resize rejects.
        x1 = max(0, int(elem.find('bndbox/xmin').text))
        y1 = max(0, int(elem.find('bndbox/ymin').text))
        x2 = min(img_w, int(elem.find('bndbox/xmax').text))
        y2 = min(img_h, int(elem.find('bndbox/ymax').text))
        if x2 <= x1 or y2 <= y1:
            # Degenerate or fully out-of-image box: nothing to crop.
            continue

        # Crop the cell, bring it to the HOG window size and describe it.
        cropped_img = img[y1:y2, x1:x2]
        resized_img = cv2.resize(cropped_img, winSize)
        descriptor = hog.compute(resized_img).flatten()

        features.append(descriptor)
        labels.append(label)

# Stack the per-patch descriptors into a float32 matrix (one row per patch)
# and the class ids into an int64 vector.
features, labels = (
    np.array(features, dtype=np.float32),
    np.array(labels, dtype=np.int64),
)

# Hold out 20% of the samples for testing; the fixed seed makes the shuffle
# repeatable for a given sample order.
split = train_test_split(features, labels, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split

# Report the size of each partition (train first, then test).
for X_part, y_part in ((X_train, y_train), (X_test, y_test)):
    print(X_part.shape, y_part.shape)

# Fit a support vector classifier with the library's default settings.
clf = svm.SVC()
clf.fit(X_train, y_train)

# Score the held-out samples.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)


(3908, 6084) (3908,)
(978, 6084) (978,)
Accuracy: 0.9652351738241309
