In [None]:
import os
import pickle
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from sklearnex import patch_sklearn

patch_sklearn()

from skimage.io import imread
from skimage.transform import resize
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

In [ ]:
# Dataset locations, given relative to the current working directory.
# NOTE(review): only training_data_dir is read by the cells visible here, and
# all three paths sit under "training_data" -- confirm that is intended.
training_data_dir = "C-NMC_Leukemia/training_data/fold_0"
testing_data_dir = "C-NMC_Leukemia/training_data/fold_1"
validation_data_dir = "C-NMC_Leukemia/training_data/fold_2"

# One sub-directory per class; a category's position in this list is the
# integer label assigned to its images by the loading loop.
categories = ["all", "hem"]

# Accumulators filled by the loading loop: flattened images and their labels.
data, labels = [], []

In [ ]:
# Load every image of every category from the training fold as a flat vector.
# The accumulators are re-created here so that re-running this cell neither
# duplicates samples nor fails once `data` has been converted to an ndarray.
data = []
labels = []
for category_idx, category in enumerate(categories):
    category_dir = os.path.join(training_data_dir, category)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order identical from run to run.
    for file in sorted(os.listdir(category_dir)):
        img_path = os.path.join(category_dir, file)
        img = imread(img_path)
        # NOTE(review): images are flattened without resizing, so stacking
        # them later assumes they all share one shape -- confirm for this data.
        data.append(img.flatten())
        # The label is the category's index in `categories`.
        labels.append(category_idx)

In [ ]:
# Convert the accumulated Python lists to NumPy arrays so that array
# attributes such as `.shape` are available to the following cells.
data, labels = np.asarray(data), np.asarray(labels)

In [ ]:
# Hold out 20% of the samples for evaluation, stratified on the labels.
# No random_state is passed, so the split differs between runs.
x_train, x_test, y_train, y_test = train_test_split(
    data,
    labels,
    test_size=0.2,
    shuffle=True,
    stratify=labels,
)

# Report the per-sample shape and the overall shape of both partitions.
for caption, dims in (
    ("x_train element shape:", x_train[0].shape),
    ("x_test element shape:", x_test[0].shape),
    ("x_train shape:", x_train.shape),
    ("x_test shape:", x_test.shape),
):
    print(caption, dims)

In [ ]:
# Support-vector classifier built with no explicit arguments; it is the base
# estimator handed to GridSearchCV in the next cell, which varies gamma and C.
classifier = SVC()

In [ ]:
# Hyper-parameter grid: one gamma value combined with two C values,
# i.e. two candidate settings in total.
parameters = [
    {
        'gamma': [0.1],
        'C': [1, 10],
    },
]

# Search over the grid; cross-validation and scoring are left at
# GridSearchCV's defaults.
grid_search = GridSearchCV(estimator=classifier, param_grid=parameters)

In [ ]:
# Run the grid search on the training partition; the winning model is read
# from `grid_search.best_estimator_` in the next cell.
grid_search.fit(x_train, y_train)

In [ ]:
# Evaluate the best model found by the grid search on the held-out partition.
best_estimator = grid_search.best_estimator_

y_prediction = best_estimator.predict(x_test)

# accuracy_score is documented as (y_true, y_pred): pass the ground truth
# first. Accuracy is symmetric, so the value is unchanged, but the correct
# order matters as soon as an asymmetric metric is substituted here.
score = accuracy_score(y_test, y_prediction)

# Printed text is the same as before; the f-string replaces .format(str(...)).
print(f"{score * 100}% of images were classified correctly")