In [36]:
from sklearn.svm import SVC
from tqdm import tqdm
import numpy as np

import medmnist 
from medmnist import BloodMNIST
from medmnist import INFO, Evaluator

from skimage.color import rgb2gray

import os
import cv2

#Modelling

from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV

from scipy.stats import randint
import matplotlib.pyplot as plt

from skimage.feature import hog
import matplotlib.pyplot as plt
from sklearn.model_selection import learning_curve

In [8]:
# For downloading the dataset
train_dataset = BloodMNIST(split="train", download=True)
val_dataset = BloodMNIST(split="val", download=True)
test_dataset = BloodMNIST(split="test", download=True)


Using downloaded and verified file: C:\Users\Catherine\.medmnist\bloodmnist.npz
Using downloaded and verified file: C:\Users\Catherine\.medmnist\bloodmnist.npz
Using downloaded and verified file: C:\Users\Catherine\.medmnist\bloodmnist.npz


In [17]:
# Navigate to the dataset file
npz_file_path = os.path.join(os.path.abspath(os.path.join(os.getcwd(), '..')), 'Datasets', 'bloodmnist.npz')



# Step 4: Load the breastmnist.npz file using numpy
data = np.load(npz_file_path)

# Print the names of arrays stored in the .npz file
print("Stored arrays:", data.files)

for item in data.files:
    print(item)
    print(data[item].shape)

train_dataset = data['train_images']
print('Training dataset', train_dataset.shape)

val_dataset = data['val_images']
print('Validation dataset', val_dataset.shape)

test_dataset = data['test_images']
print('Testing dataset', test_dataset.shape)

y_train_labels = data['train_labels']
print('Training label', y_train_labels.shape)


y_val_labels = data['val_labels']
print('Validation label', y_val_labels.shape)


y_test_labels = data['test_labels']
print('Testing label', y_test_labels.shape)





Stored arrays: ['train_images', 'train_labels', 'val_images', 'val_labels', 'test_images', 'test_labels']
train_images
(11959, 28, 28, 3)
train_labels
(11959, 1)
val_images
(1712, 28, 28, 3)
val_labels
(1712, 1)
test_images
(3421, 28, 28, 3)
test_labels
(3421, 1)
Training dataset (11959, 28, 28, 3)
Validation dataset (1712, 28, 28, 3)
Testing dataset (3421, 28, 28, 3)
Training label (11959, 1)
Validation label (1712, 1)
Testing label (3421, 1)


In [24]:
# Preprocessing test

x_train = np.empty((11959, 784*3))
image = 0


#
print(train_dataset.shape)
#print("Before flattening", train_dataset[0])
print("Before flattening (train)", train_dataset.shape)

for img in train_dataset:
    #print("Image Shape: ", img.shape)  # Ensure each image is 28x28
    x_train[image] = img.flatten()  # Flatten the image and store it
    image += 1  # Move to the next row in x_train
            
#print("After flattening:", x_train[0])
print("After flattening (train):", x_train.shape)

#print("Label before processing (train)",y_train_labels.shape)
y_train_labels=y_train_labels.flatten()
#print("Label after processing (train)",y_train_labels.shape)
print("\n")

#### Preprocessing  validation
x_val = np.empty((11959, 784*3))
image = 0

print("Before flattening (val)", val_dataset.shape)

for img in val_dataset:
    #print("Image Shape: ", img.shape)  # Ensure each image is 28x28
    x_val[image] = img.flatten()  # Flatten the image and store it
    image += 1  # Move to the next row in x_train

print("After flattening (val):", x_val.shape)

#print("Label after processing (val)",y_val_labels.shape)
y_val_labels=y_val_labels.flatten()
#print("Label after processing (val)",y_val_labels.shape)

### Preprocessing test
x_test = np.empty((3421, 784*3))
image = 0

print("Before flattening (test)", test_dataset.shape)

for img in test_dataset:
    #print("Image Shape: ", img.shape)  # Ensure each image is 28x28
    x_test[image] = img.flatten()  # Flatten the image and store it
    image += 1  # Move to the next row in x_train

print("After flattening (test):", x_test.shape)

#print("Label after processing (val)",y_test_labels.shape)
y_test_labels=y_test_labels.flatten()
#print("Label after processing (val)",y_test_labels.shape)

(11959, 28, 28, 3)
Before flattening (train) (11959, 28, 28, 3)
After flattening (train): (11959, 2352)


Before flattening (val) (1712, 28, 28, 3)
After flattening (val): (11959, 2352)
Before flattening (test) (3421, 28, 28, 3)
After flattening (test): (3421, 2352)


In [25]:
svm_ovo = SVC(decision_function_shape='ovo')
svm_ovo.fit(x_train, y_train_labels)




In [28]:
y_pred = svm_ovo.predict(x_test)
print(y_pred.shape)
print(y_test_labels.shape)

accuracy = accuracy_score(y_test_labels, y_pred)
precision = precision_score(y_test_labels, y_pred, average='micro')
recall = recall_score(y_test_labels, y_pred, average='micro')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

con_matrix = confusion_matrix(y_test_labels, y_pred) 
print(con_matrix)

(3421,)
(3421,)
Accuracy: 0.8471207249342297
Precision: 0.8471207249342297
Recall: 0.8471207249342297
[[139   0   0  80  10  15   0   0]
 [  3 581   1  16   3   3  17   0]
 [ 10   0 249  23  11   2  13   3]
 [ 19  18   7 465  10  22  38   0]
 [ 10   0  11  23 193   0   6   0]
 [ 11   1   1  77   1 187   6   0]
 [  0  14  10  23   1   0 618   0]
 [  0   0   4   0   0   0   0 466]]


In [29]:
svm_ovr = SVC(decision_function_shape='ovr')
svm_ovr.fit(x_train, y_train_labels)


In [None]:
y_pred = svm_ovr.predict(x_test)
print(y_pred.shape)
print(y_test_labels.shape)

accuracy = accuracy_score(y_test_labels, y_pred)
precision = precision_score(y_test_labels, y_pred, average='micro')
recall = recall_score(y_test_labels, y_pred, average='micro')

print("Accuracy:", accuracy)
print("Precision:", precision)
print("Recall:", recall)

con_matrix = confusion_matrix(y_test_labels, y_pred) 
print(con_matrix)

In [47]:
models = (SVC(kernel='linear'),
          SVC(kernel='rbf', gamma=0.7 ),
          SVC(kernel='poly', degree=3))
models = (clf.fit(x_train, y_train_labels) for clf in models)


y_pred = (clf.predict(x_test) for clf in models)



