# Imports

In [2]:
import os
import cv2
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Flatten
import json

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV



# Feature Extractor

In [3]:
class dataGenerator:
    def __init__(self,basePath,subPath) -> None:
        self.basePath = basePath
        self.subPath = subPath
        self.subset = []
        print(self.basePath,self.subPath)

        self.listAppend()

    def listAppend(self):
        classes = os.listdir(self.basePath+self.subPath)
        print(classes)

        classCounter = 0

        for i in classes:
            imagePaths = os.listdir(self.basePath+self.subPath+f'/{i}/fullsized')
            temp = []
            for image in imagePaths:
                temp.append([f"{classCounter}",self.basePath+self.subPath+ f'/{i}'  '/fullsized' +  f'/{image}'])
                # print(image)
            classCounter+=1
            self.subset += temp
    
    def shuffler(self):
        np.random.seed(0)
        npsubSet = np.array(self.subset)
        np.random.shuffle(npsubSet)
        return npsubSet

In [6]:
# os.remove('All_200X/val/.DS_Store')
# os.remove('All_200X/train/.DS_Store')
os.remove('All_200X/test/.DS_Store')

FileNotFoundError: [Errno 2] No such file or directory: 'All_200X/test/.DS_Store'

In [7]:
traindatagen = dataGenerator('All_200X','/train')
trainData = traindatagen.shuffler()

testdatagen = dataGenerator('All_200X','/test')
testData = testdatagen.shuffler()

valdatagen = dataGenerator('All_200X','/val')
valData = valdatagen.shuffler()

All_200X /train
['malignant', 'benign']
All_200X /test
['malignant', 'benign']
All_200X /val
['malignant', 'benign']


In [8]:
class FeatureExtractor:
    def __init__(self,dataset) -> None:
        self.base_model = tf.keras.applications.VGG19(input_shape=(150,150,3),
                                               include_top=False,
                                               weights='imagenet')
        # print(dataset)
        self.dataset = dataset       
        self.extractedFeatures = []    
        self.imagePaths = []
        self.labels = []

        for i in dataset:
            self.labels.append(int(i[0]))
            self.imagePaths.append(i[1])
        
        self.featureCreator()


    def featureCreator(self):
        for i in self.imagePaths:
            
            slicedImages = self.slicer(i)

            currentExtractedFeatures = []

            for image in slicedImages:
                currentExtractedFeatures.append(self.featureExtractor(image).numpy())

            self.extractedFeatures.append(currentExtractedFeatures)



    def slicer(self,imagePath):
        img = plt.imread(imagePath)
        h, w = img.shape[:2]
        ls = []
        for i in range(3):
            roi3 = img[:, int(w*i/3): int(w/3) + int(w*i/3)]
            # plt.imshow(roi)
            for j in range(3):
                roi9 = roi3[int(h*j/3): int(h/3) + int(h*j/3), :]
                ls.append(roi9)
        return ls                    
                                                       
    def preProcessor(self,img):
        img = cv2.resize(img,(150,150))
        imgx = np.expand_dims(img, axis=0)
        return imgx                                        
    
    def featureExtractor(self,image):
        feature = self.base_model(self.preProcessor(image))
        flattened = Flatten()(feature)
        # print(feature.shape,flattened.shape)
        return flattened

In [9]:
valFeatures = FeatureExtractor(valData)

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-02-05 21:43:52.362135: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-02-05 21:43:52.362312: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [10]:
trainFeatures = FeatureExtractor(trainData)

In [11]:
testFeatures = FeatureExtractor(testData)

# SVM Training

In [12]:
def featureScaler(feature):

    maxVal = max(feature)
    minVal = min(feature)

    dezmin = 0
    dezmax = 1

    scaledFeatures =  []
    for i in feature:
        scaled = (    (   (i - minVal)/(maxVal-minVal)  ) /   (dezmax-dezmin)    ) + dezmin
        scaledFeatures.append(scaled)

    return np.array(scaledFeatures)

In [13]:
def SVMDataPreProc(subset):
    subset
    slicedDataset = []
    for j in range(9):
            temp = []
            for i in subset:
                temp.append(featureScaler(i[j][0]))

            slicedDataset.append(temp)
    return slicedDataset

In [14]:
SVMValData = SVMDataPreProc(valFeatures.extractedFeatures)
SVMTrainData = SVMDataPreProc(trainFeatures.extractedFeatures)
SVMTestData = SVMDataPreProc(testFeatures.extractedFeatures)

In [15]:
class multiSVM:
    def __init__(self,Xtrain,yTrain,Xtest,yTest) -> None:
        self.clfs = []
        
        self.Xtrain = Xtrain
        self.yTrain = yTrain
        self.Xtest = Xtest
        self.yTest = yTest


        self.scores = []

    def modelCreator(self):
        for i in range(9):
            self.clfs.append(SVC(kernel='poly', C=6).fit(self.Xtrain[i],self.yTrain))

    def estimator(self):
        for i in range(9):
            y_preds = self.clfs[i].predict(self.Xtest[i])
            self.scores.append(accuracy_score(self.yTest,y_preds))

In [16]:
ensembleSVM = multiSVM(SVMTrainData,trainFeatures.labels,SVMTestData,testFeatures.labels)

In [17]:
ensembleSVM.modelCreator()

In [18]:
ensembleSVM.estimator()
print(ensembleSVM.scores)

[0.8481848184818482, 0.8415841584158416, 0.8382838283828383, 0.834983498349835, 0.8085808580858086, 0.8217821782178217, 0.8415841584158416, 0.834983498349835, 0.8448844884488449]


In [19]:
class MultiSVMPredictor:
    def __init__(self,X,y,classifiers) -> None:
        self.X = X
        self.y = y
        self.classifiers = classifiers
        self.preds = []

        self.prediction()
    
    def prediction(self):
        for i in range(9):
            self.preds.append(self.classifiers[i].predict(self.X[i]))

In [20]:
mpred = MultiSVMPredictor(SVMTestData,testFeatures.labels,ensembleSVM.clfs)

In [36]:
a1 = mpred.preds[0]
a2 = mpred.preds[1]
a3 = mpred.preds[2]
a4 = mpred.preds[3]
a5 = mpred.preds[4]
a6 = mpred.preds[5]
a7 = mpred.preds[6]
a8 = mpred.preds[7]
a9 = mpred.preds[8]


temp = []
for i in range(len(a1)):
    temp.append(np.array([a1[i],a2[i],a3[i],a4[i],a5[i],a6[i],a7[i],a8[i],a9[i]]))

[array([0, 1, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 1, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 1, 0, 1, 0]),
 array([1, 0, 0, 0, 0, 0, 1, 0, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1]),
 array([0, 0, 0, 1, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 1, 1]),
 array([0, 0, 0, 0, 0, 1, 0, 0, 0]),
 array([1, 0, 0, 1, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([1, 0, 0, 0, 0, 0, 1, 0, 1]),
 array([0, 0, 0, 0, 1, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 1, 0, 0, 0, 0]),
 array([1, 1, 1, 1, 0, 1, 1, 1, 1]),
 array([1, 0, 0, 1, 0, 1, 1, 1, 1]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 0, 0, 0, 0, 0, 0, 0, 0]),
 array([0, 1, 1, 0, 0, 1, 1, 1, 0]),
 array([1, 1, 1, 1, 1, 1, 1, 0, 0]),
 

In [41]:
print(len(temp),len(testFeatures.labels))

303 303


In [46]:
def hardVoting(Preds,Actual):
    hvScore = 0
    for i in range(len(Preds)):
        
        classCounter = 0
        for j in Preds[i]:
            if j ==1:
                classCounter+=1
        
        if classCounter>4:
            predicted = 1
        else:
            predicted = 0

        if Actual[i] == predicted:
            hvScore+=1
    return hvScore/len(Preds)
        

In [47]:
acc = hardVoting(temp,testFeatures.labels)

In [48]:
acc

0.8811881188118812