# Imports

In [2]:
import os
import cv2
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from keras.layers import Flatten
import json

from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV



# Feature Extractor

In [3]:
class dataGenerator:
    def __init__(self,basePath,subPath) -> None:
        self.basePath = basePath
        self.subPath = subPath
        self.subset = []
        print(self.basePath,self.subPath)

        self.listAppend()

    def listAppend(self):
        classes = os.listdir(self.basePath+self.subPath)
        print(classes)

        classCounter = 0

        for i in classes:
            imagePaths = os.listdir(self.basePath+self.subPath+f'/{i}/fullsized')
            temp = []
            for image in imagePaths:
                temp.append([f"{classCounter}",self.basePath+self.subPath+ f'/{i}'  '/fullsized' +  f'/{image}'])
                # print(image)
            classCounter+=1
            self.subset += temp
    
    def shuffler(self):
        np.random.seed(0)
        npsubSet = np.array(self.subset)
        np.random.shuffle(npsubSet)
        return npsubSet

In [6]:
# os.remove('All_200X/val/.DS_Store')
# os.remove('All_200X/train/.DS_Store')
os.remove('All_200X/test/.DS_Store')

FileNotFoundError: [Errno 2] No such file or directory: 'All_200X/test/.DS_Store'

In [7]:
traindatagen = dataGenerator('All_200X','/train')
trainData = traindatagen.shuffler()

testdatagen = dataGenerator('All_200X','/test')
testData = testdatagen.shuffler()

valdatagen = dataGenerator('All_200X','/val')
valData = valdatagen.shuffler()

All_200X /train
['malignant', 'benign']
All_200X /test
['malignant', 'benign']
All_200X /val
['malignant', 'benign']


In [8]:
class FeatureExtractor:
    def __init__(self,dataset) -> None:
        self.base_model = tf.keras.applications.VGG19(input_shape=(150,150,3),
                                               include_top=False,
                                               weights='imagenet')
        # print(dataset)
        self.dataset = dataset       
        self.extractedFeatures = []    
        self.imagePaths = []
        self.labels = []

        for i in dataset:
            self.labels.append(int(i[0]))
            self.imagePaths.append(i[1])
        
        self.featureCreator()


    def featureCreator(self):
        for i in self.imagePaths:
            
            slicedImages = self.slicer(i)

            currentExtractedFeatures = []

            for image in slicedImages:
                currentExtractedFeatures.append(self.featureExtractor(image).numpy())

            self.extractedFeatures.append(currentExtractedFeatures)



    def slicer(self,imagePath):
        img = plt.imread(imagePath)
        h, w = img.shape[:2]
        ls = []
        for i in range(3):
            roi3 = img[:, int(w*i/3): int(w/3) + int(w*i/3)]
            # plt.imshow(roi)
            for j in range(3):
                roi9 = roi3[int(h*j/3): int(h/3) + int(h*j/3), :]
                ls.append(roi9)
        return ls                    
                                                       
    def preProcessor(self,img):
        img = cv2.resize(img,(150,150))
        imgx = np.expand_dims(img, axis=0)
        return imgx                                        
    
    def featureExtractor(self,image):
        feature = self.base_model(self.preProcessor(image))
        flattened = Flatten()(feature)
        # print(feature.shape,flattened.shape)
        return flattened

In [9]:
valFeatures = FeatureExtractor(valData)

Metal device set to: Apple M1

systemMemory: 16.00 GB
maxCacheSize: 5.33 GB



2022-02-05 21:43:52.362135: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:305] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2022-02-05 21:43:52.362312: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:271] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [10]:
trainFeatures = FeatureExtractor(trainData)

In [11]:
testFeatures = FeatureExtractor(testData)

# SVM Training

In [12]:
def featureScaler(feature):

    maxVal = max(feature)
    minVal = min(feature)

    dezmin = 0
    dezmax = 1

    scaledFeatures =  []
    for i in feature:
        scaled = (    (   (i - minVal)/(maxVal-minVal)  ) /   (dezmax-dezmin)    ) + dezmin
        scaledFeatures.append(scaled)

    return np.array(scaledFeatures)

In [13]:
def SVMDataPreProc(subset):
    subset
    slicedDataset = []
    for j in range(9):
            temp = []
            for i in subset:
                temp.append(featureScaler(i[j][0]))

            slicedDataset.append(temp)
    return slicedDataset

In [14]:
SVMValData = SVMDataPreProc(valFeatures.extractedFeatures)
SVMTrainData = SVMDataPreProc(trainFeatures.extractedFeatures)
SVMTestData = SVMDataPreProc(testFeatures.extractedFeatures)

In [15]:
class multiSVM:
    def __init__(self,Xtrain,yTrain,Xtest,yTest) -> None:
        self.clfs = []
        
        self.Xtrain = Xtrain
        self.yTrain = yTrain
        self.Xtest = Xtest
        self.yTest = yTest


        self.scores = []

    def modelCreator(self):
        for i in range(9):
            self.clfs.append(SVC(kernel='poly', C=6).fit(self.Xtrain[i],self.yTrain))

    def estimator(self):
        for i in range(9):
            y_preds = self.clfs[i].predict(self.Xtest[i])
            self.scores.append(accuracy_score(self.yTest,y_preds))

In [16]:
ensembleSVM = multiSVM(SVMTrainData,trainFeatures.labels,SVMTestData,testFeatures.labels)

In [17]:
ensembleSVM.modelCreator()

In [18]:
ensembleSVM.estimator()
print(ensembleSVM.scores)

[0.8481848184818482, 0.8415841584158416, 0.8382838283828383, 0.834983498349835, 0.8085808580858086, 0.8217821782178217, 0.8415841584158416, 0.834983498349835, 0.8448844884488449]


In [19]:
class MultiSVMPredictor:
    def __init__(self,X,y,classifiers) -> None:
        self.X = X
        self.y = y
        self.classifiers = classifiers
        self.preds = []

        self.prediction()
    
    def prediction(self):
        for i in range(9):
            self.preds.append(self.classifiers[i].predict(self.X[i]))

In [59]:
trainEnsemblePredsInitial = MultiSVMPredictor(SVMTrainData,trainFeatures.labels,ensembleSVM.clfs)
atrain1 = trainEnsemblePredsInitial.preds[0]
atrain2 = trainEnsemblePredsInitial.preds[1]
atrain3 = trainEnsemblePredsInitial.preds[2]
atrain4 = trainEnsemblePredsInitial.preds[3]
atrain5 = trainEnsemblePredsInitial.preds[4]
atrain6 = trainEnsemblePredsInitial.preds[5]
atrain7 = trainEnsemblePredsInitial.preds[6]
atrain8 = trainEnsemblePredsInitial.preds[7]
atrain9 = trainEnsemblePredsInitial.preds[8]


trainEnsemblePreds = []
for i in range(len(atrain1)):
    trainEnsemblePreds.append(np.array([atrain1[i],atrain2[i],atrain3[i],atrain4[i],atrain5[i],atrain6[i],atrain7[i],atrain8[i],atrain9[i]]))

In [54]:
testEnsemblePredsInitial = MultiSVMPredictor(SVMTestData,testFeatures.labels,ensembleSVM.clfs)
atest1 = testEnsemblePredsInitial.preds[0]
atest2 = testEnsemblePredsInitial.preds[1]
atest3 = testEnsemblePredsInitial.preds[2]
atest4 = testEnsemblePredsInitial.preds[3]
atest5 = testEnsemblePredsInitial.preds[4]
atest6 = testEnsemblePredsInitial.preds[5]
atest7 = testEnsemblePredsInitial.preds[6]
atest8 = testEnsemblePredsInitial.preds[7]
atest9 = testEnsemblePredsInitial.preds[8]


testEnsemblePreds = []
for i in range(len(atest1)):
    testEnsemblePreds.append(np.array([atest1[i],atest2[i],atest3[i],atest4[i],atest5[i],atest6[i],atest7[i],atest8[i],atest9[i]]))

In [77]:
valEnsemblePredsInitial = MultiSVMPredictor(SVMValData,valFeatures.labels,ensembleSVM.clfs)
aval1 = valEnsemblePredsInitial.preds[0]
aval2 = valEnsemblePredsInitial.preds[1]
aval3 = valEnsemblePredsInitial.preds[2]
aval4 = valEnsemblePredsInitial.preds[3]
aval5 = valEnsemblePredsInitial.preds[4]
aval6 = valEnsemblePredsInitial.preds[5]
aval7 = valEnsemblePredsInitial.preds[6]
aval8 = valEnsemblePredsInitial.preds[7]
aval9 = valEnsemblePredsInitial.preds[8]


valEnsemblePreds = []
for i in range(len(aval1)):
    valEnsemblePreds.append(np.array([aval1[i],aval2[i],aval3[i],aval4[i],aval5[i],aval6[i],aval7[i],aval8[i],aval9[i]]))

In [78]:
print(len(valEnsemblePreds),len(valFeatures.labels))

302 302


In [55]:
print(len(testEnsemblePreds),len(testFeatures.labels))

303 303


In [60]:
print(len(trainEnsemblePreds),len(trainFeatures.labels))

1408 1408


# SVM Predictor

In [71]:
from sklearn.utils.fixes import loguniform

param_grid = [
#   {'C': [1,10,100,1000], 'kernel': ['linear']},
#   {'C': [1,10,100,1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf']},
  {'C': [1,10,100,1000], 'degree': [1,2,3,4,5,6,7,8,9,10], 'kernel': ['poly']},
 ]

grid = GridSearchCV(SVC(), param_grid, refit = True, verbose = 3)
grid.fit(trainEnsemblePreds,trainFeatures.labels)
print("SVM {i} Params and Accuracy:")
print(grid.best_params_)
print(grid.best_estimator_)
print("\n")

Fitting 5 folds for each of 40 candidates, totalling 200 fits
[CV 1/5] END ........C=1, degree=1, kernel=poly;, score=0.982 total time=   0.0s
[CV 2/5] END ........C=1, degree=1, kernel=poly;, score=0.979 total time=   0.0s
[CV 3/5] END ........C=1, degree=1, kernel=poly;, score=0.972 total time=   0.0s
[CV 4/5] END ........C=1, degree=1, kernel=poly;, score=0.975 total time=   0.0s
[CV 5/5] END ........C=1, degree=1, kernel=poly;, score=0.975 total time=   0.0s
[CV 1/5] END ........C=1, degree=2, kernel=poly;, score=0.979 total time=   0.0s
[CV 2/5] END ........C=1, degree=2, kernel=poly;, score=0.968 total time=   0.0s
[CV 3/5] END ........C=1, degree=2, kernel=poly;, score=0.965 total time=   0.0s
[CV 4/5] END ........C=1, degree=2, kernel=poly;, score=0.975 total time=   0.0s
[CV 5/5] END ........C=1, degree=2, kernel=poly;, score=0.964 total time=   0.0s
[CV 1/5] END ........C=1, degree=3, kernel=poly;, score=0.975 total time=   0.0s
[CV 2/5] END ........C=1, degree=3, kernel=poly

In [79]:
deciderSVM = SVC(C=1, degree=1, kernel='poly').fit(trainEnsemblePreds,trainFeatures.labels)



In [81]:
y_preds = deciderSVM.predict(testEnsemblePreds)
print(f'Accuracy is: {accuracy_score(testFeatures.labels,y_preds)*100} %')

Accuracy is: 89.10891089108911 %


# Hardvoting

In [56]:
def hardVoting(Preds,Actual):
    hvScore = 0
    for i in range(len(Preds)):
        
        classCounter = 0
        for j in Preds[i]:
            if j ==1:
                classCounter+=1
        
        if classCounter>4:
            predicted = 1
        else:
            predicted = 0

        if Actual[i] == predicted:
            hvScore+=1
    return hvScore/len(Preds)
        

In [86]:
acc = hardVoting(testEnsemblePreds,testFeatures.labels)

In [87]:
acc

0.8811881188118812