In [12]:
import json
import os
import numpy as np
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV


# Data Generator

In [2]:
class DataLoader:
    def __init__(self,datasetPath,batch,imageSegmentNo) -> None:
        self.datasetPath = datasetPath
        
        self.batchPath = f'{datasetPath}/{batch}'
        self.classes = os.listdir(self.batchPath)


        self.imageSegmentNo = imageSegmentNo

        self.scaledData = []
        

        self.dataExtractor()
        self.shuffler()
        

    def dataExtractor(self):
        for i in range(len(self.classes)):
            if len(self.classes[i].split('.'))<=1:

                for imgdir in os.listdir(f'{self.batchPath}/{self.classes[i]}'):
                    if len(imgdir.split('.'))<=1:
                        jsonMetadata = self.jsonDataLoader(f'{self.batchPath}/{self.classes[i]}/{imgdir}/metadata.json')
                        

                        self.scaledData.append([  self.featureScaler(jsonMetadata['features'][self.imageSegmentNo]), jsonMetadata['classes'][self.imageSegmentNo]])
                        # self.y.append()


                        


    def jsonDataLoader(self,jsonPath):
        with open(jsonPath) as jsonfile:
            currentData = json.load(jsonfile)
            return currentData

    def featureScaler(self,feature):

        maxVal = max(feature[0])
        minVal = min(feature[0])

        dezmin = 0
        dezmax = 1

        scaledFeatures =  []
        for i in feature[0]:
            scaled = (    (   (i - minVal)/(maxVal-minVal)  ) /   (dezmax-dezmin)    ) + dezmin
            scaledFeatures.append(scaled)

        return np.array(scaledFeatures)


    def shuffler(self):
        valData=  np.array(self.scaledData)
        np.random.shuffle(valData)
        
        ListX = []
        Listy = []


        for i in valData:
            ListX.append(i[0])
            Listy.append(i[1])

        self.X = np.array(ListX)
        self.y = np.array(Listy)
        


In [3]:
valDataComplete = []
for i in range(9):
    valDataComplete.append(DataLoader('All_200X','val',i))


  valData=  np.array(self.scaledData)


In [4]:
trainDataComplete = []
for i in range(9):
    trainDataComplete.append(DataLoader('All_200X','train',i))

  valData=  np.array(self.scaledData)


In [5]:
testDataComplete = []
for i in range(9):
    testDataComplete.append(DataLoader('All_200X','test',i))

  valData=  np.array(self.scaledData)


# Ensemble SVM Training

## SVM Creator + training

In [21]:
class multiSVM:
    def __init__(self,traindata,testdata) -> None:
        self.clfs = []
        self.traindata = traindata
        self.testdata = testdata
        self.scores = []

        self.param_grid = {'C': [0.1, 1, 10, 100, 1000],
                        'gamma': [1, 0.1, 0.01, 0.001, 0.0001],
                        'kernel': ['rbf']}
        


    def modelCreator(self):
        # for i in range(9):
            # self.clfs.append(SVC(kernel='poly', C=1,degree = 3).fit(self.traindata[i].X,self.traindata[i].y))
        i = 0
        grid = GridSearchCV(SVC(), self.param_grid, refit = True, verbose = 1)
        grid.fit(self.traindata[i].X,self.traindata[i].y)
        print("SVM {i} Params and Accuracy:")
        print(grid.best_params_)
        print(grid.best_estimator_)
        print("\n")


    
    def estimator(self):
        pass
        # for i in range(9):
        #     y_preds = self.clfs[i].predict(self.testdata[i].X)

        #     self.scores.append(accuracy_score(self.testdata[i].y,y_preds))

In [22]:
ensembleSVMs = multiSVM(trainDataComplete,testDataComplete)
ensembleSVMs.modelCreator()
ensembleSVMs.estimator()
print(ensembleSVMs.scores)

Fitting 5 folds for each of 25 candidates, totalling 125 fits


## SVM Predictor

In [8]:
class multiSVMPredictor:
    def __init__(self,datasetPath,batch,classifiers) -> None:
        self.datasetPath = datasetPath
        
        self.batchPath = f'{datasetPath}/{batch}'
        self.classes = os.listdir(self.batchPath)
        self.scaledData = []
        self.classifiers = classifiers
        

        self.dataExtractor()
    


    def dataExtractor(self):
        for i in range(len(self.classes)):
            if len(self.classes[i].split('.'))<=1:

                for imgdir in os.listdir(f'{self.batchPath}/{self.classes[i]}'):
                    if len(imgdir.split('.'))<=1:
                        jsonMetadata = self.jsonDataLoader(f'{self.batchPath}/{self.classes[i]}/{imgdir}/metadata.json')
                        
                        
                        preds = []
                        actualLabel = jsonMetadata['classes'][0]
                        for j in range(9):
                            scaled = self.featureScaler(jsonMetadata['features'][j])
                            preds.append(self.classifiers[j].predict([scaled]).tolist())
                            
                        

                        outdict = {
                            'preds': preds,
                            'actualLabel': actualLabel
                        }


                        print(preds)
                        print(actualLabel)
                        print(f'{self.batchPath}/{self.classes[i]}/{imgdir}/SVMPreds.json')
                        
                        json_object = json.dumps(outdict)
                        
                        with open(f'{self.batchPath}/{self.classes[i]}/{imgdir}/SVMPreds.json', "w") as outfile:
                            outfile.write(json_object)
                      


    def jsonDataLoader(self,jsonPath):
        with open(jsonPath) as jsonfile:
            currentData = json.load(jsonfile)
            return currentData

    def featureScaler(self,feature):

        maxVal = max(feature[0])
        minVal = min(feature[0])

        dezmin = 0
        dezmax = 1

        scaledFeatures =  []
        for i in feature[0]:
            scaled = (    (   (i - minVal)/(maxVal-minVal)  ) /   (dezmax-dezmin)    ) + dezmin
            scaledFeatures.append(scaled)

        return np.array(scaledFeatures)
    

In [9]:
svmesti = multiSVMPredictor('All_200X','test',ensembleSVMs.clfs)

[[0], [0], [0], [0], [0], [0], [0], [0], [0]]
0
All_200X/test/malignant/SOB_M_MC-14-16456-200-008/SVMPreds.json
[[0], [0], [0], [0], [0], [0], [0], [0], [0]]
0
All_200X/test/malignant/SOB_M_DC-14-9461-200-070/SVMPreds.json
[[0], [0], [0], [0], [0], [0], [0], [0], [0]]
0
All_200X/test/malignant/SOB_M_PC-14-19440-200-004/SVMPreds.json
[[0], [0], [0], [0], [0], [0], [0], [0], [0]]
0
All_200X/test/malignant/SOB_M_MC-14-16456-200-001/SVMPreds.json
[[0], [0], [0], [0], [0], [0], [0], [0], [0]]
0
All_200X/test/malignant/SOB_M_LC-14-13412-200-006/SVMPreds.json
[[0], [0], [0], [0], [0], [0], [0], [0], [0]]
0
All_200X/test/malignant/SOB_M_DC-14-17901-200-001/SVMPreds.json
[[0], [0], [0], [0], [0], [0], [0], [0], [0]]
0
All_200X/test/malignant/SOB_M_DC-14-5694-200-015/SVMPreds.json
[[0], [0], [0], [0], [0], [0], [0], [0], [0]]
0
All_200X/test/malignant/SOB_M_DC-14-5694-200-012/SVMPreds.json
[[0], [0], [0], [0], [0], [0], [0], [0], [0]]
0
All_200X/test/malignant/SOB_M_LC-14-13412-200-001/SVMPreds.

KeyboardInterrupt: 

# Hard Voting Predictor

In [None]:
def jsonDataLoader(jsonPath):
        with open(jsonPath) as jsonfile:
            currentData = json.load(jsonfile)
            return currentData

In [None]:

base = 'All_200X'
subset = 'val'
classes = ['benign','malignant']
def hardVotingPredictor(base,subset,classes):
    score = 0

    for i in classes:
        imagedirs = os.listdir(f'{base}/{subset}/{i}')
        
        for imagedir in imagedirs:
            if imagedir != 'fullsized' and imagedir != '.DS_Store':
                
                jsondata = jsonDataLoader(f'{base}/{subset}/{i}/{imagedir}/SVMPreds.json')
                
                predlabes = []
                for j in jsondata['preds']:
                    predlabes.append(j[0])
                # print(predlabes)
                predicted = max(set(predlabes), key=predlabes.count)
                # print(predicted,jsondata['actualLabel'])
                if predicted == jsondata['actualLabel']:
                    score+=1
    return score
    

In [None]:
total = len(os.listdir('All_200X/val/benign'))+ len(os.listdir('All_200X/val/malignant'))

In [None]:
print(total)

306


In [None]:
score = hardVotingPredictor(base,subset,classes)

In [None]:
score/total

0.8594771241830066

# SVM Decision Model

In [None]:
class OutputSVMDataGen:
    def __init__(self,datasetPath,batch) -> None:
        self.datasetPath = datasetPath
        
        self.batchPath = f'{datasetPath}/{batch}'
        self.classes = os.listdir(self.batchPath)


        # self.imageSegmentNo = imageSegmentNo

        self.finalData = []
        

        self.dataExtractor()
        self.shuffler()
        

    def dataExtractor(self):
        for i in range(len(self.classes)):
            if len(self.classes[i].split('.'))<=1:

                for imgdir in os.listdir(f'{self.batchPath}/{self.classes[i]}'):
                    if len(imgdir.split('.'))<=1:
                        jsonMetadata = self.jsonDataLoader(f'{self.batchPath}/{self.classes[i]}/{imgdir}/SVMPreds.json')
                        

                        preds = []
                        for j in jsonMetadata['preds']:
                            preds.append(j[0])

                        # print([preds,jsonMetadata['actualLabel']])

                        self.finalData.append([preds,jsonMetadata['actualLabel']])


    def jsonDataLoader(self,jsonPath):
        with open(jsonPath) as jsonfile:
            currentData = json.load(jsonfile)
            return currentData


    def shuffler(self):
        # print(self.finalData)
        valData=  np.array(self.finalData)
        np.random.shuffle(valData)
        
        # print(valData)

        ListX = []
        Listy = []


        for i in valData:
            ListX.append(i[0])
            Listy.append(i[1])

        self.X = np.array(ListX)
        self.y = np.array(Listy)
        


In [None]:
valSVMData = OutputSVMDataGen('All_200X','val')
trainSVMData = OutputSVMDataGen('All_200X','train')
testvalSVMData = OutputSVMDataGen('All_200X','test')

  valData=  np.array(self.finalData)


In [None]:
print(len(valSVMData.X))
print(len(trainSVMData.X))
print(len(testvalSVMData.X))


304
1410
305


In [None]:
deciderSVM = SVC(kernel='poly', C=1,degree = 6).fit(trainSVMData.X,trainSVMData.y)

y_preds = deciderSVM.predict(valSVMData.X)
print(accuracy_score(valSVMData.y,y_preds))

0.8717105263157895
