In [1]:
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np

from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
import skimage
from skimage.io import imread
from skimage.transform import resize
import os

In [2]:
# Root folder of the image dataset (one sub-folder per category).
# Raw string: the backslashes of the Windows path must stay literal; in a normal
# string, sequences such as "\A" or "\c" are invalid escapes and trigger a warning.
data_dir = r'D:\Albot\Medical Imaging-Cancer\code\Eyes_dataset\Data'

In [3]:
os.listdir(data_dir)    # list the entries of the data folder, i.e. the category names

['Mild', 'Moderate', 'No_DR', 'Proliferate_DR', 'Severe']

In [4]:
# Report how many files each category folder contains.
# os.path.join builds the sub-folder path with the platform separator instead of
# a hard-coded '\\', so the cell is not tied to Windows path syntax.
for category in ('Mild', 'Moderate', 'No_DR', 'Proliferate_DR', 'Severe'):
    n_images = len(os.listdir(os.path.join(data_dir, category)))
    print(f"No. of images in {category} cases:", n_images)



No. of images in Mild cases: 799
No. of images in Moderate cases: 789
No. of images in No_DR cases: 856
No. of images in Proliferate_DR cases: 670
No. of images in Severe cases: 814


In [5]:
def load_image_files(container_path, dimension=(64,64)):
    """Load a folder-per-category image dataset as flat feature vectors and labels.

    Every sub-folder of ``container_path`` is treated as one category. Each file
    inside a category folder is read as an image, resized to ``dimension`` and
    flattened into one row of the feature matrix.

    Parameters
    ----------
    container_path : str or Path
        Folder that holds the category sub-folders.
    dimension : tuple of int, default (64, 64)
        Output shape handed to ``skimage.transform.resize``.

    Returns
    -------
    sklearn.utils.Bunch
        ``data``         -- array with one flattened, resized image per row.
        ``Target``       -- array with the category index of each row.
        ``target_names`` -- category folder names; ``target_names[i]`` is the
                            category encoded as label ``i``.
    """
    image_dir = Path(container_path)
    print("Data folder name:", image_dir)

    print("Each Categories Name:", os.listdir(image_dir))

    # iterdir() yields entries in arbitrary order; sorting makes the
    # category -> label index mapping reproducible across runs and machines.
    folders = sorted(directory for directory in image_dir.iterdir() if directory.is_dir())
    print("Name of folders:", folders)

    images = []
    target = []

    for label, direc in enumerate(folders):
        # Sorted as well, so the row order of the dataset is reproducible.
        for file in sorted(direc.iterdir()):
            if not file.is_file():
                # A nested folder is not an image; handing it to imread would fail.
                continue
            img = skimage.io.imread(file)
            images.append(resize(img, dimension, mode='reflect'))
            target.append(label)

    images = np.array(images)
    # Row i holds image i flattened in C order (the same values img.flatten()
    # gives). Reshaping the stacked array avoids keeping a second list of
    # flattened copies in memory. With no images the empty array is kept as is.
    flat_data = images.reshape(len(images), -1) if len(images) else images
    print("shape of flat_data:", flat_data.shape)

    target = np.array(target)
    print("shape of target:", target.shape)
    print("Target:", target)

    print("shape of images:", images.shape)

    return Bunch(data=flat_data, Target=target, target_names=[d.name for d in folders])

In [6]:
# Load all category folders with the default 64x64 resize.
# The path is relative, so it is resolved against the current working directory.
image_dataset= load_image_files('Eyes_dataset/Data/')

Data folder name: Eyes_dataset\Data
Each Categories Name: ['Mild', 'Moderate', 'No_DR', 'Proliferate_DR', 'Severe']
Name of folders: [WindowsPath('Eyes_dataset/Data/Mild'), WindowsPath('Eyes_dataset/Data/Moderate'), WindowsPath('Eyes_dataset/Data/No_DR'), WindowsPath('Eyes_dataset/Data/Proliferate_DR'), WindowsPath('Eyes_dataset/Data/Severe')]
shape of flat_data: (3928, 12288)
shape of target: (3928,)
Target: [0 0 0 ... 4 4 4]
shape of images: (3928, 64, 64, 3)


In [7]:
image_dataset.data.shape    # (number of images, number of flattened values per image)

(3928, 12288)

In [8]:
image_dataset.Target.shape    # one label per image

(3928,)

In [9]:
# Tabular view of the dataset: one row per image, one column per flattened value.
df = pd.DataFrame(data=image_dataset.data)

In [10]:
# Append the labels as the last column, named 'Target'.
df['Target']=image_dataset.Target

In [11]:
df.head(5)    # preview the first five rows (value columns followed by Target)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12279,12280,12281,12282,12283,12284,12285,12286,12287,Target
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0


In [12]:
df.tail(5)    # preview the last five rows

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12279,12280,12281,12282,12283,12284,12285,12286,12287,Target
3923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3924,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3926,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4
3927,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,4


In [13]:
X=df.iloc[:,:-1] # input features: every column except the last one (Target)

In [14]:
X    # display the feature frame

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,12278,12279,12280,12281,12282,12283,12284,12285,12286,12287
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3923,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.002206,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3924,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3925,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3926,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.000000,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
y=df.iloc[:,-1] # output labels: the last column (Target)

In [16]:
y    # display the label Series

0       0
1       0
2       0
3       0
4       0
       ..
3923    4
3924    4
3925    4
3926    4
3927    4
Name: Target, Length: 3928, dtype: int32

In [17]:
# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified -- consider stratify=y if the class
# proportions must match between the train and test parts.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=109,
)

print('Splitted the data')

Splitted the data


In [18]:
# Report the number of samples in each part of the split
# (these are sizes only, not the per-class label distribution).
for caption, part in (
    ("y_train: ", y_train),
    ("y_test:", y_test),
    ("X_train: ", X_train),
    ("X_test:", X_test),
):
    print(caption, len(part))

y_train:  3142
y_test: 786
X_train:  3142
X_test: 786


In [None]:
## Hyper-parameter search: fit an SVM for every combination in the grid

# One sub-grid per kernel; gamma is only listed for the kernels that use it.
# NOTE(review): the gamma values jump from 0.1 to 0.001 -- confirm that 0.01
# was left out on purpose.
param_grid = [
    dict(C=[1, 10, 100, 1000], kernel=['linear']),
    dict(C=[1, 10, 100, 1000], gamma=[1, 0.1, 0.001, 0.0001], kernel=['rbf']),
    dict(C=[1, 10, 100, 1000], gamma=[1, 0.1, 0.001, 0.0001], kernel=['poly']),
]
svc = svm.SVC()
# No cv / scoring arguments are given, so the library defaults apply.
model = GridSearchCV(estimator=svc, param_grid=param_grid)
model.fit(X_train, y_train)

In [None]:
# Predict the test labels with the fitted grid search. The search object is
# bound to `model`; the name `clf` is not defined anywhere in the notebook and
# raised a NameError here.
y_pred = model.predict(X_test)

print("Prediction: ", y_pred)

In [None]:
# Score the predictions: share of test samples whose predicted class equals the true label
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print('Model accuracy is: ', accuracy)