# imports

In [1]:
import numpy as np
import tensorflow as tf
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import cv2
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers
from tensorflow.keras.applications import EfficientNetB0

# data pre-processing

In [2]:
# Every entry directly under the dataset root is treated as one disease class.
dataset_path = os.listdir('../../datasets/data21k')

print(
   f'disease classes: {dataset_path}',
   f'types of diseases: {len(dataset_path)}',
   sep='\n',
)

disease classes: ['CBB', 'CBSD', 'CGM', 'CH', 'CMD']
types of diseases: 5


In [3]:
# Build one (class name, 'class/file') pair for every file found in each class folder.
disease_labels = []

for disease in dataset_path:
   class_files = os.listdir(f'../../datasets/data21k/{disease}')
   disease_labels.extend(
      (disease, str(disease) + '/' + file_name) for file_name in class_files
   )

print(f'Number of images: {len(disease_labels)}')

# Show the first and the last pair as a quick sanity check of the listing.
for sample in (disease_labels[0], disease_labels[-1]):
   print(f'Example of a class label: {sample}')

Number of images: 21397
Example of a class label: ('CBB', 'CBB/1000015157.jpg')
Example of a class label: ('CMD', 'CMD/999329392.jpg')


In [4]:
# Tabulate the (class, relative image path) pairs: one row per image.
df = pd.DataFrame(disease_labels, columns=['Disease', 'Image'])

In [5]:
# Preview the first five rows of the label table.
df.head(n=5)

Unnamed: 0,Disease,Image
0,CBB,CBB/1000015157.jpg
1,CBB,CBB/1001320321.jpg
2,CBB,CBB/1003888281.jpg
3,CBB,CBB/1012426959.jpg
4,CBB,CBB/1018973237.jpg


In [6]:
# Preview the last five rows of the label table.
df.tail(n=5)

Unnamed: 0,Disease,Image
21392,CMD,CMD/997485103.jpg
21393,CMD,CMD/997651546.jpg
21394,CMD,CMD/997857988.jpg
21395,CMD,CMD/999068805.jpg
21396,CMD,CMD/999329392.jpg


In [7]:
# Number of images per disease class, followed by the overall row count of `df`.
label_count = df.loc[:, 'Disease'].value_counts()

print(label_count)
print('total images:', df.shape[0])

Disease
CMD     13158
CH       2577
CGM      2386
CBSD     2189
CBB      1087
Name: count, dtype: int64
total images: 21397


In [8]:
# Load every image listed in `df` and resize it to a fixed square resolution.
path = '../../datasets/data21k/'
dataset_path = os.listdir(path)

image_size = 300

images = []    # resized image arrays, one per row of `df`
diseases = []  # class name belonging to the image at the same position in `images`

# Walk the rows of `df` instead of listing the directories a second time: the
# label-encoding cell takes its targets from `df['Disease']`, so loading in `df`
# order guarantees that images and labels line up row for row.
for disease, relative_path in zip(df['Disease'], df['Image']):
   image_file = os.path.join(path, relative_path)
   image = cv2.imread(image_file)

   # cv2.imread reports a missing or undecodable file by returning None rather than
   # raising; stop here with the offending path instead of failing inside cv2.resize.
   if image is None:
      raise ValueError(f'could not read image: {image_file}')

   images.append(cv2.resize(image, (image_size, image_size)))
   diseases.append(disease)

KeyboardInterrupt: 

In [None]:
# Stack the loaded images into one array and scale the pixel values by 1/255.
images = np.asarray(images)

# Guard on dtype so that re-running this cell does not divide by 255 a second time:
# once converted, `images` is float32 and the block below is skipped.
if images.dtype != np.float32:
   images = images.astype('float32')
   images /= 255.0  # in place, so no second full-size float32 temporary is allocated

images.shape

In [None]:
# Turn the class-name column of `df` into integer class ids.
y_labelencoder = LabelEncoder()

# Raw class names, one per row of `df`.
y = df['Disease'].values
print(y)

# Integer ids assigned by the fitted encoder (kept in `y_labelencoder` for reuse).
y = y_labelencoder.fit_transform(y)
print(y)

In [None]:
# One-hot encode the integer class ids; the encoder works on a single-column 2-D array.
y = y.reshape(-1, 1)

# NOTE(review): ColumnTransformer may return a scipy sparse matrix when the result is
# sparse enough (see its `sparse_threshold` parameter) - confirm that downstream code
# accepts that, or that a dense array is what is actually produced here.
ct = ColumnTransformer(
   transformers=[('encoder', OneHotEncoder(), [0])],
   remainder='passthrough',
)
Y = ct.fit_transform(y)

# First and last encoded rows as a quick check.
print(Y[0], Y[-1], sep='\n')

In [None]:
# Shuffle images and labels together, then hold out 20% of the samples for testing.
# NOTE(review): per the scikit-learn docs train_test_split shuffles by default, so the
# explicit shuffle may be redundant - it is kept so the resulting split is unchanged.
images, Y = shuffle(images, Y, random_state=1)

train_x, test_x, train_y, test_y = train_test_split(
   images,
   Y,
   test_size=0.2,
   random_state=415,
)

# Shapes of the four resulting arrays, in the order train_x, train_y, test_x, test_y.
print(train_x.shape, train_y.shape, test_x.shape, test_y.shape, sep='\n')

In [None]:
# Model definition: an EfficientNetB3 classifier trained from scratch (weights=None)
# with a `num_classes`-way classification head.
num_classes = 5
image_size = 300
size = (image_size, image_size)

inputs = layers.Input(shape=(image_size, image_size, 3))

# Only EfficientNetB0 is imported at the top of the notebook, so the bare name
# `EfficientNetB3` raised NameError; reach it through the already-imported `tf` instead.
# NOTE(review): the Keras EfficientNet models are documented as containing their own
# input rescaling and expecting pixel values in [0, 255], while an earlier cell already
# divides the images by 255 - confirm whether that double scaling is intended.
outputs = tf.keras.applications.EfficientNetB3(
   include_top=True,
   weights=None,
   classes=num_classes,
)(inputs)