In [1]:
import tensorflow as tf
import keras
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, Flatten

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

from PIL import Image

import glob
import os
import pandas as pd

import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import numpy as np

In [2]:
def annotation(file, annotateFile):
    assert os.path.exists(file) and os.path.exists(annotateFile)
    
    classes = dict()
    for im_num, file in enumerate(glob.glob(file + "/*")):
        for annotate in glob.glob(annotateFile + "/*"):
            if str(im_num+1) + "_" in annotate:
                classes[file] = annotate
                break
                
    return classes

parent = "Blood-Cancer_Data"
ALL_IDB1 = parent + "/All_IDB1/im"
annotate1 = parent + "/ALL_IDB1/xyc"

data = annotation(ALL_IDB1, annotate1)
image_files, centroid_files = data.keys(), data.values()

In [3]:
df = pd.DataFrame(data={"image_files":image_files,
                        "annotations:":centroid_files})

# Must be type pandas.DataFrame to work
train, validate, test = np.split(df.sample(frac=1), [int(.6*len(df)), int(.8*len(df))])

In [4]:
train.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 64 entries, 98 to 82
Data columns (total 2 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   image_files   64 non-null     object
 1   annotations:  64 non-null     object
dtypes: object(2)
memory usage: 1.5+ KB


In [5]:
# train["image_pixels"] = train["image_files"].apply(lambda img: (img)Image.open.resize((1368, 1712)))

In [6]:
train["image_pixels"] = train["image_files"].apply(lambda img: mpimg.imread(img))

In [6]:
train.head()

Unnamed: 0,image_files,annotations:,image_pixels
14,Blood-Cancer_Data/All_IDB1/im\Im015_1.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im015_1.xyc,"[[[161, 163, 158], [161, 163, 158], [163, 164,..."
57,Blood-Cancer_Data/All_IDB1/im\Im058_1.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im058_1.xyc,"[[[149, 138, 118], [151, 140, 120], [149, 141,..."
95,Blood-Cancer_Data/All_IDB1/im\Im096_0.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im096_0.xyc,"[[[109, 101, 90], [103, 95, 84], [107, 97, 87]..."
64,Blood-Cancer_Data/All_IDB1/im\Im065_0.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im065_0.xyc,"[[[118, 119, 103], [118, 119, 103], [117, 118,..."
89,Blood-Cancer_Data/All_IDB1/im\Im090_0.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im090_0.xyc,"[[[139, 144, 121], [140, 145, 122], [139, 144,..."


#### The readme on ALL_IDB1 states that if y=1 on each image name, blast cells exist from ALL patients, where a sample image is notated as xxx_y.jpg. 

In [7]:
train["diagnosis"] = train["image_files"].apply(lambda x: "ALL" if "_1" in x else "Healthy")

In [8]:
train["diagnosis"]

98     Healthy
89     Healthy
12         ALL
103    Healthy
59         ALL
        ...   
23         ALL
79     Healthy
5          ALL
87     Healthy
82     Healthy
Name: diagnosis, Length: 64, dtype: object

In [9]:
def build_model(shape, n_classes):
    model = Sequential()
    model.add(Dense(units=256, input_shape=shape, activation="relu"))
    model.add(Dense(units=128, activation="relu"))
    model.add(Conv2D(filters=32, kernel_size=(3, 3), activation="relu"))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dense(units=16, activation="relu"))
    model.add(Conv2D(filters=8, kernel_size=(3, 3), activation="relu"))
    model.add(Flatten())
    model.add(Dense(units=n_classes, activation="sigmoid"))
    return model

#### For a keras cnn, input image data must be of shape (n_samples, img_rows, img_cols, rgb)

In [10]:
custom_cnn = build_model((1368, 1712, 3), 2)

In [11]:
custom_cnn.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 1368, 1712, 256)   1024      
_________________________________________________________________
dense_1 (Dense)              (None, 1368, 1712, 128)   32896     
_________________________________________________________________
conv2d (Conv2D)              (None, 1366, 1710, 32)    36896     
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 683, 855, 32)      0         
_________________________________________________________________
dense_2 (Dense)              (None, 683, 855, 16)      528       
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 681, 853, 8)       1160      
_________________________________________________________________
flatten (Flatten)            (None, 4647144)           0

In [12]:
custom_cnn.compile(optimizer="adam", loss="mean_squared_error")

In [13]:
# X_train shape should be (64, 1368, 1712, 3)
img_pixels = train["image_pixels"]
labels = train["diagnosis"]

In [40]:
y_classes = LabelEncoder().fit_transform(labels)

In [18]:
##### Reshaping functionality to accomodate for keras 4d arr required
def images_array(data, target_shape, out_ndim=4):
    final_arr = np.array([[[[]]]])
    n_samples = len(data)
    rows, cols, rgb = target_shape
    for d in data:
        if d.shape != target_shape:
            n_samples -= 1
            continue
        final_arr = np.append(final_arr, d)
    
    return final_arr.reshape(n_samples, rows, cols, rgb)

In [19]:
X_train = images_array(img_pixels, (1368, 1712, 3))

In [20]:
X_train.shape

(20, 1368, 1712, 3)

In [21]:
working_idcs = train.loc[train["image_pixels"].apply(lambda img: img.shape) == (1368, 1712, 3)]
working_idcs = working_idcs.index

In [101]:
train.head()

Unnamed: 0,image_files,annotations:,image_pixels,diagnosis
95,Blood-Cancer_Data/All_IDB1/im\Im096_0.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im096_0.xyc,"[[[109, 101, 90], [103, 95, 84], [107, 97, 87]...",Healthy
102,Blood-Cancer_Data/All_IDB1/im\Im103_0.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im103_0.xyc,"[[[103, 111, 88], [102, 110, 87], [105, 113, 9...",Healthy
51,Blood-Cancer_Data/All_IDB1/im\Im052_1.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im052_1.xyc,"[[[142, 129, 112], [142, 129, 112], [140, 124,...",ALL
7,Blood-Cancer_Data/All_IDB1/im\Im008_1.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im008_1.xyc,"[[[140, 123, 103], [141, 124, 108], [142, 124,...",ALL
25,Blood-Cancer_Data/All_IDB1/im\Im026_1.jpg,Blood-Cancer_Data/ALL_IDB1/xyc\Im026_1.xyc,"[[[150, 157, 150], [151, 158, 151], [153, 158,...",ALL


In [22]:
working_idcs

Int64Index([12, 32, 28, 11, 9, 2, 13, 6, 26, 21, 31, 22, 1, 7, 16, 19, 3, 25,
            23, 5],
           dtype='int64')

In [41]:
new_y_classes = []
for idx, cls in enumerate(y_classes):
    if idx in list(working_idcs):
        new_y_classes.append(cls)

In [42]:
new_y_classes = np.array(new_y_classes)

In [43]:
new_y_classes

array([1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0])

In [44]:
from keras import utils

y_classes = utils.to_categorical(new_y_classes, 2)

In [45]:
y_classes.shape

(20, 2)

In [30]:
X_train.shape

(20, 1368, 1712, 3)

In [46]:
datagen = ImageDataGenerator(
    horizontal_flip=True
)

datagen.fit(X_train)
# fits the model on batches with real-time data augmentation:
model.fit(datagen.flow(x_train, y_train, batch_size=32), epochs=1)

NameError: name 'ImageDataGenerator' is not defined

In [None]:
import keras

from tensorflow.keras.preprocessing.image import ImageDataGenerator

datagen = ImageDataGenerator(
    horizontal_flip=True
)

datagen.fit(X_train)
# fits the model on batches with real-time data augmentation:
custom_cnn.fit(datagen.flow(X_train, y_classes, batch_size=32))

In [153]:
custom_cnn.fit(datagen)

ValueError: Failed to find data adapter that can handle input: <class 'tensorflow.python.keras.preprocessing.image.ImageDataGenerator'>, <class 'NoneType'>

In [114]:
datagen = ImageDataGenerator(
    horizontal_flip=True
)

datagen.flow([[np.array([1]), np.array([2])]], [np.array([1]), np.array([2])])

IndexError: list index out of range

In [96]:
datagen = ImageDataGenerator(
    horizontal_flip=True
)

datagen.flow(np.array([[[[1, 1]]]]), np.array([[1, 1]]))



<tensorflow.python.keras.preprocessing.image.NumpyArrayIterator at 0x1d5c0e7b7f0>

In [90]:
custom_cnn.fit(datagen)

ValueError: Failed to find data adapter that can handle input: <class 'tensorflow.python.keras.preprocessing.image.ImageDataGenerator'>, <class 'NoneType'>

In [83]:
np.ones((1, 12))

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

In [77]:
help(datagen.fit)

Help on method fit in module keras_preprocessing.image.image_data_generator:

fit(x, augment=False, rounds=1, seed=None) method of tensorflow.python.keras.preprocessing.image.ImageDataGenerator instance
    Fits the data generator to some sample data.
    
    This computes the internal data stats related to the
    data-dependent transformations, based on an array of sample data.
    
    Only required if `featurewise_center` or
    `featurewise_std_normalization` or `zca_whitening` are set to True.
    
    When `rescale` is set to a value, rescaling is applied to
    sample data before computing the internal data stats.
    
    # Arguments
        x: Sample data. Should have rank 4.
         In case of grayscale data,
         the channels axis should have value 1, in case
         of RGB data, it should have value 3, and in case
         of RGBA data, it should have value 4.
        augment: Boolean (default: False).
            Whether to fit on randomly augmented samples.
      

In [68]:
custom_cnn.fit_generator(generator=datagen)

ValueError: Failed to find data adapter that can handle input: <class 'tensorflow.python.keras.preprocessing.image.ImageDataGenerator'>, <class 'NoneType'>

In [151]:
# Figure out the error for fitting first argument
custom_cnn.fit(X_train, y_classes)

KeyboardInterrupt: 

In [121]:
np.unique(y_train)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=uint8)

In [22]:
model = build_model(shape=(32, 32, 3), n_classes=10)

In [19]:
x_train[0].shape

(32, 32, 3)

In [156]:
X_train.shape

(19, 1368, 1712, 3)

In [155]:
x_train.shape

(50000, 32, 32, 3)

In [159]:
y_classes.shape

(19, 1)

In [29]:
x_train.shape


(50000, 32, 32, 3)

In [26]:
import keras
from keras import utils

from keras.datasets import cifar10
from tensorflow.keras.preprocessing.image import ImageDataGenerator

num_classes = 10
epochs = 10

(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train = utils.to_categorical(y_train, num_classes)
y_test = utils.to_categorical(y_test, num_classes)

# model = build_model(shape=(32, 32, 3), n_classes=10)
# model.compile("adam", "mean_squared_error")

# datagen = ImageDataGenerator(
#     horizontal_flip=True
# )

# datagen.fit(x_train)
# # fits the model on batches with real-time data augmentation:
# model.fit(datagen.flow(x_train, y_train, batch_size=32), epochs=1)