In [1]:
!pip install pillow



In [2]:
!pip install opencv-python



In [3]:
# Basic Imports
import numpy as np
import pandas as pd

# Project Imports
import os
import cv2
from PIL import Image

In [4]:
# Path & Variables
CUR_DIR = os.getcwd()
DATA_DIR = os.path.join(CUR_DIR, 'data')

RAW_DATA = os.path.join(DATA_DIR, 'raw_data')
DATA_CLEAN = os.path.join(DATA_DIR, 'data_clean')

DIM = (128, 128)

In [5]:
# Check & create target dir
if not os.path.exists(DATA_CLEAN):
    os.makedirs(DATA_CLEAN)

In [6]:
# Convert & Process image into npy

for file in os.listdir(RAW_DATA):
    if file.endswith(".jpg"):
        img = cv2.imread(os.path.join(RAW_DATA, file))
        resize_img = cv2.resize(img, DIM)
        # img_array = np.array(img)
        file_name = os.path.splitext(file)[0]
        np.save(os.path.join(DATA_CLEAN, file_name), resize_img)

In [9]:
# Adding the target
metadata = pd.read_excel(os.path.join(DATA_DIR, 'metadata.xlsx'))
img = np.load(os.path.join(DATA_CLEAN, os.listdir(DATA_CLEAN)[0]))

metadata_sample = metadata[['cancer', 'image_id']]
metadata_sample.head(2)

Unnamed: 0,cancer,image_id
0,0,462822612
1,0,1459541791


In [10]:
# Create CSV for the sample (array & Id)
image_id = []
img_arrays = []

for img in os.listdir(DATA_CLEAN):
    image_id.append(np.int64(int(os.path.splitext(img)[0])))
    img_arrays.append(np.load(os.path.join(DATA_CLEAN, img)))

img_csv = pd.DataFrame(dict(
    image_id=image_id,
    image_array=img_arrays
))


In [11]:
final_csv = img_csv.merge(metadata_sample, on= 'image_id', how='left')
final_csv.shape

(1410, 3)

In [12]:
final_csv.head()

Unnamed: 0,image_id,image_array,cancer
0,478078862,"[[[155, 155, 155], [155, 155, 155], [158, 158,...",0
1,1587335740,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
2,268663025,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
3,1942326353,"[[[174, 174, 174], [174, 174, 174], [174, 174,...",0
4,1927587225,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0


In [20]:
final_csv

Unnamed: 0,image_id,image_array,cancer
0,478078862,"[[[155, 155, 155], [155, 155, 155], [158, 158,...",0
1,1587335740,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
2,268663025,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
3,1942326353,"[[[174, 174, 174], [174, 174, 174], [174, 174,...",0
4,1927587225,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
...,...,...,...
1405,1565871400,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
1406,515696698,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
1407,2071610129,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
1408,537917766,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0


In [14]:
final_csv['image_array'][0].shape

(128, 128, 3)

In [21]:
# final_csv['image_array'].shape
final_csv['image_array']

pandas.core.series.Series

In [23]:
tmp_df = final_csv.sample(frac=0.1)
tmp_df

Unnamed: 0,image_id,image_array,cancer
786,330079921,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
552,205845801,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
515,993674425,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
504,2053930931,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
287,358781917,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
...,...,...,...
474,1596966751,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
636,1393698380,"[[[255, 255, 255], [255, 255, 255], [255, 255,...",0
1150,639112043,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0
614,1687362173,"[[[0, 0, 0], [0, 0, 0], [0, 0, 0], [0, 0, 0], ...",0


In [24]:
tmp_array = np.array(tmp_df['image_array'])


In [27]:
tmp_array

array([array([[[  0,   0,   0],
               [  0,   0,   0],
               [  0,   0,   0],
               ...,
               [  0,   0,   0],
               [  0,   0,   0],
               [  0,   0,   0]],

              [[  0,   0,   0],
               [  0,   0,   0],
               [  0,   0,   0],
               ...,
               [  0,   0,   0],
               [  0,   0,   0],
               [  0,   0,   0]],

              [[  0,   0,   0],
               [  0,   0,   0],
               [  0,   0,   0],
               ...,
               [  0,   0,   0],
               [  0,   0,   0],
               [  0,   0,   0]],

              ...,

              [[  0,   0,   0],
               [  0,   0,   0],
               [  0,   0,   0],
               ...,
               [  0,   0,   0],
               [ 10,  10,  10],
               [254, 254, 254]],

              [[  0,   0,   0],
               [  0,   0,   0],
               [  0,   0,   0],
               ...,
        

In [15]:
# Model
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

def initialize_model(X):

    model = Sequential()

    model.add(Conv2D(16, (4, 4), input_shape = (X[0].shape[0], X[0].shape[1], X[0].shape[2]), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Conv2D(32, (3, 3), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Conv2D(64, (3, 3), activation = 'relu'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Flatten())
    model.add(Dense(units = 10, activation = 'relu'))
    model.add(Dense(units = 1, activation = 'sigmoid'))
    model.compile(loss='binary_crossentropy',
                 optimizer='adam',
                 metrics =['accuracy', 'Recall'])

    return model

2024-03-07 17:28:16.154080: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-07 17:28:16.186296: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-03-07 17:28:16.186324: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-03-07 17:28:16.187137: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-03-07 17:28:16.191750: I external/local_tsl/tsl/cuda/cudart_stub.cc:31] Could not find cuda drivers on your machine, GPU will not be used.
2024-03-07 17:28:16.192699: I tensorflow/core/platform/cpu_feature_guard.cc:1

In [19]:
# Get Feature & target
X = final_csv.image_array
y = final_csv.cancer
X.shape, y.shape

((1410,), (1410,))

In [None]:
# Init Model
model = initialize_model(X)
model.summary()

Model: "sequential_7"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_12 (Conv2D)          (None, 253, 253, 16)      784       
                                                                 
 max_pooling2d_12 (MaxPooli  (None, 126, 126, 16)      0         
 ng2D)                                                           
                                                                 
 conv2d_13 (Conv2D)          (None, 124, 124, 32)      4640      
                                                                 
 max_pooling2d_13 (MaxPooli  (None, 62, 62, 32)        0         
 ng2D)                                                           
                                                                 
 conv2d_14 (Conv2D)          (None, 60, 60, 64)        18496     
                                                                 
 max_pooling2d_14 (MaxPooli  (None, 30, 30, 64)       

In [None]:
# Convert


pandas.core.series.Series

In [18]:
from tensorflow.keras.datasets import mnist
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train.shape

(60000, 28, 28)

In [None]:
# One Hot Encode our Target for TensorFlow processing
from tensorflow.keras.utils import to_categorical
y_cat = to_categorical(y, num_classes=2)
y.shape, y_cat.shape

((1667,), (1667, 2))

In [None]:

history = model.fit(X,
                    y_cat,
                    validation_split = 0.2,
                    batch_size = 32,
                    epochs = 5,
                    verbose = 1)

ValueError: Failed to convert a NumPy array to a Tensor (Unsupported object type numpy.ndarray).