In [94]:
import numpy as np
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
import os

# ---------------------------------------------------------------------------
# Configuration and input pipeline
# ---------------------------------------------------------------------------

# Label table: one row per image patch, read from the dataset's CSV file.
dataLabels = pd.read_csv('Image_classification_data/data_labels_mainData.csv')
# The binary target is cast to str because flow_from_dataframe expects the
# y_col values to be strings when class_mode='binary'.
dataLabels['isCancerous'] = dataLabels['isCancerous'].astype(str)

# Which model(s) the later training/evaluation cells act on.
# Accepted values: "isCancerous", "cellType" or "both".
modelToTrain = "both"

img_size = (27, 27)   # (height, width) every patch is resized to
batch_size = 32

# Directory holding the patch images referenced by the 'ImageName' column.
data_dir = 'Image_classification_data/patch_images'

# Scales pixel values by 1/255 and reserves 20% of the rows for the
# 'validation' subset (the remaining 80% form the 'training' subset).
datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2
)

# Stops training once val_loss has not improved for 5 consecutive epochs and
# rolls the model back to the best-scoring weights.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)


def _flow_from_labels(y_col, class_mode, subset):
    """Build a batch iterator over the patch images for one target column.

    All four generators below share the same dataframe, image directory,
    image size and batch size; only the label column, the label encoding and
    the train/validation subset differ.

    Args:
        y_col: Name of the column in ``dataLabels`` used as the target.
        class_mode: Label encoding passed to Keras ('binary' or 'categorical').
        subset: 'training' or 'validation', as defined by ``datagen``'s
            ``validation_split``.

    Returns:
        The iterator returned by ``datagen.flow_from_dataframe``.
    """
    return datagen.flow_from_dataframe(
        dataframe=dataLabels,
        directory=data_dir,
        x_col='ImageName',
        y_col=y_col,
        target_size=img_size,
        batch_size=batch_size,
        class_mode=class_mode,
        subset=subset,
    )


# Generators are created in the same order as before so the "Found N validated
# image filenames" messages keep their sequence.
isCancerous_train_data = _flow_from_labels('isCancerous', 'binary', 'training')
cellType_train_data = _flow_from_labels('cellTypeName', 'categorical', 'training')
isCancerous_val_data = _flow_from_labels('isCancerous', 'binary', 'validation')
cellType_val_data = _flow_from_labels('cellTypeName', 'categorical', 'validation')

# Class balance of the binary target (displayed as the cell output).
dataLabels['isCancerous'].value_counts()

Found 7917 validated image filenames belonging to 2 classes.
Found 7917 validated image filenames belonging to 4 classes.
Found 1979 validated image filenames belonging to 2 classes.
Found 1979 validated image filenames belonging to 4 classes.


isCancerous
0    5817
1    4079
Name: count, dtype: int64

In [95]:

# --- Binary classifier: cancerous vs. non-cancerous ------------------------
# Reuse a previously saved model when one is on disk; otherwise assemble a
# fresh three-stage CNN ending in a single sigmoid unit.
if not os.path.exists('isCancerousModel.keras'):
    isCancerousModel = Sequential()

    # Three conv/pool stages with 32, 64 and 128 filters.
    isCancerousModel.add(
        Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(27, 27, 3))
    )
    isCancerousModel.add(MaxPooling2D(2, 2))
    isCancerousModel.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    isCancerousModel.add(MaxPooling2D(2, 2))
    isCancerousModel.add(Conv2D(128, (3, 3), padding='same', activation='relu'))
    isCancerousModel.add(MaxPooling2D(2, 2))

    # Classification head.
    isCancerousModel.add(Flatten())
    isCancerousModel.add(Dropout(0.5))
    isCancerousModel.add(Dense(64, activation='relu'))
    isCancerousModel.add(Dense(1, activation='sigmoid'))

    print("No isCancerous model found, creating new one")
else:
    isCancerousModel = load_model('isCancerousModel.keras')
    print("isCancerous model loaded successfully.")


# --- Four-way classifier: cell type ----------------------------------------
# Same load-or-build logic; the fresh network has two conv/pool stages and a
# 4-unit softmax output.
if not os.path.exists('cellTypeModel.keras'):
    cellTypeModel = Sequential()

    # Two conv/pool stages with 32 and 64 filters.
    cellTypeModel.add(
        Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(27, 27, 3))
    )
    cellTypeModel.add(MaxPooling2D(2, 2))
    cellTypeModel.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    cellTypeModel.add(MaxPooling2D(2, 2))

    # Classification head.
    cellTypeModel.add(Flatten())
    cellTypeModel.add(Dropout(0.5))
    cellTypeModel.add(Dense(64, activation='relu'))
    cellTypeModel.add(Dense(4, activation='softmax'))

    print("No cellType model found, creating new one")
else:
    cellTypeModel = load_model('cellTypeModel.keras')
    print("cellType model loaded successfully.")


isCancerous model loaded successfully.
cellType model loaded successfully.


In [None]:
# Compile and train whichever model(s) `modelToTrain` selects. Each fit runs
# for at most 50 epochs and is cut short by the `early_stop` callback.

if modelToTrain in ("isCancerous", "both"):
    # Single sigmoid output -> binary cross-entropy on probabilities.
    isCancerousModel.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    isCancerousModel.fit(
        isCancerous_train_data,
        validation_data=isCancerous_val_data,
        epochs=50,
        callbacks=[early_stop],
    )

if modelToTrain in ("cellType", "both"):
    cellTypeModel.compile(
        optimizer='adam',
        # The network's final layer is Dense(4, activation='softmax'), so its
        # outputs are already probabilities. from_logits must therefore be
        # False; with True the loss would treat those probabilities as raw
        # logits and apply softmax a second time.
        loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
        metrics=['categorical_accuracy']
    )

    cellTypeModel.fit(
        cellType_train_data,
        validation_data=cellType_val_data,
        epochs=50,
        callbacks=[early_stop],
    )

Epoch 1/50


  self._warn_if_super_not_called()


[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 28ms/step - accuracy: 0.8867 - loss: 0.2702 - val_accuracy: 0.9065 - val_loss: 0.2437
Epoch 2/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.9072 - loss: 0.2328 - val_accuracy: 0.8934 - val_loss: 0.2571
Epoch 3/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.9160 - loss: 0.2076 - val_accuracy: 0.9015 - val_loss: 0.2610
Epoch 4/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 14ms/step - accuracy: 0.9112 - loss: 0.2212 - val_accuracy: 0.9025 - val_loss: 0.2546
Epoch 5/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 17ms/step - accuracy: 0.9122 - loss: 0.2082 - val_accuracy: 0.9085 - val_loss: 0.2570
Epoch 6/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 16ms/step - accuracy: 0.9183 - loss: 0.2012 - val_accuracy: 0.7868 - val_loss: 0.4793


In [None]:
# Score each selected model on its validation generator, then write it to
# disk under the same file name the loading cell looks for.

if modelToTrain in ("isCancerous", "both"):
    print("Is cancerous: ")
    isCancerousModel.evaluate(isCancerous_val_data)
    isCancerousModel.save('isCancerousModel.keras')

if modelToTrain in ("cellType", "both"):
    print("Cell type: ")
    cellTypeModel.evaluate(cellType_val_data)
    cellTypeModel.save('cellTypeModel.keras')

Is cancerous: 
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.9043 - loss: 0.2458
