In [181]:
import numpy as np
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.metrics import classification_report


dataLabels = pd.read_csv('Image_classification_data/data_labels_mainData.csv')
dataLabels['isCancerous'] = dataLabels['isCancerous'].astype(str)

modelToTrain =  "both" # "cellType" # "isCancerous" # 

img_size = (27, 27)
batch_size = 32

datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)


data_dir = 'Image_classification_data/patch_images'  


isCancerous_train_data = datagen.flow_from_dataframe(
    dataframe=dataLabels,
    directory='Image_classification_data/patch_images',
    x_col='ImageName', 
    y_col='isCancerous',  
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary',
    subset='training' 
)

cellType_train_data = datagen.flow_from_dataframe(
    dataframe=dataLabels,
    directory='Image_classification_data/patch_images',
    x_col='ImageName', 
    y_col='cellTypeName',  
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training' 
)

isCancerous_val_data = datagen.flow_from_dataframe(
    dataframe=dataLabels,
    directory='Image_classification_data/patch_images',
    x_col='ImageName',  
    y_col='isCancerous',  
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary',
    subset='validation'
)

cellType_val_data = datagen.flow_from_dataframe(
    dataframe=dataLabels,
    directory='Image_classification_data/patch_images',
    x_col='ImageName',  
    y_col='cellTypeName',  
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

dataLabels['isCancerous'].value_counts()

Found 7917 validated image filenames belonging to 2 classes.
Found 7917 validated image filenames belonging to 4 classes.
Found 1979 validated image filenames belonging to 2 classes.
Found 1979 validated image filenames belonging to 4 classes.


isCancerous
0    5817
1    4079
Name: count, dtype: int64

In [169]:
reg_lambda = 0.001


if os.path.exists('isCancerousModel.keras'):
    isCancerousModel = load_model('isCancerousModel.keras')  
    print("isCancerous model loaded successfully.")
else:
    isCancerousModel = Sequential([
        Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(27, 27, 3)), 
        Conv2D(32, (3, 3), activation='relu', padding='same'), 
        MaxPooling2D(2, 2),
            
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(2, 2),

        Conv2D(128, (3, 3), padding='same', activation='relu'),
        Conv2D(128, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(2, 2),

        Flatten(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid') 
    ])
    print("No isCancerous model found, creating new one")


if os.path.exists('cellTypeModel.keras'):
    cellTypeModel = load_model('cellTypeModel.keras')  
    print("cellType model loaded successfully.")
else:
    cellTypeModel = Sequential([
        Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(27, 27, 3)),
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPooling2D(2, 2),
            
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(2, 2),

        Flatten(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(4, activation='softmax') 
    ])
    print("No cellType model found, creating new one")


isCancerous model loaded successfully.
cellType model loaded successfully.


In [170]:
if modelToTrain == "isCancerous" or modelToTrain == "both":
    isCancerousModel.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    isCancerousModel.fit(
        isCancerous_train_data,
        validation_data=isCancerous_val_data,
        epochs=50,
        callbacks=[early_stop],
    )
if modelToTrain == "cellType" or modelToTrain == "both":
    cellTypeModel.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(from_logits=True),
    metrics=['categorical_accuracy']
    )

    cellTypeModel.fit(
        cellType_train_data,
        validation_data=cellType_val_data,
        epochs=50,
        callbacks=[early_stop],
    )

Epoch 1/50


  self._warn_if_super_not_called()


[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 51ms/step - accuracy: 0.8755 - loss: 0.3111 - val_accuracy: 0.9020 - val_loss: 0.2621
Epoch 2/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 35ms/step - accuracy: 0.8871 - loss: 0.2697 - val_accuracy: 0.8706 - val_loss: 0.2835
Epoch 3/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 34ms/step - accuracy: 0.8872 - loss: 0.2672 - val_accuracy: 0.8868 - val_loss: 0.2686
Epoch 4/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 34ms/step - accuracy: 0.8782 - loss: 0.2959 - val_accuracy: 0.8969 - val_loss: 0.2756
Epoch 5/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 33ms/step - accuracy: 0.8938 - loss: 0.2548 - val_accuracy: 0.8792 - val_loss: 0.2842
Epoch 6/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 32ms/step - accuracy: 0.8998 - loss: 0.2436 - val_accuracy: 0.8888 - val_loss: 0.2910
Epoch 1/50


  output, from_logits = _get_logits(


[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 51ms/step - categorical_accuracy: 0.7297 - loss: 0.6858 - val_categorical_accuracy: 0.7034 - val_loss: 0.8092
Epoch 2/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 34ms/step - categorical_accuracy: 0.7452 - loss: 0.6493 - val_categorical_accuracy: 0.6700 - val_loss: 0.8464
Epoch 3/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - categorical_accuracy: 0.7548 - loss: 0.6274 - val_categorical_accuracy: 0.7352 - val_loss: 0.7527
Epoch 4/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 37ms/step - categorical_accuracy: 0.7613 - loss: 0.6060 - val_categorical_accuracy: 0.7256 - val_loss: 0.7328
Epoch 5/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 36ms/step - categorical_accuracy: 0.7612 - loss: 0.5938 - val_categorical_accuracy: 0.6862 - val_loss: 0.8658
Epoch 6/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0

In [None]:
if modelToTrain == "isCancerous" or modelToTrain == "both":
    print("Is cancerous: ")
    isCancerousModel.evaluate(isCancerous_val_data)
    isCancerousModel.save('isCancerousModel.keras')

if modelToTrain == "cellType" or modelToTrain == "both":
    print("Cell type: ")
    cellTypeModel.evaluate(cellType_val_data)
    cellTypeModel.save('cellTypeModel.keras')

isCancerous_y_true = isCancerous_val_data.classes
isCancerous_y_pred = (isCancerousModel.predict(isCancerous_val_data) > 0.5).astype(int)
print("Cancerous Model Classification Report:")
print(classification_report(isCancerous_y_true, isCancerous_y_pred))

# For cellTypeModel (multiclass classification):
cellType_y_true = cellType_val_data.classes
cellType_y_pred = np.argmax(cellTypeModel.predict(cellType_val_data), axis=1)
print("Cell Type Model Classification Report:")
print(classification_report(cellType_y_true, cellType_y_pred))

Is cancerous: 
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 31ms/step - accuracy: 0.9014 - loss: 0.2613
Cell type: 
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 29ms/step - categorical_accuracy: 0.7425 - loss: 0.7150
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 27ms/step
              precision    recall  f1-score   support

           0       0.63      0.64      0.64      1270
           1       0.34      0.32      0.33       709

    accuracy                           0.53      1979
   macro avg       0.48      0.48      0.48      1979
weighted avg       0.52      0.53      0.53      1979

[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 36ms/step


ValueError: Classification metrics can't handle a mix of multiclass and multilabel-indicator targets