In [None]:
import numpy as np
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.utils import class_weight
from sklearn.metrics import classification_report


dataLabels = pd.read_csv('Image_classification_data/data_labels_mainData.csv')
dataLabels['isCancerous'] = dataLabels['isCancerous'].astype(str)

modelToTrain =  "isCancerous" # "both" # "cellType" 

img_size = (27, 27)
batch_size = 32

datagen = ImageDataGenerator(
    rescale=1./255,
    validation_split=0.2,
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1,
    zoom_range=0.1,
    horizontal_flip=True
)

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

class_weights = class_weight.compute_class_weight(
    class_weight='balanced',
    classes=np.unique(dataLabels['isCancerous']),
    y=dataLabels['isCancerous']
)
class_weights_dict = dict(enumerate(class_weights))

data_dir = 'Image_classification_data/patch_images'  


isCancerous_train_data = datagen.flow_from_dataframe(
    dataframe=dataLabels,
    directory='Image_classification_data/patch_images',
    x_col='ImageName', 
    y_col='isCancerous',  
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary',
    subset='training' 
)

cellType_train_data = datagen.flow_from_dataframe(
    dataframe=dataLabels,
    directory='Image_classification_data/patch_images',
    x_col='ImageName', 
    y_col='cellTypeName',  
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='training' 
)

isCancerous_val_data = datagen.flow_from_dataframe(
    dataframe=dataLabels,
    directory='Image_classification_data/patch_images',
    x_col='ImageName',  
    y_col='isCancerous',  
    target_size=img_size,
    batch_size=batch_size,
    class_mode='binary',
    subset='validation'
)

cellType_val_data = datagen.flow_from_dataframe(
    dataframe=dataLabels,
    directory='Image_classification_data/patch_images',
    x_col='ImageName',  
    y_col='cellTypeName',  
    target_size=img_size,
    batch_size=batch_size,
    class_mode='categorical',
    subset='validation'
)

print(dataLabels['isCancerous'].value_counts())
print(dataLabels['cellTypeName'].value_counts())

Found 7917 validated image filenames belonging to 2 classes.
Found 7917 validated image filenames belonging to 4 classes.
Found 1979 validated image filenames belonging to 2 classes.
Found 1979 validated image filenames belonging to 4 classes.
isCancerous
0    5817
1    4079
Name: count, dtype: int64
cellTypeName
epithelial      4079
inflammatory    2543
fibroblast      1888
others          1386
Name: count, dtype: int64


In [251]:
reg_lambda = 0.001


if os.path.exists('isCancerousModel.keras'):
    isCancerousModel = load_model('isCancerousModel.keras')  
    print("isCancerous model loaded successfully.")
else:
    isCancerousModel = Sequential([
        Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(27, 27, 3)), 
        Conv2D(32, (3, 3), activation='relu', padding='same'), 
        MaxPooling2D(2, 2),
            
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(2, 2),

        Conv2D(128, (3, 3), padding='same', activation='relu'),
        Conv2D(128, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(2, 2),

        Flatten(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(1, activation='sigmoid') 
    ])
    print("No isCancerous model found, creating new one")


if os.path.exists('cellTypeModel.keras'):
    cellTypeModel = load_model('cellTypeModel.keras')  
    print("cellType model loaded successfully.")
else:
    cellTypeModel = Sequential([
        Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(27, 27, 3)),
        Conv2D(32, (3, 3), activation='relu', padding='same'),
        MaxPooling2D(2, 2),
            
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        Conv2D(64, (3, 3), padding='same', activation='relu'),
        MaxPooling2D(2, 2),

        Flatten(),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dense(4, activation='softmax') 
    ])
    print("No cellType model found, creating new one")


No isCancerous model found, creating new one


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


cellType model loaded successfully.


In [252]:
if modelToTrain == "isCancerous" or modelToTrain == "both":
    isCancerousModel.compile(
        optimizer='adam',
        loss='binary_crossentropy',
        metrics=['accuracy']
    )

    isCancerousModel.fit(
        isCancerous_train_data,
        validation_data=isCancerous_val_data,
        epochs=50,
        callbacks=[early_stop],
        class_weight=class_weights_dict,
    )
if modelToTrain == "cellType" or modelToTrain == "both":
    cellTypeModel.compile(
    optimizer='adam',
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=['categorical_accuracy']
    )

    cellTypeModel.fit(
        cellType_train_data,
        validation_data=cellType_val_data,
        epochs=50,
        callbacks=[early_stop],
    )

Epoch 1/50


  self._warn_if_super_not_called()


[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 49ms/step - accuracy: 0.4274 - loss: 0.0127 - val_accuracy: 0.3583 - val_loss: 947.5905
Epoch 2/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 34ms/step - accuracy: 0.4228 - loss: 0.0000e+00 - val_accuracy: 0.3583 - val_loss: 946.9760
Epoch 3/50
[1m248/248[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 34ms/step - accuracy: 0.4195 - loss: 0.0000e+00 - val_accuracy: 0.3583 - val_loss: 947.0385
Epoch 4/50
[1m142/248[0m [32m━━━━━━━━━━━[0m[37m━━━━━━━━━[0m [1m3s[0m 29ms/step - accuracy: 0.4220 - loss: 0.0000e+00

KeyboardInterrupt: 

In [None]:
if modelToTrain == "isCancerous" or modelToTrain == "both":
    print("Is cancerous: ")
    isCancerousModel.evaluate(isCancerous_val_data)
    isCancerousModel.save('isCancerousModel.keras')

    isCancerous_y_true = isCancerous_val_data.classes
    isCancerous_y_pred = (isCancerousModel.predict(isCancerous_val_data) > 0.5).astype(int)
    print("Cancerous Model Classification Report:")
    print(classification_report(isCancerous_y_true, isCancerous_y_pred))

if modelToTrain == "cellType" or modelToTrain == "both":
    print("Cell type: ")
    cellTypeModel.evaluate(cellType_val_data)
    cellTypeModel.save('cellTypeModel.keras')
    
    cellType_y_true = cellType_val_data.classes
    cellType_y_pred = np.argmax(cellTypeModel.predict(cellType_val_data), axis=1)
    print("Cell Type Model Classification Report:")
    print(classification_report(cellType_y_true, cellType_y_pred))


Is cancerous: 
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 28ms/step - accuracy: 0.8942 - loss: 0.2855
[1m62/62[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 24ms/step
Cancerous Model Classification Report:
              precision    recall  f1-score   support

           0       0.65      0.65      0.65      1270
           1       0.37      0.36      0.36       709

    accuracy                           0.55      1979
   macro avg       0.51      0.51      0.51      1979
weighted avg       0.55      0.55      0.55      1979

