In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split

from tensorflow.keras.applications import *
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, GlobalAveragePooling2D
from tensorflow.keras.optimizers import Adam


import os


In [9]:
from sklearn.ensemble import RandomForestClassifier

In [2]:
#configuring gpu memory usage

gpus = tf.config.experimental.list_physical_devices('GPU')

if gpus:
    try:
        # Set a memory limit for the first GPU (assuming at least one GPU is available)
        tf.config.experimental.set_virtual_device_configuration(
            gpus[0],
            [tf.config.experimental.VirtualDeviceConfiguration(memory_limit=4096)])  # Limit to 4GB
        
        # After setting the configuration, list logical GPUs to verify the configuration
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        
        # Print information about the available GPUs and logical GPUs
        print(len(gpus), "Physical GPU(s) available,", len(logical_gpus), "Logical GPU(s) configured.")
        
    except RuntimeError as e:
        # Print any runtime error that occurs during configuration
        print("Error:", e)
else:
    print("No GPU(s) available. TensorFlow cannot use GPU acceleration.")


1 Physical GPU(s) available, 1 Logical GPU(s) configured.


## Reading and preparing data for model training, Using ImageDataGenrator from tensorflow to use image augmentations for training. 

In [3]:
df = pd.read_csv("../balanced_data.csv")
df['cancer'] = df['cancer'].astype(str)

cropped_image_names = []
for im in os.listdir('../CroppedDataset2'):
    cropped_image_names.append(im)

df['cropped_image_path'] = cropped_image_names


train_df, val_df = train_test_split(df, test_size=0.08, random_state=101)

In [11]:
IMAGE_SIZE = (256, 256)
BATCH_SIZE = 16
NUM_EPOCHS = 30
LEARNING_RATE = 0.001
INPUT_FOLDER = '../CroppedDataset2'

In [12]:
#setting up the training and validation datagenerator objects.

train_datagen = ImageDataGenerator(
    rescale=1./255,             
    rotation_range=20,          
    width_shift_range=0.2,      
    height_shift_range=0.2,     
    shear_range=0.2,            
    zoom_range=0.2,             
    horizontal_flip=True       
)

val_datagen = ImageDataGenerator(rescale=1./255)

In [13]:
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    directory=INPUT_FOLDER,                 
    x_col='cropped_image_path',                      
    y_col='cancer',                       
    target_size=IMAGE_SIZE,                
    batch_size=16,
    class_mode='binary'                   
)


val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_df,
    directory=INPUT_FOLDER,
    x_col='cropped_image_path',
    y_col='cancer',
    target_size=IMAGE_SIZE,
    batch_size=16,
    class_mode='binary'
)

Found 2130 validated image filenames belonging to 2 classes.
Found 186 validated image filenames belonging to 2 classes.


## Extracting features from 4 layer CNN and preparing those features so that they can be fed into a random forest classifiers followed by result metrics.

In [14]:
cnn_for_features = Sequential()

cnn_for_features.add(Conv2D(32, (3, 3), activation='relu', input_shape=(256,256,3)))
cnn_for_features.add(MaxPooling2D((2, 2)))

cnn_for_features.add(Conv2D(64, (3, 3), activation='relu'))
cnn_for_features.add(MaxPooling2D((2, 2)))

cnn_for_features.add(Conv2D(128, (3, 3), activation='relu'))
cnn_for_features.add(MaxPooling2D((2, 2)))

cnn_for_features.add(Conv2D(256, (3, 3), activation='relu'))
cnn_for_features.add(MaxPooling2D((2, 2)))


cnn_for_features.add(Flatten())


In [15]:
cnn_features_for_rfc_train = cnn_for_features.predict(train_generator)
cnn_features_for_rfc_val = cnn_for_features.predict(val_generator)



In [21]:
rfc = RandomForestClassifier(n_estimators=50,random_state=101)


In [22]:
rfc.fit(cnn_features_for_rfc_train,train_generator.classes)

In [23]:
predictions = rfc.predict(cnn_features_for_rfc_val)

In [24]:
predictions

array([1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0,
       1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0,
       1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 1,
       1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0,
       0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0,
       0, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
       1, 0, 1, 1, 1, 1, 1, 1, 0, 0])

In [26]:
y_true = val_generator.classes

In [27]:
from sklearn.metrics import confusion_matrix, accuracy_score,classification_report

In [29]:
conf_matrix = confusion_matrix(y_true, predictions)
class_names = val_generator.class_indices.keys()
report = classification_report(y_true, predictions, target_names=class_names)

In [30]:
print(report)

              precision    recall  f1-score   support

           0       0.56      0.66      0.60        94
           1       0.57      0.47      0.51        92

    accuracy                           0.56       186
   macro avg       0.57      0.56      0.56       186
weighted avg       0.57      0.56      0.56       186

