In [20]:
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense

# Load and preprocess the dataset
data = pd.read_csv('your_dataset.csv')  # Replace 'your_dataset.csv' with your actual dataset file
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

# Define the image data generator
train_datagen = ImageDataGenerator(rescale=1./255)
val_datagen = ImageDataGenerator(rescale=1./255)

# Define the batch size and input shape
batch_size = 64
input_shape = (27,27,3)  # Adjust the input shape according to your image dimensions

# Generate the training and validation data
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory='./',  # Replace with the directory where your images are located
    x_col='ImageName',
    y_col='cellType',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=True
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory='./',  # Replace with the directory where your images are located
    x_col='ImageName',
    y_col='cellType',
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    class_mode='categorical',
    shuffle=False
)

# Define the model architecture
model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=input_shape))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D((2, 2)))
model.add(Flatten())
model.add(Dense(256, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Train the model
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_data) // batch_size,
    epochs=num_epochs,
    validation_data=val_generator,
    validation_steps=len(val_data) // batch_size
)

# Evaluate the model on the validation set
val_loss, val_accuracy = model.evaluate(val_generator)
print('Validation Loss:', val_loss)
print('Validation Accuracy:', val_accuracy)


Found 0 validated image filenames belonging to 0 classes.
Found 0 validated image filenames belonging to 0 classes.
Model: "sequential_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_9 (Conv2D)           (None, 25, 25, 32)        896       
                                                                 
 max_pooling2d_9 (MaxPooling  (None, 12, 12, 32)       0         
 2D)                                                             
                                                                 
 conv2d_10 (Conv2D)          (None, 10, 10, 64)        18496     
                                                                 
 max_pooling2d_10 (MaxPoolin  (None, 5, 5, 64)         0         
 g2D)                                                            
                                                                 
 conv2d_11 (Conv2D)          (None, 3, 3, 128)         73856     
    



In [21]:
train_data

Unnamed: 0,InstanceID,patientID,ImageName,cellTypeName,cellType,isCancerous
3370,1045,19,1045.png,inflammatory,1,0
5753,19809,33,19809.png,inflammatory,1,0
3566,370,20,370.png,others,3,0
1076,4815,8,4815.png,epithelial,2,1
7318,3312,46,3312.png,epithelial,2,1
...,...,...,...,...,...,...
9500,21223,57,21223.png,inflammatory,1,0
6721,10106,41,10106.png,epithelial,2,1
7842,5489,50,5489.png,fibroblast,0,0
5524,11894,31,11894.png,inflammatory,1,0


In [22]:
# Convert cell type categories to one-hot encoded vectors
one_hot_labels = pd.get_dummies(data['cellType'])

# Split the data into training and validation sets
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

# Create the generators with one-hot encoded labels
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_data,
    directory='./',
    x_col='ImageName',
    y_col=list(one_hot_labels.columns),
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    class_mode='raw',
    shuffle=True
)

val_generator = val_datagen.flow_from_dataframe(
    dataframe=val_data,
    directory='./',
    x_col='ImageName',
    y_col=list(one_hot_labels.columns),
    target_size=(input_shape[0], input_shape[1]),
    batch_size=batch_size,
    class_mode='raw',
    shuffle=False
)

# Modify the model architecture to include the cell type information
input_layer = tf.keras.layers.Input(shape=input_shape)
conv1 = tf.keras.layers.Conv2D(32, (3, 3), activation='relu')(input_layer)
maxpool1 = tf.keras.layers.MaxPooling2D((2, 2))(conv1)
conv2 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(maxpool1)
maxpool2 = tf.keras.layers.MaxPooling2D((2, 2))(conv2)
conv3 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu')(maxpool2)
maxpool3 = tf.keras.layers.MaxPooling2D((2, 2))(conv3)
flatten = tf.keras.layers.Flatten()(maxpool3)
dense1 = tf.keras.layers.Dense(64, activation='relu')(flatten)

# Include the one-hot encoded cell type labels as input
cell_type_input = tf.keras.layers.Input(shape=(len(one_hot_labels.columns),))
concatenated = tf.keras.layers.Concatenate()([dense1, cell_type_input])

dense2 = tf.keras.layers.Dense(64, activation='relu')(concatenated)
output = tf.keras.layers.Dense(len(one_hot_labels.columns), activation='softmax')(dense2)

model = tf.keras.models.Model(inputs=[input_layer, cell_type_input], outputs=output)
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])

# Train the model
history = model.fit(
    [train_generator.next()[0], one_hot_labels.loc[train_generator.next()[1].index]],
    steps_per_epoch=len(train


ValueError: Asked to retrieve element 0, but the Sequence has length 0