**Dataset Link: [UBC Ovarian Cancer Subtype Classification and Outlier Detection](https://www.kaggle.com/competitions/UBC-OCEAN/data)**

In [None]:
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping
import pandas as pd
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

In [None]:
# Load the competition metadata table from train.csv in the working directory.
# The preview further down shows the columns image_id, label, image_width,
# image_height and is_tma.
df = pd.read_csv('train.csv')

In [None]:
# Update the DataFrame to include the correct filenames.
# The suffix is appended only where it is still missing, which keeps this cell
# idempotent: re-running it no longer yields names such as
# '4_thumbnail.png_thumbnail.png' that the image generators cannot find.
thumbnail_suffix = '_thumbnail.png'
image_names = df['image_id'].astype(str)
already_named = image_names.str.endswith(thumbnail_suffix)
df['image_id'] = image_names.where(already_named, image_names + thumbnail_suffix)

# Show the first few rows of the updated DataFrame
df.head()


Unnamed: 0,image_id,label,image_width,image_height,is_tma
0,4_thumbnail.png,HGSC,23785,20008,False
1,66_thumbnail.png,LGSC,48871,48195,False
2,91_thumbnail.png,HGSC,3388,3388,True
3,281_thumbnail.png,LGSC,42309,15545,False
4,286_thumbnail.png,EC,37204,30020,False


In [None]:
# Lookup from class index to subtype name (index 2 is 'HGSC').
class_labels = ['CC', 'EC', 'HGSC', 'LGSC', 'MC']
true_label = 2  # index into class_labels

# Placeholder until one of the models has produced a prediction further down.
# The original bare `predicted_class =` was a SyntaxError that aborted
# "Restart Kernel -> Run All" at this cell.
predicted_class = None

In [None]:
import pandas as pd

# Switch the label column of df to pandas' categorical dtype.
label_categories = df['label'].astype('category')
df['label'] = label_categories

# Keep the integer code of each category next to the textual label.
df['label_encoded'] = label_categories.cat.codes

# Render the updated DataFrame as the cell output.
df

Unnamed: 0,image_id,label,image_width,image_height,is_tma,label_encoded
0,4_thumbnail.png,HGSC,23785,20008,False,2
1,66_thumbnail.png,LGSC,48871,48195,False,3
2,91_thumbnail.png,HGSC,3388,3388,True,2
3,281_thumbnail.png,LGSC,42309,15545,False,3
4,286_thumbnail.png,EC,37204,30020,False,1
...,...,...,...,...,...,...
533,65022_thumbnail.png,LGSC,53355,46675,False,3
534,65094_thumbnail.png,MC,55042,45080,False,4
535,65300_thumbnail.png,HGSC,75860,27503,False,2
536,65371_thumbnail.png,HGSC,42551,41800,False,2


In [None]:
# Directory passed to flow_from_dataframe as the image folder; the filenames in
# df['image_id'] are resolved against it.
# NOTE(review): this looks like a mounted Google Drive path -- adjust it for
# your own environment.
image_path = '/content/drive/MyDrive/ubc'

In [None]:
# Create train generator: light geometric augmentation plus a 20% hold-out.
# `rescale` maps pixel values into [0, 1]. The prediction cells divide test
# images by 255, so the networks have to be trained on that same input range;
# without it they are trained on 0-255 inputs and queried with 0-1 inputs.
train_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.15,
    zoom_range=0.1,
    validation_split=0.2,
    horizontal_flip=True)



In [None]:
# Training subset of df: shuffled batches of 32 images resized to 243x243,
# with one-hot ('categorical') targets taken from the `label` column.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    directory=image_path,  # Replace with your image folder path
    x_col='image_id',
    y_col='label',
    target_size=(243, 243),
    class_mode='categorical',
    batch_size=32,
    shuffle=True,
    subset='training',
)

Found 411 validated image filenames belonging to 5 classes.




In [None]:
# Create validation generator.
# Validation images should not be randomly rotated/shifted/flipped: augmenting
# them makes `val_loss` (the quantity EarlyStopping monitors) noisy. A dedicated
# generator therefore copies only the rescale setting of `train_datagen` and
# applies no augmentation. Keras takes the validation subset from a fixed slice
# of the dataframe, so with the same validation_split it stays disjoint from
# the training subset.
val_datagen = ImageDataGenerator(
    rescale=train_datagen.rescale,
    validation_split=0.2,  # must equal the validation_split of train_datagen
)

validation_generator = val_datagen.flow_from_dataframe(
    dataframe=df,
    directory=image_path,
    x_col='image_id',
    y_col='label',
    target_size=(243, 243),
    batch_size=32,
    class_mode='categorical',
    subset='validation',  # Use 'validation' instead of 'training' for the validation set
    shuffle=False,  # evaluation does not need shuffling; keeps batch order stable
)


Found 102 validated image filenames belonging to 5 classes.




In [None]:
# Model Definition: three Conv2D/MaxPooling2D stages, then a dense classifier
# whose softmax width equals the number of distinct labels in df.
num_label_classes = df['label'].nunique()  # Number of classes

model = Sequential()
model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(243, 243, 3)))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(64, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Conv2D(128, (3, 3), activation='relu'))
model.add(MaxPooling2D(2, 2))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(num_label_classes, activation='softmax'))

In [None]:
# Compile the Model: Adam at learning rate 0.001, categorical cross-entropy
# loss (targets are one-hot), accuracy reported as the metric.
adam_optimizer = Adam(learning_rate=0.001)
model.compile(
    loss='categorical_crossentropy',
    optimizer=adam_optimizer,
    metrics=['accuracy'],
)

In [None]:
# Early Stopping Callback: monitors the validation loss with a patience of 5.
# The same callback object is passed to both model.fit and model2.fit below.
# NOTE(review): restore_best_weights is left at its default, so the weights
# kept after training are those of the last epoch run -- confirm that is intended.
early_stopping = EarlyStopping(monitor='val_loss', patience=5)

In [None]:
# Train the Model: up to 10 epochs on the training generator, validating on
# the validation generator after each epoch, with early stopping attached.
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    callbacks=[early_stopping],
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
from tensorflow.keras.applications import DenseNet121
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Input

# Load the pre-trained DenseNet121 model (ImageNet weights, no classifier head)
base_model = DenseNet121(weights='imagenet', include_top=False)

# Freeze the convolutional backbone so only the new head is trained. The cell
# always stated that compiling must happen *after* setting layers to
# non-trainable, but the freeze itself was missing, so every DenseNet weight
# was being fine-tuned on a few hundred images.
for layer in base_model.layers:
    layer.trainable = False

# Add a global spatial average pooling layer
x = base_model.output
x = GlobalAveragePooling2D()(x)

# Add a fully-connected layer
x = Dense(1024, activation='relu')(x)

# Add a softmax classification layer; the class count is derived from the
# data, the same way the first model does it, instead of a hard-coded 5.
num_classes = df['label'].nunique()
predictions = Dense(num_classes, activation='softmax')(x)

# This is the model we will train
model2 = Model(inputs=base_model.input, outputs=predictions)

# Compile the model (must be done *after* setting layers to non-trainable)
model2.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5


In [None]:
# Train the DenseNet-based model with the same generators, epoch budget and
# early-stopping callback as the first model. Note that `history` is rebound
# here, replacing the first model's training history.
history = model2.fit(
    train_generator,
    validation_data=validation_generator,
    callbacks=[early_stopping],
    epochs=10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [None]:
#Testing the data using both the models

In [None]:
from keras.models import load_model
from keras.preprocessing import image
from PIL import Image
import numpy as np

# Remove Pillow's pixel-count ceiling so very large images can be opened.
Image.MAX_IMAGE_PIXELS = None

# Read the test image at the network's input resolution and add a batch axis.
test_image_path = '/content/41.png'
img = image.load_img(test_image_path, target_size=(243, 243))
img_array = np.expand_dims(image.img_to_array(img), axis=0)

# Divide pixel values by 255 -- only appropriate if the training images were
# scaled in the same way.
img_array = img_array / 255.0

# Run the first model and take the index of the highest class probability.
predictions = model.predict(img_array)
predicted_class = predictions.argmax(axis=1)

# Print the predicted class
print("Predicted class:", predicted_class)


Predicted class: [2]


In [None]:
# Repeat the single-image check with the DenseNet-based model (model2).
# Relies on `image` and `np` imported in the previous prediction cell.
test_image_path = '/content/41.png'
img = image.load_img(test_image_path, target_size=(243, 243))

# Pixel array with a leading batch dimension, divided by 255 in place.
img_array = image.img_to_array(img)[np.newaxis, ...]
img_array /= 255.0

# Predict, then take the index of the highest class probability.
predictions = model2.predict(img_array)
predicted_class = predictions.argmax(axis=1)

# Print the predicted class
print("Predicted class:", predicted_class)


Predicted class: [2]


In [None]:
from sklearn.metrics import f1_score, classification_report

# Illustrative inputs only: one example true label per class (0..4) ...
y_true = list(range(5))

# ... compared against a constant prediction of class 2 for every sample.
predicted_class = 2
y_pred = [predicted_class for _ in y_true]

# average=None returns one F1 value per class instead of a single aggregate.
f1_per_class = f1_score(y_true, y_pred, average=None)

# Report the per-class scores.
for class_label, f1 in enumerate(f1_per_class):
    print(f"F1-score for predicted class {predicted_class} against true class {class_label}: {f1}")

F1-score for predicted class 2 against true class 0: 0.0
F1-score for predicted class 2 against true class 1: 0.0
F1-score for predicted class 2 against true class 2: 0.33333333333333337
F1-score for predicted class 2 against true class 3: 0.0
F1-score for predicted class 2 against true class 4: 0.0


In [None]:
from sklearn.metrics import classification_report

class_labels = ['CC', 'EC', 'HGSC', 'LGSC', 'MC']
true_label = 2
predicted_class = 2

# Convert class indices to class labels
predicted_label_name = class_labels[predicted_class]
true_label_name = class_labels[true_label] if isinstance(true_label, int) else true_label

# Single-sample y_true / y_pred, expressed as class *names*
true_labels = [true_label_name]
predicted_labels = [predicted_label_name]

# `labels` has to use the same representation as y_true / y_pred. The data here
# are name strings, so the names are passed. Passing integer indices (as before)
# matched nothing: every row showed support 0 and numpy emitted
# `mask &= (ar1 != a)` comparison warnings.
print(classification_report(
    true_labels,
    predicted_labels,
    labels=class_labels,
    target_names=class_labels,
    zero_division=0,
))

              precision    recall  f1-score   support

          CC       0.00      0.00      0.00         0
          EC       0.00      0.00      0.00         0
        HGSC       0.00      0.00      0.00         0
        LGSC       0.00      0.00      0.00         0
          MC       0.00      0.00      0.00         0

   micro avg       0.00      0.00      0.00         0
   macro avg       0.00      0.00      0.00         0
weighted avg       0.00      0.00      0.00         0



  mask &= (ar1 != a)
  mask &= (ar1 != a)
  mask &= (ar1 != a)
  mask &= (ar1 != a)


In [None]:
# The predicted label is 2, i.e. HGSC, which is the true value the model was expected to predict.