In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
def summarize_input_files(root='/kaggle/input', max_files_per_dir=5):
    """Print a short per-directory summary of the files under ``root``.

    Printing one line per file floods the cell output when a directory holds
    many files (e.g. an image dataset with one file per sample), so only the
    first ``max_files_per_dir`` names of each directory are shown, together
    with the directory's file count.

    Args:
        root: Directory to walk recursively.
        max_files_per_dir: Maximum number of file paths printed per directory.

    Returns:
        Total number of files found under ``root``.
    """
    total_files = 0
    for dirname, _, filenames in os.walk(root):
        if not filenames:
            continue
        total_files += len(filenames)
        print(f"{dirname}: {len(filenames)} files")
        # Sort so the preview is stable between runs.
        for filename in sorted(filenames)[:max_files_per_dir]:
            print(os.path.join(dirname, filename))
        hidden = len(filenames) - max_files_per_dir
        if hidden > 0:
            print(f"    ... and {hidden} more")
    return total_files


input_file_count = summarize_input_files()

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
#IMPORTING LIBRARIES

import seaborn as sns
import matplotlib.pyplot as plt 
from PIL import Image
import tensorflow as tf
from tensorflow import keras
from keras.preprocessing.image import ImageDataGenerator
from keras import layers
from keras.models import Sequential
import cv2
import os
import warnings
warnings.filterwarnings("ignore")

In [None]:
#A VIEW OF THE TRAIN TABLE CONTAINING THE LABELS

# Single leading slash, matching every other dataset path in this notebook.
train_df = pd.read_csv("/kaggle/input/histopathologic-cancer-detection/train_labels.csv")
train_df

In [None]:
# Bar chart of how many rows of train_df carry each value of the 'label' column.
sns.countplot(data=train_df, x='label')

Balanced Data

In [None]:
#Viewing Images

def plot_image(label,title):
    """Show the first (up to) four training images that carry ``label``.

    Reads the module-level ``train_df``. The lookup compares ``label`` against
    the ``label`` column as-is and appends ".tif" to the ``id`` column, so it
    must run while ``train_df`` still holds the raw values read from the CSV
    (a later cell converts labels to strings and appends ".tif" to the ids).

    Args:
        label: Value of the ``label`` column to select.
        title: Title drawn above the whole row of images.
    """
    # Filter once instead of rebuilding the filtered frame for every subplot.
    matching_ids = train_df.loc[train_df['label']==label, 'id'].reset_index(drop=True)

    fig, ax = plt.subplots(1,4,figsize=(12,3.5))

    for num,axis in enumerate(ax.flat):
        if num >= len(matching_ids):
            # Fewer matches than subplots: leave the remaining panels blank.
            axis.axis("off")
            continue
        path = "/kaggle/input/histopathologic-cancer-detection/train/" + matching_ids[num]+".tif"
        image = plt.imread(path)
        axis.imshow(image)
    # plt.title() would only title the last subplot; suptitle labels the figure.
    fig.suptitle(title)
    plt.show()

plot_image(1,"Positive Cases")
plot_image(0,"Negative Cases")

In [None]:
# Checking the resolution and contrast of a random subset of images
# Absolute path, consistent with the other cells that read from the train folder.
image_paths = train_df['id'].sample(5).apply(lambda x: f'/kaggle/input/histopathologic-cancer-detection/train/{x}.tif').tolist()

for img_path in image_paths:
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals a missing/unreadable file by returning None
        # instead of raising; report it rather than crashing on img.shape.
        print(f"Image: {img_path.split('/')[-1]} could not be read\n")
        continue

    # img.shape is (rows, cols, channels), i.e. (height, width, channels).
    height, width = img.shape[:2]

    # Displaying image details
    print(f"Image: {img_path.split('/')[-1]}")
    print(f"Resolution: {width}x{height}")
    # max - min is independent of channel order, so no BGR->RGB conversion is needed.
    print(f"Contrast (max-min pixel values): {int(np.max(img)) - int(np.min(img))}\n")

In [None]:
# Count missing entries in each column of the labels table.
train_df.isnull().sum()

In [None]:
#No need to resize image, but function to resize is ...

from tensorflow.keras.preprocessing.image import load_img,img_to_array

# Image size fed to the network and batch size used by the data generators.
IMG_WIDTH = 96
IMG_HEIGHT = 96
BATCH_SIZE = 64

def resize_image(image_path,target_width=IMG_WIDTH,target_height=IMG_HEIGHT):
    """Load an image from disk, resized to the requested size, as an array.

    Args:
        image_path: Path of the image file to load.
        target_width: Width in pixels of the returned image.
        target_height: Height in pixels of the returned image.

    Returns:
        The array produced by ``img_to_array`` for the resized image.
    """
    # Keras expects target_size as (height, width), not (width, height).
    image = load_img(image_path,target_size=(target_height,target_width))
    return img_to_array(image)

In [None]:
#Splitting Data Into Train And Validation Sets


from sklearn.model_selection import train_test_split


# flow_from_dataframe with class_mode="binary" needs the class column as strings.
train_df['label'] = train_df['label'].astype(str)

# Append the file extension only where it is missing, so re-running this cell
# does not turn "<id>.tif" into "<id>.tif.tif".
image_ids = train_df['id'].astype(str)
train_df['id'] = image_ids.where(image_ids.str.endswith('.tif'), image_ids + '.tif')

# Stratified 90/10 split with a fixed seed so the split is reproducible.
train_set, val_set = train_test_split(train_df,test_size=0.1,random_state=0,stratify=train_df.label)

# brightness_range holds multiplicative brightness factors: 1.0 leaves the image
# unchanged and 0.0 is fully black. The previous (-2.0, 2.0) range therefore
# turned a large share of the training images black.
# NOTE(review): (0.8, 1.2) is a conservative choice; tune as needed.
train_datagen = ImageDataGenerator(rescale = 1.0/255.0,zoom_range=0.4,brightness_range=(0.8,1.2),
                             horizontal_flip=True,vertical_flip=True,rotation_range=20,
                             data_format="channels_last",width_shift_range=0.2,
                             height_shift_range=0.2)

# Validation images are only rescaled, never augmented.
val_datagen = ImageDataGenerator(rescale=1./255)

In [None]:
# Number of validation rows per label value.
val_set['label'].value_counts()

In [None]:
# Peek at the first id in the training split.
train_set['id'].iloc[0]

In [None]:
#Loading Images To Create Train And Validation Generators

# target_size is (height, width) in Keras.
# No `subset` argument: the train/validation split was already made with
# train_test_split, and train_datagen defines no validation_split.
train_gen = train_datagen.flow_from_dataframe(directory="/kaggle/input/histopathologic-cancer-detection/train",
                                        dataframe=train_set,
                                       target_size=(IMG_HEIGHT,IMG_WIDTH),
                                       color_mode = "grayscale",
                                       x_col="id",
                                       y_col="label",
                                       batch_size=BATCH_SIZE,
                                       class_mode="binary")


# shuffle=False keeps validation batches in dataframe order, so predictions can
# be matched back to val_set rows and every evaluation sees the same order.
val_gen = val_datagen.flow_from_dataframe(dataframe=val_set,
                                        directory="/kaggle/input/histopathologic-cancer-detection/train",
                                       target_size=(IMG_HEIGHT,IMG_WIDTH),
                                       color_mode = "grayscale",
                                       x_col="id",
                                       y_col="label",
                                       batch_size=BATCH_SIZE,
                                       class_mode="binary",
                                       shuffle=False,
                                      seed=123)

In [None]:
#Model Creation Using Tensorflow Sequential API

# Three Conv2D + MaxPooling2D stages followed by two dense layers and a single
# sigmoid unit, so the model outputs a probability (not a logit).
# Spatial size with the 96x96 input: 96 -> 94 -> 47 -> 46 -> 23 -> 21 -> 7,
# i.e. Flatten produces 7 * 7 * 256 = 12544 features.
model = Sequential([
        # channels_last input shape is (height, width, channels).
        keras.Input(shape=(IMG_HEIGHT,IMG_WIDTH,1)),
        layers.Conv2D(filters=128,kernel_size=3,padding="valid",kernel_initializer="he_uniform",activation="relu"),
        layers.MaxPooling2D(pool_size=(2,2),strides=2),
        layers.Conv2D(filters=128,kernel_size=2,padding="valid",kernel_initializer="he_uniform",activation="relu"),
        layers.MaxPooling2D(pool_size=(2,2),strides=2),
        layers.Conv2D(filters=256,kernel_size=3,padding="valid",kernel_initializer="he_uniform",activation="relu"),
        layers.MaxPooling2D(pool_size=(3,3),strides=3),
        layers.Flatten(),
        layers.Dense(1000,activation="relu"),
        layers.Dense(512,activation="relu"),
        layers.Dense(1,activation="sigmoid")
])

In [None]:
#Setting Metrics, Optimizer Function And Loss Function

# The model's last layer applies a sigmoid, so its outputs are probabilities;
# the loss must therefore be configured with from_logits=False.
model.compile(loss=keras.losses.BinaryCrossentropy(from_logits=False),optimizer=keras.optimizers.Adam(),metrics=[keras.metrics.AUC()])

In [None]:
#Begin Training

# Fit on the augmented training generator for 10 epochs, evaluating on the
# validation generator after each epoch; the returned History is kept.
history = model.fit(
    train_gen,
    validation_data=val_gen,
    epochs=10,
    verbose=True,
)

In [None]:
#Custom Training Loops- Let's Have More Control On The Training Process

num_epochs = 10
optimizer = keras.optimizers.Adam()
# The model ends in a sigmoid, so the loss receives probabilities, not logits.
loss_fn = keras.losses.BinaryCrossentropy(from_logits=False)
train_metric = keras.metrics.AUC()
val_metric = keras.metrics.AUC()

# Keras image iterators yield batches forever when iterated directly, so an
# epoch is defined explicitly as len(train_gen) indexed batches.
steps_per_epoch = len(train_gen)
log_every = 100  # print progress every this many steps instead of every batch

for epoch in range(num_epochs):
    print(f"\n Start Of Training Epoch {epoch}")
    for batch_idx in range(steps_per_epoch):
        x_batch, y_batch = train_gen[batch_idx]
        # Labels arrive as a flat vector; match the model's (batch, 1) output.
        y_true = y_batch.reshape(-1,1)

        with tf.GradientTape() as tape:
            y_pred = model(x_batch,training=True)
            loss = loss_fn(y_true,y_pred)

        gradients = tape.gradient(loss,model.trainable_weights)
        optimizer.apply_gradients(zip(gradients,model.trainable_weights))
        train_metric.update_state(y_true,y_pred)

        if (batch_idx + 1) % log_every == 0 or batch_idx + 1 == steps_per_epoch:
            print(f"step {batch_idx + 1}/{steps_per_epoch} - loss {float(loss):.4f} - running AUC {float(train_metric.result()):.4f}")

    train_auc = train_metric.result()
    train_metric.reset_state()
    print(f"AUC over epoch {train_auc}")

    # Indexed access does not reshuffle on its own; this is the hook model.fit
    # calls between epochs to draw a new sample order.
    train_gen.on_epoch_end()

In [None]:
# Evaluate on exactly one pass over the validation set. Iterating val_gen
# directly would never stop, because Keras image iterators loop indefinitely.
for batch_idx in range(len(val_gen)):
    x_batch_val, y_batch_val = val_gen[batch_idx]
    val_pred = model(x_batch_val, training=False)
    # Reshape labels to (batch, 1) to match the model output.
    val_metric.update_state(y_batch_val.reshape(-1, 1), val_pred)

val_auc = val_metric.result()
# reset_state() is the current API name (reset_states() is the deprecated alias).
val_metric.reset_state()
print(f"AUC over val_set {val_auc}")

In [None]:
#Plotting Training/Validation Loss and Accuracy