In [None]:
import warnings
warnings.filterwarnings('ignore')
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import tensorflow as tf
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
%matplotlib inline
import openslide
from openslide import OpenSlide
import tensorflow as tf
from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense

from sklearn.model_selection import train_test_split

from tqdm import tqdm

In [None]:
# Read the competition's train and test metadata CSVs into DataFrames.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in (
        "/kaggle/input/mayo-clinic-strip-ai/train.csv",
        "/kaggle/input/mayo-clinic-strip-ai/test.csv",
    )
)

# Preprocessing

In [None]:
# Derive the on-disk .tif location of every slide from its image_id.
train_df["file_path"] = train_df["image_id"].map(
    lambda image_id: "../input/mayo-clinic-strip-ai/train/" + image_id + ".tif"
)
test_df["file_path"] = test_df["image_id"].map(
    lambda image_id: "../input/mayo-clinic-strip-ai/test/" + image_id + ".tif"
)

# Binary ground truth: 1 when the label is "CE", 0 for any other label.
train_df["Y"] = (train_df["label"] == "CE").astype("int64")

In [None]:
%%time
def preprocess(image_path, location=(1000, 1000), region_size=(5000, 5000),
               output_size=(1024, 1024)):
    """Read a fixed crop from a whole-slide image and resize it.

    Parameters
    ----------
    image_path : str
        Path of a slide file that OpenSlide can open.
    location : tuple of int, optional
        (x, y) of the crop's top-left corner, passed to ``read_region``.
    region_size : tuple of int, optional
        (width, height) of the crop read at level 0.
    output_size : tuple of int, optional
        Target size handed to ``tf.image.resize``.

    Returns
    -------
    numpy.ndarray
        The resized crop. Pixel values are not rescaled here; callers divide
        by 255 themselves. ``read_region`` is documented to return an RGBA
        image, so a trailing channel axis of length 4 is expected.
    """
    # Use the slide as a context manager so its file handle is released as
    # soon as the crop is in memory; otherwise every processed image leaves
    # an open handle behind for the lifetime of the kernel.
    with OpenSlide(image_path) as slide:
        image = slide.read_region(location, 0, region_size)

    image = tf.image.resize(image, output_size)
    return np.array(image)


# Run every training slide through preprocess() (with a progress bar), stack
# the results into a single array and scale the pixel values by 1/255.
train_x = np.array(
    [preprocess(slide_path) for slide_path in tqdm(train_df['file_path'])]
) / 255.0

# Targets in the same row order as train_x.
train_y = train_df["Y"]


In [None]:

# The network has to accept exactly what preprocess() emits. The previous
# hard-coded (512, 512, 4) did not match the 1024x1024 arrays stored in
# train_x, so model.fit() rejected the data. Reading the per-sample shape from
# the data keeps the two in sync whatever size preprocessing uses.
input_shape = tuple(train_x.shape[1:])

model = Sequential([
    # Stage 1: two stride-2 convolutions, each halving the spatial size.
    Conv2D(filters=32, kernel_size=(3, 3), strides=2, padding='same',
           activation='relu', input_shape=input_shape),
    Conv2D(filters=64, kernel_size=(3, 3), strides=2, padding='same',
           activation='relu'),
    layers.BatchNormalization(),
    Dropout(0.20),

    # Stage 2: two more stride-2 convolutions.
    Conv2D(filters=64, kernel_size=(3, 3), strides=2, padding='same',
           activation='relu'),
    Conv2D(filters=64, kernel_size=(3, 3), strides=2, padding='same',
           activation='relu'),
    layers.BatchNormalization(),

    # Head: flatten, one hidden dense layer, then a single output unit.
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.20),
    # No activation on the output: the raw value is clamped to [0, 1] later
    # when the submission table is built.
    Dense(1),
])



# Training

In [None]:
%%time
import math
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping, Callback, ReduceLROnPlateau

# Fixed seed so the train/validation partition, and with it the reported
# validation metrics, is the same on every run.
SPLIT_SEED = 42

# Hold out 20% of the samples for validation.
# NOTE: train_x / train_y are rebound to the training part here (which lets
# the full image array be freed), so this cell cannot be re-run on its own;
# re-run the cell that builds train_x first.
train_y = train_df["Y"]
train_x, test_x, train_y, test_y = train_test_split(
    train_x,
    train_y,
    test_size=0.2,
    random_state=SPLIT_SEED,
)

# The 0/1 target is fitted as a regression (MSE); binary accuracy is tracked
# alongside RMSE as a more readable metric.
model.compile(
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse"),
             tf.keras.metrics.BinaryAccuracy(name="accuracy")],
    optimizer=tf.keras.optimizers.Adam(3e-4))

history = model.fit(
    train_x,
    train_y,
    epochs=50,
    batch_size=32,
    validation_data=(test_x, test_y),
    shuffle=True,
    verbose=1,
)

In [None]:
# Report how many epochs were recorded and the last-epoch value of each
# tracked quantity from the training history.
print(f"Epochs: {len(history.history['accuracy'])}")
for caption, metric_key in (
    ("Accuracy", "accuracy"),
    ("Validation Accuracy", "val_accuracy"),
    ("Loss", "loss"),
    ("Validation Loss", "val_loss"),
):
    print(f"{caption}: {history.history[metric_key][-1]}")

# Submission

In [None]:
%%time
# Apply the same preprocessing and 1/255 scaling to the test slides as was
# used for the training data.
test_s = np.array(
    [preprocess(slide_path) for slide_path in test_df['file_path']]
) / 255.0

# One raw model output per test slide.
sub_pred = model.predict(test_s)

In [None]:
# The model ends in a single output unit, so predict() yields one column;
# flatten it to one score per test row. float64 matches the dtype the scores
# had after the element-wise clamping this replaces.
ce_score = np.asarray(sub_pred, dtype="float64").ravel()

submission = pd.DataFrame(test_df["patient_id"].copy())

# The raw model output is unbounded, so clamp it into [0, 1] in one
# vectorised step; the complementary value is used for the LAA column.
submission["CE"] = np.clip(ce_score, 0.0, 1.0)
submission["LAA"] = 1 - submission["CE"]

# A patient may appear on several rows: average their scores into one row.
submission = submission.groupby("patient_id").mean()
submission = submission[["CE", "LAA"]].round(6).reset_index()
submission

In [None]:
submission.to_csv("submission.csv", index = False)
!head submission.csv