In [None]:
import os
import pandas as pd

In [None]:
# List the training image files. os.listdir returns entries in arbitrary,
# filesystem-dependent order, so the listing is sorted to make positional
# lookups such as images[9] reproducible across runs and machines.
images = sorted(os.listdir('../input/datathonindoml-2022/train/train/'))

In [None]:
import tifffile

In [None]:
import matplotlib.pyplot as plt

# Read one sample scan (entry 9 of the directory listing) for a visual check.
sample_path = '../input/datathonindoml-2022/train/train/' + images[9]
img = tifffile.imread(sample_path)

# Preview rows 100-899 and columns 100-699 of the scan, divided by 255.
crop = img[100:900, 100:700]
plt.imshow(crop / 255)

# Last expression of the cell: show the array shape of the full image.
img.shape

In [None]:
# Load the training labels table from the competition CSV.
labels_csv = '../input/datathonindoml-2022/train_labels.csv'
labels = pd.read_csv(labels_csv)

In [None]:
# Show (number of rows, number of columns) of the labels table as cell output.
labels.shape

In [None]:
from tensorflow.keras.layers import Conv2D, Dense, LayerNormalization, Activation, MaxPool2D, GlobalAveragePooling2D, Add, Input, Flatten
from tensorflow.keras import Model
from tensorflow.keras.regularizers import l2
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
# NOTE(review): this forces tf.function code to run eagerly, which is normally
# a debugging aid and is expected to slow training down -- confirm it is needed.
tf.config.run_functions_eagerly(True)

n = 9 # residual blocks per stage; 6 * n + 2 = 56 layers
# Widths are the 16/32/64 base widths scaled by 1.5. Conv2D `filters` is an
# integer count, so the scaled values are converted explicitly instead of
# passing floats (24.0, 48.0, 96.0) and relying on implicit coercion.
channels = [int(16 * 1.5), int(32 * 1.5), int(64 * 1.5)]


def _conv(filters, kernel_size, strides=(1, 1)):
    """Build a Conv2D layer with the settings shared by every convolution here.

    Args:
        filters: number of output channels.
        kernel_size: (height, width) of the kernel.
        strides: convolution strides; (1, 1) keeps the spatial size.

    Returns:
        An unconnected Conv2D layer using "same" padding, he_normal
        initialisation and L2(1e-4) kernel regularisation.
    """
    return Conv2D(filters, kernel_size=kernel_size, strides=strides, padding="same",
                  kernel_initializer="he_normal", kernel_regularizer=l2(1e-4))


# Stem: one convolution, layer normalisation and GELU on 800x600 RGB input.
inputs = Input(shape=(800, 600, 3))
x = _conv(channels[0], (4, 4))(inputs)
x = LayerNormalization()(x)
x = Activation(tf.nn.gelu)(x)

# Three stages of n residual blocks each.
for c in channels:
    for i in range(n):
        # The first block of a stage halves the spatial resolution. Every
        # width in `channels` is greater than 16, so this applies to the first
        # block of all three stages, including the first one.
        subsampling = i == 0 and c > 16
        strides = (2, 2) if subsampling else (1, 1)
        # Residual branch: 7x7 conv -> GELU -> 2x2 conv -> layer norm.
        y = _conv(c, (7, 7), strides=strides)(x)
        y = Activation(tf.nn.gelu)(y)
        y = _conv(c, (2, 2))(y)
        y = LayerNormalization()(y)
        if subsampling:
            # Project the shortcut with a strided 1x1 conv so its shape
            # matches the residual branch before the addition.
            x = _conv(c, (1, 1), strides=(2, 2))(x)
        x = Add()([x, y])
        x = Activation(tf.nn.gelu)(x)

# Classification head: global pooling, four dense layers, 16-way softmax.
x = GlobalAveragePooling2D()(x)
x = Flatten()(x)
x = Dense(3000, activation = tf.nn.relu)(x)
x = Dense(1500, activation = tf.nn.relu)(x)
x = Dense(1000, activation = tf.nn.relu)(x)
x  = Dense(100, activation = tf.nn.relu)(x)
outputs = Dense(16, activation=tf.nn.softmax, kernel_initializer="he_normal")(x)

model = Model(inputs=inputs, outputs=outputs)
# Free-form tag stored on the model object, e.g. "resnet56" for n = 9.
model.type = "resnet" + str(6 * n + 2)

In [None]:
from tensorflow.keras.utils import plot_model

# Print the layer-by-layer summary of the network.
model.summary()

# Write the architecture diagram to model.png; being the last expression,
# the value returned by plot_model is what the cell displays.
plot_model(model, to_file='model.png')

In [None]:
import keras.backend as K

In [None]:
def get_f1(y_true, y_pred):
    """Return one F1 score computed over every entry of the given tensors.

    Values are clipped to [0, 1] and rounded to the nearest integer before
    counting, and the counts are summed over all elements, so a single
    score is produced for the whole input rather than one per class.

    Args:
        y_true: ground-truth tensor.
        y_pred: predicted tensor, multiplied element-wise with y_true.

    Returns:
        Scalar tensor 2 * P * R / (P + R), with the backend epsilon added to
        each denominator to avoid division by zero.
    """
    def _count_ones(values):
        # Number of entries that round to 1 after clipping to [0, 1].
        return K.sum(K.round(K.clip(values, 0.0, 1.0)))

    eps = K.epsilon()
    hits = _count_ones(y_true * y_pred)
    actual = _count_ones(y_true)
    predicted = _count_ones(y_pred)

    precision = hits / (predicted + eps)
    recall = hits / (actual + eps)
    return 2 * (precision * recall) / (precision + recall + eps)

In [None]:
from sklearn.metrics import f1_score, confusion_matrix

# Accuracy is tracked instead of F1: the author's rationale is that accuracy
# is positively correlated with the F1 score, making it a viable metric here.
optimizer = Adam(learning_rate = 0.0003)
model.compile(
    optimizer = optimizer,
    loss = 'categorical_crossentropy',
    metrics = ['accuracy'],
)

In [None]:
# Create one output directory per class label (0-15) in the working directory.
# exist_ok=True keeps the cell re-runnable: a plain os.mkdir raises
# FileExistsError as soon as the directories already exist.
for i in range(16):
    path = './' + str(i)
    os.makedirs(path, exist_ok=True)

In [None]:
# Sort the training scans into per-label folders (./<label>/<id>.png).

from PIL import Image
import numpy as np

# NOTE(review): dividing by 255 yields a float array; converting such an image
# to 'RGB' presumably truncates the values to 0/1 instead of keeping the grey
# levels, so the saved PNGs may be almost black -- confirm this is intended
# before relying on the exported files.
for image_id, label in zip(labels['id'], labels['label']):
    class_dir = f'./{label}/'
    source = f'../input/datathonindoml-2022/train/train/{image_id}.tif'
    # Same crop as the preview: rows 100-899, columns 100-699, divided by 255.
    scaled = tifffile.imread(source)[100:900, 100:700] / 255
    Image.fromarray(scaled).convert('RGB').save(f'{class_dir}{image_id}.png')

In [None]:
# Every sub-directory of './' is treated as a class folder, so the hidden
# notebook directory is removed first. The guard keeps the cell re-runnable:
# an unconditional os.rmdir raises once the directory is already gone.
virtual_docs = './.virtual_documents'
if os.path.isdir(virtual_docs):
    os.rmdir(virtual_docs)
# NOTE(review): debug mode runs the tf.data pipeline eagerly and is expected
# to slow training down -- confirm it is still needed.
tf.data.experimental.enable_debug_mode()
from tensorflow.keras.preprocessing import image_dataset_from_directory

# Options shared by the training and validation splits. Both calls must use
# the same seed and validation_split so the two subsets do not overlap.
dataset_options = dict(
    directory = './',
    labels = 'inferred',
    label_mode = 'categorical',
    # Inferred class names are otherwise ordered alphanumerically
    # ('0', '1', '10', ..., '15', '2', ...), which would make one-hot index k
    # differ from numeric label k. The explicit list pins index k to label k.
    class_names = [str(k) for k in range(16)],
    color_mode = 'rgb',
    batch_size = 3,
    image_size = (800, 600),
    shuffle = True,
    seed = 123,
    validation_split = 0.1,
)

ds_train = image_dataset_from_directory(subset = 'training', **dataset_options)
ds_validation = image_dataset_from_directory(subset = 'validation', **dataset_options)

from tensorflow.keras.callbacks import EarlyStopping
import gc
gc.collect()
# batch_size is not passed to fit(): the datasets are already batched (3 per
# batch), and fit() does not take batch_size for tf.data inputs.
# NOTE(review): early stopping watches the training loss, not val_loss, even
# though validation data is supplied -- confirm this is intended.
model.fit(ds_train,
          epochs = 15,
          validation_data = ds_validation,
          callbacks = [EarlyStopping(monitor='loss', patience = 2)])

In [None]:
# Create the output directory if needed; exist_ok=True keeps the cell
# re-runnable, whereas os.mkdir raises FileExistsError on a second run.
os.makedirs('./saved_model', exist_ok=True)
# Persist the trained model in HDF5 format. Model.save() returns None, so
# my_model does not hold a model object; use `model` for further work.
my_model = model.save('./saved_model/document_classifier.h5')
import pickle

In [None]:
from tensorflow import keras
# Reload the classifier from the path it is saved to in this notebook
# ('./saved_model/document_classifier.h5'); the bare name
# 'document_classifier' does not match any file written here.
model = keras.models.load_model('./saved_model/document_classifier.h5')