In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.utils import class_weight
from sklearn.metrics import accuracy_score
from tqdm.notebook import tqdm

In [None]:
# Root folder of the dataset; the split sub-folder names ("train", "val", "test")
# are appended to this prefix when the datasets are loaded.
data_dir = 'data/'

# Images are resized to image_size x image_size on load; the same value is used
# for the InceptionV3 input shape.
image_size = 299
# Number of images per batch for the training dataset.
batch_size = 32

In [None]:
def get_class_weight(ds):
    """Compute 'balanced' class weights from the labels of a dataset.

    ``ds`` is iterated once as (features, labels) pairs. The label batches are
    joined into a single array and handed to scikit-learn's
    ``compute_class_weight`` together with the distinct labels found.

    Returns whatever ``compute_class_weight`` produces for those labels
    (one weight per distinct label, in ``np.unique`` order).
    """
    label_batches = []
    for _, batch_labels in ds:
        label_batches.append(batch_labels)
    labels = np.concatenate(label_batches, axis=0)

    return class_weight.compute_class_weight(
        class_weight='balanced',
        classes=np.unique(labels),
        y=labels,
    )

In [None]:
# Load the training split as batches of (images, integer labels).
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir + "train",
    seed=123,
    image_size=(image_size, image_size),
    batch_size=batch_size,
)
class_names = train_ds.class_names
num_classes = len(class_names)

# model.fit takes class weights as a {class_index: weight} mapping, so the
# weight array is keyed by its position.
class_weights = {
    index: weight
    for index, weight in enumerate(get_class_weight(train_ds))
}

Found 8012 files belonging to 7 classes.


In [None]:
# Validation images are loaded without labels and without shuffling: the ground
# truth comes from a CSV file at evaluation time, so the file order must stay
# deterministic.
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir + 'val',
    image_size=(image_size, image_size),
    batch_size=batch_size,  # shared config constant instead of a duplicated literal 32
    shuffle=False,
    labels=None,
)

Found 1002 files belonging to 1 classes.


In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Cache the decoded batches, reshuffle them, and prefetch.
# The dataset is already batched, so the 1000-element shuffle buffer holds
# whole batches rather than single images.
train_ds = (
    train_ds
    .cache()
    .shuffle(buffer_size=1000)
    .prefetch(AUTOTUNE)
)

In [None]:
# Random augmentations: horizontal flips and rotations with factor 0.2.
augmentation_layers = [
    tf.keras.layers.RandomFlip(mode='horizontal'),
    tf.keras.layers.RandomRotation(factor=0.2),
]
data_augmentation = tf.keras.Sequential(layers=augmentation_layers)

# Input scaling function that matches the InceptionV3 backbone.
preprocess_input = tf.keras.applications.inception_v3.preprocess_input

In [None]:
# ImageNet-pretrained InceptionV3 without its classification top, used as the
# feature-extraction backbone.
base_model = tf.keras.applications.inception_v3.InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(image_size, image_size, 3),
)

In [None]:
# NOTE: this cell rebinds train_ds / val_ds in place. Re-running it without
# first re-running the cells that load the datasets would apply the
# preprocessing a second time.

# Training images: random augmentation first, then InceptionV3 input scaling.
# The map runs in parallel and is followed by a prefetch so that augmentation
# overlaps with training instead of running sequentially on the critical path.
train_ds = train_ds.map(
    lambda x, y: (preprocess_input(data_augmentation(x, training=True)), y),
    num_parallel_calls=tf.data.AUTOTUNE,
).prefetch(tf.data.AUTOTUNE)

# Validation images (no labels in the dataset): input scaling only.
val_ds = val_ds.map(
    lambda x: preprocess_input(x),
    num_parallel_calls=tf.data.AUTOTUNE,
).prefetch(tf.data.AUTOTUNE)

In [None]:
# Whether the backbone weights are updated during training (True = fine-tune
# the whole network).
train_base = True
base_model.trainable = train_base

# Classification head: global pooling -> dropout -> 1024-unit ReLU -> softmax.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
classification_layer = tf.keras.layers.Dense(units=1024, activation='relu')
prediction_layer = tf.keras.layers.Dense(units=num_classes, activation='softmax')

# Thread the backbone's final feature map through the head layers in order.
x = base_model.layers[-1].output
for head_layer in (
    global_average_layer,
    tf.keras.layers.Dropout(rate=0.2),
    classification_layer,
):
    x = head_layer(x)
outputs = prediction_layer(x)

model = tf.keras.Model(inputs=base_model.input, outputs=outputs)

In [None]:
base_learning_rate = 1e-5

# The prediction layer already applies a softmax, so the loss is configured
# for probabilities (from_logits=False); labels are integer class indices.
optimizer = tf.keras.optimizers.Adam(learning_rate=base_learning_rate)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False)

model.compile(
    loss=loss_fn,
    optimizer=optimizer,
    metrics=["acc"],
)

In [None]:
# Number of passes over the training dataset, passed to model.fit.
epochs = 30

In [None]:
def evaluate(model, ds, folder, class_names=None, truth_dir='data/'):
    """Print the classification accuracy of ``model`` on an unlabeled image dataset.

    Ground truth is read from ``{truth_dir}{folder}_truth.csv`` (columns
    ``image_id`` and ``dx``) and sorted by ``image_id``. Predictions are the
    per-row argmax of the model output and are compared to the ground truth
    position by position.

    Args:
        model: Callable invoked as ``model(batch, training=False)``; must return
            per-class scores with the class axis at index 1.
        ds: Iterable of image batches.
            NOTE(review): the batches must yield images in ascending
            ``image_id`` order (e.g. a directory dataset loaded with
            ``shuffle=False`` whose file names sort like the ids) — confirm
            against the data layout.
        folder: Split name used to build the CSV file name, e.g. ``'val'``.
        class_names: Optional ordered sequence of label names where the position
            of a name is the class index the model predicts. When omitted, the
            mapping is derived from the sorted labels present in the CSV, which
            only matches the training indices if every class occurs in this
            split. Pass the training ``class_names`` to avoid that assumption.
        truth_dir: Path prefix (including the trailing separator) of the folder
            holding the ground-truth CSV.

    Raises:
        ValueError: If the CSV contains a label missing from ``class_names`` or
            the number of predictions differs from the number of CSV rows.
    """
    truth = pd.read_csv(f'{truth_dir}{folder}_truth.csv')[['image_id', 'dx']]
    truth = truth.sort_values(by='image_id')

    if class_names is None:
        class_names = sorted(set(truth['dx']))
    label_to_index = {name: idx for idx, name in enumerate(class_names)}

    unknown = sorted(set(truth['dx']) - set(label_to_index), key=str)
    if unknown:
        raise ValueError(
            f"labels in {folder}_truth.csv missing from class_names: {unknown}"
        )
    y_actual = truth['dx'].map(label_to_index).to_numpy()

    # Collect per-batch predictions and join them once at the end instead of
    # re-concatenating a growing tensor on every iteration.
    batch_preds = []
    for batch_img in ds:
        # The model ends in a softmax, so these are probabilities, not logits.
        probs = model(batch_img, training=False)
        batch_preds.append(np.argmax(np.asarray(probs), axis=1))
    y_pred = np.concatenate(batch_preds) if batch_preds else np.array([], dtype=np.int64)

    if len(y_pred) != len(y_actual):
        raise ValueError(
            f"got {len(y_pred)} predictions for {len(y_actual)} ground-truth rows"
        )

    print("acc :", accuracy_score(y_actual, y_pred))

In [None]:
# Train on the augmented training dataset, weighting the loss per class with
# class_weights. The return value of fit is bound to `_` and not used afterwards.
_ = model.fit(train_ds, epochs = epochs, class_weight = class_weights)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [None]:
# Print accuracy on the validation split, scored against data/val_truth.csv.
evaluate(model, val_ds, 'val')

acc : 0.8303393213572854


In [None]:
# Held-out test split: loaded unlabeled and unshuffled so the file order stays
# deterministic for comparison with the ground-truth CSV.
test_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir + 'test',
    image_size=(image_size, image_size),
    batch_size=batch_size,  # shared config constant instead of a duplicated literal 32
    shuffle=False,
    labels=None,
)
# Apply the same InceptionV3 input scaling that the validation images receive.
test_ds = test_ds.map(lambda x: preprocess_input(x))
evaluate(model, test_ds, 'test')

Found 1001 files belonging to 1 classes.
acc : 0.8291708291708292


In [None]:
# Persist the trained model under the model/inception directory.
model.save('model/inception')



INFO:tensorflow:Assets written to: model/inception/assets


INFO:tensorflow:Assets written to: model/inception/assets
