In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf

from sklearn.utils import class_weight
from tqdm.notebook import tqdm

In [2]:
# Mini-batch size used when loading the training images.
batch_size = 16
# Side length (pixels) every image is resized to; used as (image_size, image_size).
image_size = 299

# Root folder of the dataset; sub-folder names are appended directly to it.
data_dir = 'data/'

In [3]:
def get_class_weight(ds):
    """Compute 'balanced' class weights from the labels of a batched dataset.

    The dataset is iterated once in full; every element is unpacked as an
    ``(inputs, labels)`` pair and only the labels are kept.

    Args:
        ds: iterable yielding ``(inputs, labels)`` pairs, where each ``labels``
            can be concatenated along axis 0 with the others.

    Returns:
        The result of ``class_weight.compute_class_weight('balanced', ...)``
        evaluated on all labels, with ``classes`` set to their unique values.
    """
    label_batches = []
    for _, batch_labels in ds:
        label_batches.append(batch_labels)
    labels = np.concatenate(label_batches, axis=0)

    return class_weight.compute_class_weight(
        'balanced',
        classes=np.unique(labels),
        y=labels,
    )

In [4]:
# Load the training images from `<data_dir>train`, resized to a square of
# side `image_size` and grouped into batches of `batch_size`.
train_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir+"train",
    seed=123,
    batch_size=batch_size,
    image_size=(image_size, image_size),
)
class_names = train_ds.class_names
num_classes = len(class_names)

# Balanced class weights keyed by class index ({0: w0, 1: w1, ...}).
# Note: this walks the whole training set once just to collect the labels.
class_weights = dict(enumerate(get_class_weight(train_ds)))

Found 8012 files belonging to 7 classes.


2023-01-25 20:54:23.823131: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


In [5]:
# Validation images are loaded without labels, one image per batch and in a
# fixed (unshuffled) order.
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir+'val',
    labels=None,
    shuffle=False,
    batch_size=1,
    image_size=(image_size, image_size),
)

Found 1002 files belonging to 1 classes.


In [6]:
# Ground-truth table for the validation split: keep only the image id and the
# diagnosis column, ordered by image id.
val_df = (
    pd.read_csv('data/val_truth.csv')[['image_id', 'dx']]
    .sort_values(by='image_id')
)

In [7]:
AUTOTUNE = tf.data.AUTOTUNE

# Input pipeline for training, applied in this order: cache the loaded batches,
# shuffle them with a buffer of 1000 elements, then prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

In [8]:
# Augmentation stack: random horizontal flip followed by a random rotation
# with factor 0.2.
data_augmentation = tf.keras.Sequential()
data_augmentation.add(tf.keras.layers.RandomFlip('horizontal'))
data_augmentation.add(tf.keras.layers.RandomRotation(0.2))

# Input scaling function that ships with the InceptionV3 application.
preprocess_input = tf.keras.applications.inception_v3.preprocess_input

In [9]:
# InceptionV3 backbone with ImageNet weights and without its classification top.
base_model = tf.keras.applications.inception_v3.InceptionV3(
    weights='imagenet',
    include_top=False,
    input_shape=(image_size, image_size, 3),
)

In [10]:
# Training batches are augmented (augmentation layers forced into training
# mode) and then scaled; labels pass through untouched.
train_ds = train_ds.map(
    lambda images, labels: (
        preprocess_input(data_augmentation(images, training=True)),
        labels,
    )
)
# Validation images are only scaled.
val_ds = val_ds.map(lambda images: preprocess_input(images))

In [11]:
# Whether the backbone's weights are updated during training.
train_base = True
base_model.trainable = train_base

# Head layers: global average pooling -> dropout -> 1024-unit ReLU layer ->
# one output unit per class (no activation).
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
classification_layer = tf.keras.layers.Dense(units=1024, activation='relu')
prediction_layer = tf.keras.layers.Dense(units=num_classes)

# Wire the head onto the output of the backbone's last layer.
x = global_average_layer(base_model.layers[-1].output)
x = classification_layer(tf.keras.layers.Dropout(rate=0.2)(x))
outputs = prediction_layer(x)

model = tf.keras.Model(inputs=base_model.input, outputs=outputs)

In [12]:
base_learning_rate = 1e-5

# Adam at the base learning rate; the loss takes integer labels and raw
# (un-normalised) model outputs, hence from_logits=True.
optimizer = tf.keras.optimizers.Adam(learning_rate=base_learning_rate)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)

In [13]:
# Number of full passes over train_ds made by the training loop.
epochs = 1

In [19]:
# Rebuild the validation pipeline from disk: unlabeled, unshuffled, one image
# per batch, then apply the InceptionV3 input scaling.
val_ds = tf.keras.utils.image_dataset_from_directory(
    data_dir+'val',
    labels=None,
    shuffle=False,
    batch_size=1,
    image_size=(image_size, image_size),
)
val_ds = val_ds.map(lambda images: preprocess_input(images))

Found 1002 files belonging to 1 classes.


In [20]:
def evaluate(model, ds, folder):
    """Print and return the top-1 accuracy of ``model`` on an unlabeled dataset.

    Ground truth is read from ``data/{folder}_truth.csv`` (columns ``image_id``
    and ``dx``). Its rows are sorted by ``image_id`` and matched to the elements
    of ``ds`` purely by position, so ``ds`` must yield its images in that same
    order. The prediction is the argmax over *all* entries of the model output,
    so each element of ``ds`` must hold a single image.

    Class indices are assigned by sorting the distinct ``dx`` values found in
    the truth file.
    NOTE(review): this only matches the model's class indices if the truth file
    contains every class the model was trained on - confirm for each split.

    Args:
        model: callable invoked as ``model(img, training=False)`` that returns
            per-class scores for one image.
        ds: iterable of model inputs, one image per element.
        folder: split name used to locate ``data/{folder}_truth.csv`` and as
            the prefix of the printed line.

    Returns:
        float: number of correct predictions divided by the number of rows in
        the truth file.
    """
    df = pd.read_csv(f'data/{folder}_truth.csv')[['image_id', 'dx']]
    # Sort the frame loaded for *this* folder (not a notebook-level global), so
    # the labels always belong to the split being evaluated.
    df = df.sort_values(by='image_id')

    class_to_idx = {name: idx for idx, name in enumerate(sorted(set(df['dx'])))}
    labels = df['dx'].map(class_to_idx).to_numpy()

    correct = 0
    for idx, img in enumerate(ds):
        logit = model(img, training=False)
        if np.argmax(logit) == labels[idx]:
            correct += 1

    accuracy = correct / len(df)
    print(folder, 'acc :', accuracy)
    return accuracy

In [21]:
# Custom training loop: one optimizer step per batch, then a per-epoch report
# of the mean batch loss followed by the validation accuracy.
for epoch in range(epochs):
    loss, total_step = 0, 0
    for x_batch_train, y_batch_train in train_ds:
        # Record the forward pass so the loss can be differentiated.
        with tf.GradientTape() as tape:
            logits = model(x_batch_train, training=True)
            loss_value = loss_fn(y_batch_train, logits)

        weights = model.trainable_weights
        grads = tape.gradient(loss_value, weights)
        optimizer.apply_gradients(zip(grads, weights))

        total_step += 1
        loss += loss_value.numpy()

    # end=" " keeps the accuracy printed by evaluate() on the same line.
    print(f'{epoch+1}. Loss : {loss/total_step}', end=" ")
    evaluate(model, val_ds, 'val')

  print(f'{epoch+1}. Loss : {loss/total_step}', end=" ")


1. Loss : inf val 0.14171656686626746


In [None]:
model.save('model/inception')