In [1]:
import os
import numpy as np
import pandas as pd
import json
from datetime import datetime

import tensorflow as tf
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import EfficientNetV2B1

import albumentations as A

import mlflow
import mlflow.tensorflow

from sklearn.utils.class_weight import compute_class_weight
from sklearn.model_selection import train_test_split
from sklearn import metrics

from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns

2023-04-17 12:53:32.599840: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [19]:
with open('./config.json', 'r') as f:
    config = json.loads(f.read())

image_dir = config['img_dir']
img_size = config['img_size']
batch_size = config['batch_size'] 

train_df = pd.read_csv(config['train_metadata_filepath'])
val_df = pd.read_csv(config['val_metadata_filepath'])
test_df = pd.read_csv('./data/splits/test_material_2023-04-13.csv')

train_df = pd.read_csv('./data/splits/test_material_2023-04-13.csv')

patience = config['patience']
epochs = config['epochs']

seed = config['seed']
labels = list(train_df.columns)[1:]

freeze_layers = config["freeze_layers"]
freeze_layers


0.1

In [28]:
img_size = 40
batch_size = 15
epochs = 5
learning_rate = 1e-5

labels = list(train_df.columns)[1:]

augmentation_pipeline = A.Compose([
    # Add your desired augmentations here
    A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2),
    A.HorizontalFlip(),
    A.VerticalFlip(),
    A.ToGray(p=1)
])

def augment_images(images):
    images = images.astype(np.float32) / 255.0
    augmented_image = augmentation_pipeline(image=images)['image']
    return augmented_image


datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=augment_images
)



datagen_val = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,

)

train_generator = datagen_train.flow_from_dataframe(
    dataframe=train_df,
    directory=image_dir,
    x_col='file_name',
    y_col=labels,
    class_mode='raw',
    target_size=(img_size, img_size), 
    batch_size=batch_size,
    shuffle=True,
)

val_generator = datagen_val.flow_from_dataframe(
    dataframe=val_df,
    directory=image_dir,
    x_col='file_name',
    y_col=labels,
    class_mode='raw',
    target_size=(img_size, img_size), 
    batch_size=batch_size,
    shuffle=False,
)

test_generator = datagen_val.flow_from_dataframe(
    dataframe=test_df,
    directory=image_dir,
    x_col='file_name',
    y_col=labels,
    class_mode='raw',
    target_size=(img_size, img_size), 
    batch_size=batch_size,
    shuffle=False,
)


Found 245 validated image filenames.
Found 202 validated image filenames.
Found 245 validated image filenames.


In [46]:
def lr_function(epoch):
    start_lr = 1e-6; min_lr = 1e-6; max_lr = 1e-4
    rampup_epochs = 5; sustain_epochs = 0; exp_decay = .8
    
    def lr(epoch, start_lr, min_lr, max_lr, rampup_epochs, 
           sustain_epochs, exp_decay):
        if epoch < rampup_epochs:
            lr = ((max_lr - start_lr) / rampup_epochs 
                        * epoch + start_lr)
        elif epoch < rampup_epochs + sustain_epochs:
            lr = max_lr
        else:
            lr = ((max_lr - min_lr) * 
                      exp_decay**(epoch - rampup_epochs -
                                    sustain_epochs) + min_lr)
        return lr

    return lr(epoch, start_lr, min_lr, max_lr, 
              rampup_epochs, sustain_epochs, exp_decay)

In [21]:
base_model = EfficientNetV2B1(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))


if freeze_layers > 0:
    num_layers = len(base_model.layers)
    index = int(num_layers * freeze_layers)
    
    for i, layer in enumerate(base_model.layers):
        if i < index:
            layer.trainable = False
        else:
            layer.trainable = True
else:
    for layer in base_model.layers:
        layer.trainable = True

frozen_layers = 0
for layer in base_model.layers:
    if not layer.trainable:
        frozen_layers += 1

print("Number of frozen layers in model: ", frozen_layers)
total_layers = len(base_model.layers)
print("Total number of layers in model: ", total_layers)

Number of frozen layers in base_model:  33
Total number of layers in base_model:  334


In [48]:
base_model = EfficientNetV2B1(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))


for layer in base_model.layers:
    layer.trainable = True

x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
predictions = Dense(len(labels), activation='sigmoid')(x)


model = tf.keras.models.Model(inputs=base_model.input, outputs=predictions)

optimizer = Adam(learning_rate=learning_rate)


model.compile(optimizer=optimizer, 
                    loss='binary_crossentropy', 
                    metrics=['accuracy'],
                    )

early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=patience, monitor="val_loss", restore_best_weights=True)

mlflow.tensorflow.autolog()

history = model.fit(train_generator,
                    validation_data=val_generator,
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[early_stopping_cb],
                    )



mlflow.log_param('lr', learning_rate)
mlflow.log_param('batch_size', batch_size)
mlflow.log_param('epochs', epochs)
mlflow.log_param('img_size', img_size)
#mlflow.keras.log_model(model, 'model')


mlflow.end_run()


2023/04/13 18:05:33 INFO mlflow.utils.autologging_utils: Created MLflow autologging run with ID 'a33b76fab1dc4d8482bfff658ca568d8', which will track hyperparameters, performance metrics, model artifacts, and lineage information for the current tensorflow workflow


Epoch 1/5

KeyError: 'You have to pass data to augmentations as named arguments, for example: aug(image=image)'

In [22]:
history.history

{'loss': [0.7077394723892212],
 'accuracy': [0.11836734414100647],
 'val_loss': [0.78299480676651],
 'val_accuracy': [0.0445544570684433]}

In [23]:
plt.plot(range(1,range(1,(epochs+1))),history.history['loss'],label = 'loss')
plt.plot(range(1,range(1,(epochs+1))),history.history['val_loss'],label = 'val_loss')
plt.legend();
plt.show()
plt.close()
plt.plot(range(1,range(1,(epochs+1))),history.history['accuracy'],label = 'accuracy')
plt.plot(range(1,range(1,(epochs+1))),history.history['val_accuracy'],label = 'val_accuracy')
plt.legend();

TypeError: 'range' object cannot be interpreted as an integer

In [129]:
test_history = model.predict(
                        test_generator,
                        use_multiprocessing=False,
                        verbose=1)

Validation loss: 0.122
Validation accuracy: 0.006


In [15]:
old_model = mlflow.keras.load_model('./mlruns/0/2969d2146ed042fdad1f23e2b341f725/artifacts/model')

In [16]:
test_history = old_model.predict(
                        val_generator,
                        #steps=steps_test,
                        #callbacks=None,
                       #max_queue_size=10,
                        #workers=-1,
                        use_multiprocessing=False,
                       verbose=1)

