In [2]:
import os
import numpy as np
import pandas as pd
import json
from datetime import datetime

import tensorflow as tf
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import EfficientNetV2B1

import mlflow
import mlflow.tensorflow

from sklearn.model_selection import train_test_split
from sklearn import metrics

from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns

2023-04-12 21:43:29.920626: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [5]:
with open('./config.json', 'r') as f:
    config = json.loads(f.read())

image_dir = config['img_dir']
img_size = config['img_size']

train_df = pd.read_csv(config['train_metadata_filepath'])
val_df = pd.read_csv(config['val_metadata_filepath'])
test_df = pd.read_csv('./data/splits/test_recycle_2023-04-11.csv')

train_df = pd.read_csv('./data/splits/test_recycle_2023-04-11.csv')

patience = config['patience']
epochs = config['epochs']

seed = config['seed']


In [6]:
config


{'seed': 42,
 'img_dir': './data/all_images',
 'img_size': 675,
 'batch_size': 12,
 'epochs': 100,
 'learning_rate': 1e-05,
 'patience': 12,
 'train_metadata_filepath': './data/splits/train_recycle_partially_balanced_2023-04-11.csv',
 'val_metadata_filepath': './data/splits/val_recycle_2023-04-11.csv'}

In [11]:
type(datetime.now().strftime("%Y%m%d-%H%M%S"))
custom_run_id = datetime.now().strftime("%Y%m%d-%H%M%S") + "rec_imgSize_" + str(img_size)
custom_run_id

'20230412-220737rec_imgSize_40'

In [8]:
img_size = 40
batch_size = 15
epochs = 1
learning_rate = 1e-5

labels = list(train_df.columns)[1:]


# Define data generators for train and validation sets
datagen_train = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    #rotation_range=15,
    #width_shift_range=0.2,
    #height_shift_range=0.2,
    #shear_range=0.005,
    #zoom_range=[0.9, 1.4],
    #horizontal_flip=True,
    #vertical_flip=False,
    #brightness_range=(.8,1.2),
    fill_mode='nearest',
)

datagen_val = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
)

train_generator = datagen_train.flow_from_dataframe(
    dataframe=train_df,
    directory='/Users/mjs/Desktop/Dev/TACO/data/all_images',
    x_col='file_name',
    y_col=labels,
    class_mode='raw',
    target_size=(img_size, img_size), 
    batch_size=batch_size,
    shuffle=True,
)

val_generator = datagen_val.flow_from_dataframe(
    dataframe=val_df,
    directory='/Users/mjs/Desktop/Dev/TACO/data/all_images',
    x_col='file_name',
    y_col=labels,
    class_mode='raw',
    target_size=(img_size, img_size), 
    batch_size=batch_size,
    shuffle=False,
)

test_generator = datagen_val.flow_from_dataframe(
    dataframe=test_df,
    directory='/Users/mjs/Desktop/Dev/TACO/data/all_images',
    x_col='file_name',
    y_col=labels,
    class_mode='raw',
    target_size=(img_size, img_size), 
    batch_size=batch_size,
    shuffle=False,
)


Found 237 validated image filenames.
Found 199 validated image filenames.
Found 237 validated image filenames.


In [169]:
def lr_function(epoch):
    start_lr = 1e-6; min_lr = 1e-6; max_lr = 1e-4
    rampup_epochs = 5; sustain_epochs = 0; exp_decay = .8
    
    def lr(epoch, start_lr, min_lr, max_lr, rampup_epochs, 
           sustain_epochs, exp_decay):
        if epoch < rampup_epochs:
            lr = ((max_lr - start_lr) / rampup_epochs 
                        * epoch + start_lr)
        elif epoch < rampup_epochs + sustain_epochs:
            lr = max_lr
        else:
            lr = ((max_lr - min_lr) * 
                      exp_decay**(epoch - rampup_epochs -
                                    sustain_epochs) + min_lr)
        return lr

    return lr(epoch, start_lr, min_lr, max_lr, 
              rampup_epochs, sustain_epochs, exp_decay)

In [None]:
base_model = EfficientNetV2B1(weights='imagenet', include_top=False, input_shape=(img_size, img_size, 3))


for layer in base_model.layers:
    layer.trainable = True

x = base_model.output
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dense(1024, activation='relu')(x)
predictions = Dense(len(labels), activation='sigmoid')(x)


model = tf.keras.models.Model(inputs=base_model.input, outputs=predictions)

optimizer = Adam(learning_rate=learning_rate)
#optimizer = Adam(learning_rate=lr_function(epochs))


model.compile(optimizer=optimizer, 
                    loss='binary_crossentropy', 
                    metrics=['accuracy'],
                    )

early_stopping_cb = tf.keras.callbacks.EarlyStopping(patience=5, monitor="val_loss", restore_best_weights=True)

mlflow.tensorflow.autolog()

history = model.fit(train_generator,
                    validation_data=val_generator,
                    epochs=epochs,
                    batch_size=batch_size,
                    callbacks=[early_stopping_cb],
                    )

mlflow.log_params({
    "img_size": img_size,
    "learning_rate": learning_rate,
    "labels": labels,
    "epochs": epochs,
    "batch_size": batch_size
})

In [None]:
plt.plot(range(1,range(1,(epochs+1))),history.history['loss'],label = 'loss')
plt.plot(range(1,range(1,(epochs+1))),history.history['val_loss'],label = 'val_loss')
plt.legend();
plt.show()
plt.close()
plt.plot(range(1,range(1,(epochs+1))),history.history['accuracy'],label = 'accuracy')
plt.plot(range(1,range(1,(epochs+1))),history.history['val_accuracy'],label = 'val_accuracy')
plt.legend();

In [129]:
test_history = model.predict(
                        test_generator,
                        use_multiprocessing=False,
                        verbose=1)

Validation loss: 0.122
Validation accuracy: 0.006


In [15]:
old_model = mlflow.keras.load_model('./mlruns/0/2969d2146ed042fdad1f23e2b341f725/artifacts/model')

In [16]:
test_history = old_model.predict(
                        val_generator,
                        #steps=steps_test,
                        #callbacks=None,
                       #max_queue_size=10,
                        #workers=-1,
                        use_multiprocessing=False,
                       verbose=1)

