In [2]:
# for git
# MIL model
# used on Argonne server
# MIL + attention + LayerNormalization (LN), using the new training dataset


import os
import numpy as np
import tensorflow as tf
from tensorflow import keras
import math
import pathlib
from os import makedirs
from os.path import exists, join
import glob
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.model_selection import train_test_split
import pandas as pd
from random import randrange
# Spatial size every frame is resized to, and the number of colour channels.
img_height, img_width = 200, 200
channel_num = 3
# Number of frame slots per track sample (first axis of both model inputs).
N = 40
# Tracks per batch; BATCH_SIZE is kept as an upper-case alias of the same value.
batch_size = 16
BATCH_SIZE = batch_size
IMG_SIZE = (img_height, img_width)

In [3]:


# input_1: the frames of one track
track_input = keras.Input(shape=(N, img_height, img_width, channel_num), name="input_1")  # (?, N, H, W, C)
# input_2: per-frame 1/0 mask -- 1 marks a real frame, 0 marks a zero-padded frame
list_input = keras.Input(shape=(N, 1, 1, 1), name="input_2")  # (?, N, 1, 1, 1)

# Scale pixel values by 1/255.
x = tf.keras.layers.experimental.preprocessing.Rescaling(1./255)(track_input)  # (?, N, H, W, C)

# Per-frame feature extractor: five identical stages (Conv -> LayerNorm -> ReLU -> MaxPool),
# each applied independently to every frame through TimeDistributed.
# The first stage has 16 filters, the remaining four have 32.
for filters in (16, 32, 32, 32, 32):
    x = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(filters, 3, padding='same'))(x)
    x = tf.keras.layers.TimeDistributed(tf.keras.layers.LayerNormalization())(x)
    x = tf.keras.activations.relu(x)
    x = tf.keras.layers.TimeDistributed(tf.keras.layers.MaxPooling2D())(x)
# x is now (?, N, ?, ?, 32)

# attention layer: a per-frame, per-channel gate in (0, 1) computed from the
# spatially averaged features through a 32 -> 4 -> 32 bottleneck of 1x1 convolutions
attention_score = tf.keras.layers.TimeDistributed(tf.keras.layers.GlobalAveragePooling2D())(x)  # (?, N, 32)
# Use N rather than a literal frame count so the graph stays valid if N is changed.
attention_score = tf.keras.layers.Reshape((N, 1, 1, 32))(attention_score)  # (?, N, 1, 1, 32)
attention_score = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(4, 1, padding='same', activation='relu'))(attention_score)  # (?, N, 1, 1, 4)
attention_score = tf.keras.layers.TimeDistributed(tf.keras.layers.Conv2D(32, 1, padding='same'))(attention_score)  # (?, N, 1, 1, 32)
attention_score = tf.keras.activations.sigmoid(attention_score)  # (?, N, 1, 1, 32)

x = x * attention_score  # (?, N, ?, ?, 32)

# Masked mean over the frame axis: padded frames are multiplied by 0 and the sum
# is divided by the number of real frames (the sum of the mask).
# NOTE(review): a sample whose mask is all zeros would divide by zero here.
x = x * list_input  # (?, N, ?, ?, 32)
x = tf.reduce_sum(x, 1)  # (?, ?, ?, 32)
x = x / tf.reduce_sum(list_input, 1)  # (?, ?, ?, 32)

# Classification head; the final Dense(1) has no activation, so it outputs a logit.
x = tf.keras.layers.Flatten()(x)
Dense_LN_1 = tf.keras.layers.LayerNormalization()
x = Dense_LN_1(x)
x = tf.keras.layers.Dropout(0.2)(x)
x = tf.keras.layers.Dense(16)(x)
Dense_LN_2 = tf.keras.layers.LayerNormalization()
x = Dense_LN_2(x)
x = tf.keras.activations.relu(x)
class_pred = tf.keras.layers.Dense(1, name="bnb")(x)
model = keras.Model(
    inputs=[track_input, list_input],
    outputs=[class_pred],
)

# from-scratch accuracy metric
def tf_count(t, val):
    """Count, along axis 0, the positions where ``t`` equals ``val``.

    The element-wise equality result is cast to int32 and summed over axis 0,
    so the returned tensor has the shape of the comparison minus its first axis.
    """
    matches = keras.backend.equal(t, val)
    return keras.backend.sum(keras.backend.cast(matches, tf.int32), 0)

def accuracy(y_true, y_pred):
    """Binary accuracy for a model that outputs logits.

    Args:
        y_true: ground-truth 0/1 labels, same shape as ``y_pred``.
        y_pred: raw logits; a sigmoid is applied and the result thresholded
            at 0.5 (values below 0.5 become 0, everything else 1).

    Returns:
        The number of matching entries counted along axis 0, divided by the
        size of the first axis of ``y_true``.
    """
    predicted = tf.where(tf.nn.sigmoid(y_pred) < 0.5, 0.0, 1.0)
    # Align dtypes before comparing: the datasets yield integer labels, and an
    # equality test between an integer tensor and the float predictions would fail
    # when this function is called directly instead of through Keras.
    y_true = tf.cast(y_true, predicted.dtype)
    correct_count = tf_count(y_true, predicted)
    total_count = keras.backend.shape(y_true)[0]
    return correct_count/total_count

# The "bnb" output is a logit, hence from_logits=True on the loss.
bnb_loss = keras.losses.BinaryCrossentropy(from_logits=True)
model.compile(
    optimizer=keras.optimizers.RMSprop(learning_rate=1e-3),
    loss={"bnb": bnb_loss},
    loss_weights=[1.0],
    metrics={"bnb": [accuracy]},
)

In [4]:
# Root folder of the raw image dataset.
data_dir = "/project2/msca/projects/AvianSolar/ImageDataset/raw_dataset"

# Load the per-image index, keep rows with image_count >= 5,
# and order the rows by track (obj_id) and then by frame.
all_images = pd.read_csv(data_dir + "/all_image_merged.csv")
all_images = all_images.loc[all_images['image_count'] >= 5]
all_images = all_images.sort_values(by=['obj_id', 'frame'])

# Full path of every image:
# data_dir/day_dir/camera_dir/video_dir/track_dir/image_file
all_images['full_file'] = (
    data_dir + os.path.sep
    + all_images['day_dir'] + os.path.sep
    + all_images['camera_dir'] + os.path.sep
    + all_images['video_dir'] + os.path.sep
    + all_images['track_dir'].astype(str) + os.path.sep
    + all_images['image_file']
)

# Number of image rows kept after filtering.
image_count = len(all_images)
# png for the avian dataset
# Number of distinct obj_id values, i.e. one per track.
real_image_count = len(pd.unique(all_images['obj_id']))
train_ratio, validation_ratio, test_ratio = 0.80, 0.10, 0.10

# The split is read from the ttv_split column: 1 = train, 2 = test, 3 = validation.
train_rows = all_images.loc[all_images['ttv_split'] == 1]
test_rows = all_images.loc[all_images['ttv_split'] == 2]
val_rows = all_images.loc[all_images['ttv_split'] == 3]

flag_val_ds = flag_train_ds = flag_test_ds = 0

# num_total_test_tracks = tf.data.experimental.cardinality(test_ds).numpy()

def get_label(file_path):
    """Parse the last component of ``file_path`` as an int64 label.

    The path is split on ``os.path.sep`` and the final component is converted
    with ``tf.strings.to_number``.

    NOTE(review): an earlier comment described the label as the third-to-last
    path component, but the code has always read the last one -- confirm which
    is intended before using this helper.
    """
    last_component = tf.strings.split(file_path, os.path.sep)[-1]
    return tf.strings.to_number(last_component, out_type=tf.int64, name=None)

def decode_img(img):
    """Decode an encoded image string and resize it to the model's frame size.

    Args:
        img: the raw file contents of one image.

    Returns:
        The 3-channel image resized to ``[img_height, img_width]``.
    """
    decoded = tf.image.decode_jpeg(img, channels=3)
    target_size = [img_height, img_width]
    return tf.image.resize(decoded, target_size)

# shared implementation of the train / val / test data generators
def _iter_track_samples(rows):
    """Yield one model sample per track found in ``rows``.

    Args:
        rows: DataFrame with the columns ``obj_id`` (track id),
            ``obj_cat_binary`` (label) and ``full_file`` (image path).
            Rows of a track are used in the order they appear in ``rows``.

    Yields:
        ``({"input_1": imgs, "input_2": mark_tensor}, label)`` where
        ``imgs`` holds N frames of shape (img_height, img_width, channel_num),
        ``mark_tensor`` has shape (N, 1, 1, 1) with 1 for real frames and 0 for
        zero-padded ones, and ``label`` is the array of unique
        ``obj_cat_binary`` values of the track.
    """
    # sort=False keeps the tracks in order of first appearance, and a single
    # groupby pass avoids re-scanning the whole frame once per track.
    for _, track_rows in rows.groupby('obj_id', sort=False):
        label = pd.unique(track_rows.obj_cat_binary)
        frame_paths = track_rows['full_file'].values.tolist()

        # Tracks longer than N contribute their middle N frames;
        # shorter tracks are used whole (start index clamps to 0).
        first = max((len(frame_paths) - N) // 2, 0)
        frame_paths = frame_paths[first:first + N]
        real_count = len(frame_paths)

        imgs = tf.stack([decode_img(tf.io.read_file(path)) for path in frame_paths])
        if real_count < N:
            # zero-pad the frame axis up to N
            zero_pads = tf.zeros([N - real_count, img_height, img_width, channel_num], tf.float32)
            imgs = tf.concat([imgs, zero_pads], 0)

        # record the real frames as 1, zero-padded fake frames as 0
        mark = np.zeros([N,])
        mark[:real_count] = 1
        mark_tensor = tf.convert_to_tensor(mark, dtype=tf.float32)
        mark_tensor = tf.reshape(mark_tensor, [N, 1, 1, 1])
        yield {"input_1": imgs, "input_2": mark_tensor}, label


# train data generator
def generator_train():
    """Yield one (inputs, label) sample per track in ``train_rows``."""
    yield from _iter_track_samples(train_rows)


# val data generator
def generator_val():
    """Yield one (inputs, label) sample per track in ``val_rows``."""
    yield from _iter_track_samples(val_rows)


# test data generator
def generator_test():
    """Yield one (inputs, label) sample per track in ``test_rows``."""
    yield from _iter_track_samples(test_rows)

# Element dtypes shared by the three splits: (inputs dict, label).
_sample_types = ({"input_1": tf.float32, "input_2": tf.float32}, tf.int64)
train_ds_temp = tf.data.Dataset.from_generator(generator_train, output_types=_sample_types)
val_ds_temp = tf.data.Dataset.from_generator(generator_val, output_types=_sample_types)
test_ds_temp = tf.data.Dataset.from_generator(generator_test, output_types=_sample_types)

def configure_for_performance(ds):
    """Return ``ds`` batched into groups of ``batch_size`` elements."""
    return ds.batch(batch_size)

# Batch all three splits the same way.
train_ds_temp, val_ds_temp, test_ds_temp = map(
    configure_for_performance,
    (train_ds_temp, val_ds_temp, test_ds_temp),
)


In [5]:
# Pull the first training batch for inspection.
# next(iter(...)) raises if the dataset is empty, instead of silently leaving `x`
# bound to whatever it held before (the model's output tensor from the cell above).
x = next(iter(train_ds_temp))

In [6]:
# Sum of the 1/0 frame mask over the whole batch, i.e. the number of real
# (non-padded) frames in the inspected batch.
np.sum(x[0]['input_2'])

252.0

In [7]:
# Labels of the inspected batch (second element of the (inputs, label) pair).
x[1]

<tf.Tensor: shape=(16, 1), dtype=int64, numpy=
array([[0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0],
       [0]])>

In [8]:
# train the MIL model
# - The dataset is already batched, so no batch_size is passed to fit();
#   Keras expects batch_size to be left unset for tf.data inputs.
# - The generator-backed dataset is finite. With steps_per_epoch set, Keras keeps
#   drawing from the same iterator across epochs, so the data is repeated to make
#   sure all `epochs * steps_per_epoch` batches can be produced.
model.fit(
    train_ds_temp.repeat(),
    epochs=15,
    steps_per_epoch=2155,
    validation_data=val_ds_temp,
)

Epoch 1/15
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
  10/2155 [..............................] - ETA: 2:49:43 - loss: 0.6056 - accuracy: 0.6510

KeyboardInterrupt: 

In [9]:
from numba import cuda 
# Reset the current CUDA device through numba.
# NOTE(review): presumably used to free GPU memory after interrupting training;
# TensorFlow may not be able to use the GPU again in this kernel afterwards -- confirm.
device = cuda.get_current_device()
device.reset()