In [1]:
import os
import ast
import numpy as np
import pandas as pd
import tensorflow as tf
from typing import Tuple
from galleries_mapping import *
from tensorflow.keras.models import Model
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer
from tensorflow.keras.applications import EfficientNetB4
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.preprocessing.image import load_img, img_to_array
from tensorflow.keras.mixed_precision import experimental as mixed_precision
from tensorflow.keras.layers import GlobalAveragePooling2D, Dense, Input, Lambda

policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

ImportError: cannot import name 'experimental' from 'tensorflow.keras.mixed_precision' (/Users/milosz.bertman/Projects/milfusion/venv/lib/python3.12/site-packages/keras/_tf_keras/keras/mixed_precision/__init__.py)

In [None]:
# todo: compile dataset with structure - 
# todo: apply sorting for the df based on available galleries
# done: apply gallery mapping

In [None]:
df = pd.read_csv('datasets/galleries_dataset.csv')
df.dropna(subset=['categories', 'categories_suggestions'], inplace=True)
df['categories'] = df['categories'].apply(ast.literal_eval)
df['categories_suggestions'] = df['categories_suggestions'].apply(ast.literal_eval)
df.shape

In [None]:
def merge_categories(row):
    categories = set((row['categories']))
    categories_suggestions = set(row['categories_suggestions'])
    categories_superset = {category.strip().lower() for category in categories.union(categories_suggestions)}
    return list(categories_superset)


df['labels'] = df['categories'].apply(lambda x: [category.strip().lower() for category in x])
df['labels'] = df.apply(merge_categories, axis=1)
df.drop(['categories_suggestions', 'categories'], axis=1, inplace=True)
df.shape

In [None]:
def gallery_mapping(row):
    labels = row.labels

    out = []
    for L in labels:
        _fetched = FIXED_GALLERIES_MAP.get(L, None)
        if _fetched is remove_tag:
            continue
        elif _fetched is remove_gallery:
            return None
        elif isinstance(_fetched, list):
            out.extend(_fetched)
        elif _fetched is keep_tag:
            out.append(L.lower())

    return list(set(out))


df['labels'] = df.apply(gallery_mapping, axis=1)
df = df[df['labels'].notnull()]
df.shape

In [None]:
df = df[['gallery_name', 'labels']]
df.head()

In [None]:
mlb = MultiLabelBinarizer()
one_hot_labels = mlb.fit_transform(df['labels'])
df['one_hot_labels'] = list(one_hot_labels)
num_labels = len(mlb.classes_)
num_labels

In [None]:
# CREATE GALLERY MAPPING

In [None]:
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

In [None]:
all_labels = np.concatenate(df['one_hot_labels'].values)
class_weights = compute_class_weight('balanced', classes=np.arange(num_labels), y=all_labels)
class_weights_dict = {i: weight for i, weight in enumerate(class_weights)}

In [None]:
image_input_shape = (380, 380, 3)
max_images = 24
image_base_path = 'images'

In [None]:
def load_and_preprocess_image(image_path):
    image = tf.io.read_file(image_path)
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, image_input_shape[:2])
    image = image / 255.0
    return image


def process_gallery(category: tf.Tensor, gallery_name: tf.Tensor, label: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
    gallery_path = os.path.join(image_base_path, category.numpy().decode('utf-8'), gallery_name.numpy().decode('utf-8'))
    image_files = tf.io.gfile.glob(os.path.join(gallery_path, '*.jpg'))[:max_images]
    images = tf.data.Dataset.from_tensor_slices(image_files).map(
        load_and_preprocess_image, num_parallel_calls=tf.data.AUTOTUNE
    )
    images = images.batch(max_images)
    images = tf.pad(
        images,
        [
            [0, 0],
            [0, max_images - tf.shape(images)[0]],
            [0, 0],
            [0, 0],
            [0, 0]
        ]
    )
    images = tf.squeeze(images)
    return images, label


def tf_process_gallery(category: tf.Tensor, gallery_name: tf.Tensor, label: tf.Tensor) -> Tuple[tf.Tensor, tf.Tensor]:
    return tf.py_function(process_gallery, [category, gallery_name, label], [tf.float32, tf.int64])


def create_dataset(df: pd.DataFrame, batch_size: int) -> tf.data.Dataset:
    dataset = tf.data.Dataset.from_tensor_slices(
        (df['category'].values, df['gallery_name'].values, np.stack(df['one_hot_labels'].values)))
    dataset = dataset.map(tf_process_gallery, num_parallel_calls=tf.data.AUTOTUNE)
    dataset = dataset.batch(batch_size)
    dataset = dataset.prefetch(buffer_size=tf.data.AUTOTUNE)
    return dataset


# Create datasets
batch_size = 32
train_dataset = create_dataset(train_df, batch_size)
test_dataset = create_dataset(test_df, batch_size)

In [None]:
def create_cnn_base() -> Model:
    base_model = EfficientNetB4(include_top=False, input_shape=image_input_shape, weights='imagenet')
    x = GlobalAveragePooling2D()(base_model.output)
    model = Model(inputs=base_model.input, outputs=x)
    return model


cnn_base = create_cnn_base()

In [None]:
def process_variable_input(images: tf.Tensor, max_images: int) -> tf.Tensor:
    num_images = tf.shape(images)[0]
    padded_images = tf.concat([images, tf.zeros([max_images - num_images, *image_input_shape])], axis=0)
    # for zero-images duplicate their value
    image_features = tf.map_fn(lambda img: cnn_base(img), padded_images, dtype=tf.float32)

    mask = tf.sequence_mask([num_images], maxlen=max_images, dtype=tf.float32)
    mask = tf.expand_dims(mask, -1)
    masked_features = image_features * mask

    aggregated_features = tf.reduce_sum(masked_features, axis=0) / tf.reduce_sum(mask, axis=0)
    return aggregated_features

In [None]:
input_images = Input(shape=(max_images, *image_input_shape))
aggregated_features = Lambda(lambda x: process_variable_input(x, max_images))(input_images)
x = Dense(512, activation='relu')(aggregated_features)
output = Dense(num_labels, activation='sigmoid')(x)  # num_labels is the number of classes
model = Model(inputs=input_images, outputs=output)

In [None]:
def weighted_binary_crossentropy(y_true: tf.Tensor, y_pred: tf.Tensor) -> tf.Tensor:
    weights = tf.constant(class_weights, dtype=tf.float32)
    epsilon = tf.keras.backend.epsilon()
    y_pred = tf.clip_by_value(y_pred, epsilon, 1 - epsilon)
    bce = y_true * tf.math.log(y_pred + epsilon) + (1 - y_true) * tf.math.log(1 - y_pred + epsilon)
    weighted_bce = -bce * weights
    return tf.reduce_mean(weighted_bce, axis=-1)

In [None]:
opt = tf.keras.optimizers.Adam(learning_rate=0.001)
model.compile(optimizer=opt, loss=weighted_binary_crossentropy, metrics=[
    tfa.metrics.HammingLoss(mode='multilabel', name='hamming_loss'),
    tfa.metrics.F1Score(num_classes=num_labels, average='micro', name='f1_micro'),
    tfa.metrics.F1Score(num_classes=num_labels, average='macro', name='f1_macro'),
    tfa.metrics.F1Score(num_classes=num_labels, average='weighted', name='f1_weighted')
])
model.summary()

In [None]:
model.fit(train_dataset, epochs=10, validation_data=test_dataset)