In [1]:
# --- Notebook-wide settings -------------------------------------------------

# Number of log files per batch when the text datasets are built.
BATCH_SIZE = 1_024

# NOTE(review): not referenced in the visible part of the notebook — confirm
# whether this is still needed.
PAD_SIZE = 80

# Active target names. "AreaRatio" and "Stacks" are currently disabled.
# NOTE(review): not referenced in the visible part of the notebook.
TARGET_LABELS = ["Improvement", "Solved"]

# Single seed shared by NumPy, TensorFlow and the dataset shuffling.
SEED = 3_093_453

# Preprocessing

## Load Packages and Data

In [2]:
import os
import sys

import numpy as np
import pandas as pd
import polars as pl

import tensorflow as tf

import matplotlib.pyplot as plt
import seaborn as sns

# Reproducibility: seed the global TensorFlow and NumPy generators.
tf.random.set_seed(SEED)
np.random.seed(SEED)

# Display options: keep long arrays on a single (very wide) line, show up to
# 30 items at each edge before eliding, and suppress scientific notation.
np.set_printoptions(edgeitems=30, linewidth=100_000, suppress=True)

# Seaborn look for every figure in the notebook.
sns.set(context="talk", style="darkgrid")


# Parent of the current working directory; the data folders are resolved
# against it. os.path.dirname returns the same head that os.path.split would,
# without binding the throwaway name `_` — IPython reserves `_` for the
# previous cell output and stops updating it once user code assigns to it.
cwd = os.path.dirname(os.getcwd())

In [3]:
import tensorflow as tf
from tensorflow import keras

# Folders holding one text file per sample, resolved against `cwd`.
# File counts from a recorded run: train 489623, validation 116586
# (489623 + 116586 = 606209), test 104591.
train_samples, val_samples, test_samples = (
    os.path.join(cwd, "data", split_name)
    for split_name in ("train", "validation", "test")
)


def _read_text_split(directory):
    """Load one split as an unlabeled, shuffled, batched text dataset.

    Every split uses the same settings: no labels, batches of BATCH_SIZE,
    no truncation of the text (max_length=None) and shuffling seeded with SEED.
    """
    return keras.preprocessing.text_dataset_from_directory(
        directory=directory,
        labels=None,
        batch_size=BATCH_SIZE,
        max_length=None,
        shuffle=True,
        seed=SEED,
    )


train_dataset = _read_text_split(train_samples)
val_dataset = _read_text_split(val_samples)
test_dataset = _read_text_split(test_samples)



Found 489623 files belonging to 1 classes.


2024-02-18 09:47:24.591914: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M1 Pro
2024-02-18 09:47:24.591950: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 16.00 GB
2024-02-18 09:47:24.591953: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 5.33 GB
2024-02-18 09:47:24.591991: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-02-18 09:47:24.592020: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Found 116586 files belonging to 1 classes.
Found 104591 files belonging to 1 classes.


In [4]:
# Peek at the first record of the first training batch. `X` deliberately
# stays bound to that batch: the next cell feeds it to get_unpacked_area.
for X in train_dataset.take(1):
    print(X[0])

tf.Tensor(b"dataset: B\ninstance: MA2\n+-----------+\n2D Packing MIP with Time Limit 15 [s] in Truck P355037001\nStack 0 with items: ['0090016200_26102022000265', '0090016200_26102022000255']\nStack 1 with items: ['0090016200_26102022012372', '0090016200_26102022012372', '0090016200_26102022017816']\nStack 2 with items: ['0090016200_26102022006635', '0090016200_26102022002470', '0090016200_26102022013378']\nStack 3 with items: ['0090016200_26102022015271', '0090016200_26102022007589', '0090016200_26102022015271']\nStack 4 with items: ['0090016200_26102022001327', '0090016200_26102022001327']\nStack 5 with items: ['0090016200_26102022015678', '0090016200_26102022004493', '0090016200_26102022015678']\nStack 6 with items: ['0090016200_26102022019753', '0090016200_26102022019604', '0090016200_26102022019753']\nStack 7 with items: ['0090016200_26102022016002', '0090016200_26102022016002', '0090016200_26102022016002']\nStack 8 with items: ['0090016200_26102022012624', '0090016200_26102022012

In [5]:
def get_unpacked_area(X):
    """Extract the unpacked-area feature and the improvement label from logs.

    Parameters
    ----------
    X : 1-D string tensor
        One raw solver log per element (a batch from the text datasets).

    Returns
    -------
    tuple of two float32 tensors, one value per log
        * ``1 - ratio``, where ``ratio`` is the number that follows the first
          ``"- packed 2D Vol Ratio: "`` marker in the log;
        * ``1.0`` if the log contains ``"MIP Improvement"`` at least twice,
          else ``0.0``.

    Notes
    -----
    Every log is expected to contain the ratio marker; a log without it
    leaves an empty row that cannot be squeezed.
    """
    # Splitting on the marker yields (occurrences + 1) pieces per log, so more
    # than two pieces means more than one MIP improvement. row_lengths() reads
    # the piece count straight off the ragged result, replacing the per-row
    # tf.map_fn(len, ..., dtype=...) call and its deprecation warning.
    pieces = tf.strings.split(X, "MIP Improvement")
    y = tf.cast(pieces.row_lengths() > 2, tf.float32)

    # Keep only the text right after the first ratio marker ...
    ratio = tf.strings.split(X, "- packed 2D Vol Ratio: ")[:,1:2]
    # ... and of that, only the first space-delimited token (the number).
    ratio = tf.strings.split(ratio, " ")[:,:,:1]
    ratio = tf.squeeze(ratio, axis=(1,2))
    ratio = tf.strings.to_number(ratio, out_type=tf.float32)

    # Packed ratio -> unpacked share.
    return 1-ratio, y

# NOTE(review): the commented-out call below refers to `get_intial_area`,
# which is not defined in the visible part of the notebook.
#y = get_intial_area(X)
#y
# Run the extractor eagerly on the batch `X` left over from the preview cell
# and display the resulting feature tensor.
XX, y = get_unpacked_area(X)
XX

<tf.Tensor: shape=(1024,), dtype=float32, numpy=array([0.1016925 , 0.05063021, 0.09620732, 0.13915849, 0.07468122, 0.08084244, 0.10040158, 0.1904068 , 0.10051888, 0.19854283, 0.05282331, 0.30105102, 0.13895583, 0.05172676, 0.32969034, 0.06921673, 0.0953989 , 0.07358468, 0.14644444, 0.18393576, 0.04237008, 0.27540982, 0.09653914, 0.10091072, 0.11468732, 0.1838488 , 0.2464481 , 0.0770328 , 0.08755022, 0.11839706, ..., 0.06739527, 0.05154616, 0.10746813, 0.14574862, 0.16861719, 0.06739527, 0.04553735, 0.06739527, 0.08925319, 0.1604681 , 0.05282331, 0.07103825, 0.1340437 , 0.3114754 , 0.05379111, 0.05282331, 0.05282331, 0.0627116 , 0.16854733, 0.16787148, 0.10510015, 0.10040158, 0.06739527, 0.16612023, 0.12908137, 0.05153793, 0.10432971, 0.10509837, 0.05282331, 0.09029752], dtype=float32)>

In [6]:
# Turn every split from raw log strings into (feature, label) pairs.
# NOTE: this rebinds the dataset names, so the cell must run exactly once
# per kernel session.
train_dataset, val_dataset, test_dataset = (
    split.map(get_unpacked_area)
    for split in (train_dataset, val_dataset, test_dataset)
)

Instructions for updating:
Use fn_output_signature instead


# DNN

In [7]:
from keras import Model, layers

# Build a shallow model that just parrots the input of the extracted 2D Volume
# (the logs call it "2D Vol", although the quantity is really an area).
# The model has no layers besides the input, hence no parameters.

# `shape` must be a tuple: `(1)` is just the integer 1, whereas `(1,)` is the
# one-element tuple describing a single scalar feature per sample.
input_layer = layers.Input(shape=(1,))
model = Model(
    inputs=input_layer,
    outputs=input_layer
)
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 1)]               0         
                                                                 
Total params: 0 (0.00 Byte)
Trainable params: 0 (0.00 Byte)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [8]:
# Cut-off shared by the thresholded metrics (precision, recall, accuracy).
decision_threshold = .1
# sns for training set
sensitivity_target = 0.6565

loss_fn = tf.keras.losses.BinaryCrossentropy()

# Keras' own F1Score metric is left out on purpose; F1 is computed by hand
# from PRC and SNS further down.
evaluation_metrics = [
    tf.keras.metrics.SpecificityAtSensitivity(sensitivity_target),
    tf.keras.metrics.Precision(name = "PRC", thresholds=decision_threshold),
    tf.keras.metrics.Recall(name = "SNS", thresholds=decision_threshold),
    tf.keras.metrics.AUC(curve='PR', name="AUC"),
    tf.keras.metrics.BinaryAccuracy(name = "ACC", threshold=decision_threshold),
]

model.compile(loss=loss_fn, metrics=evaluation_metrics)

# Prediction

In [9]:
# Score the pass-through model on every split. The model has no trainable
# weights, so there is nothing to fit; `evaluate` reports the compiled loss
# and metrics without running a training step on validation or test data.
for dataset in [
    train_dataset,
    val_dataset,
    test_dataset
]:
    model.evaluate(dataset)
    

2024-02-18 09:47:34.142586: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.




In [10]:
def F1_score(sns, prc):
    """Return the F1 score, the harmonic mean of sensitivity and precision.

    Parameters
    ----------
    sns : float
        Sensitivity (recall). Inside this function the name shadows the
        seaborn alias `sns`.
    prc : float
        Precision.

    Returns
    -------
    float
        ``2 * sns * prc / (sns + prc)``, or ``0.0`` when both inputs are zero
        (the formula is undefined there; 0.0 is the conventional value).
    """
    try:
        return 2*(sns*prc) / (sns+prc)
    except ZeroDivisionError:
        # Both inputs are 0: no positives were retrieved correctly at all.
        return 0.0

# Hand-copied metric pairs, one per evaluated split.
# NOTE(review): presumably the PRC / SNS values reported for the train,
# validation and test runs — confirm against the run output. The argument
# order does not matter because F1 is symmetric in its two inputs.
for first, second in (
    (0.6287, 0.6565),
    (0.5974, 0.6565),
    (0.6047, 0.6258),
):
    print(round(F1_score(first, second), 4))

0.6423
0.6256
0.6151


In [11]:
# 2* 0.3221 * 0.3742 /(0.3221 +0.3742)
# = 0.3462008329742927

# Ignore the Keras F1 score: its values do not agree with the F1 formula,
# no matter which `average` option is chosen.