TABULAR AUTO DL

In [None]:
import numpy as np
import pandas as pd
import keras_tuner as kt
import tensorflow as tf

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

from tensorflow import keras
from tensorflow.keras import layers, regularizers


class AutoDLTabular:
    """Hyperband-tuned dense network for a CSV file.

    Every column except the last is treated as a feature and the last column
    as the target.  A float target selects regression; any other dtype selects
    classification (labels are integer-encoded with ``LabelEncoder``).

    Typical use: ``train()`` -> ``evaluate()`` -> ``save()``.
    """

    def __init__(self, csv_path):
        """Remember the path of the CSV file; nothing is read until ``load``."""
        self.csv_path = csv_path


    ################################################
    # LOAD DATA
    ################################################

    def load(self):
        """Read the CSV and return ``(X_train, X_test, y_train, y_test)``.

        Side effects: sets ``self.task``, ``self.input_dim``,
        ``self.num_classes``, ``self.scaler`` and, for classification,
        ``self.encoder``.
        """
        df = pd.read_csv(self.csv_path)

        X = df.iloc[:, :-1]
        y = df.iloc[:, -1]

        # detect task
        if "float" in str(y.dtype):
            self.task = "regression"
        else:
            self.task = "classification"

        if self.task == "classification":
            self.encoder = LabelEncoder()
            y = self.encoder.fit_transform(y)

        self.input_dim = X.shape[1]
        # Counted on the full target so classes that happen to be absent from
        # the training split still get an output unit.
        self.num_classes = len(np.unique(y))

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=0.2
        )

        # Fit the scaler on the training rows only; fitting it before the
        # split would leak test-set statistics into training.
        self.scaler = StandardScaler()
        X_train = self.scaler.fit_transform(X_train)
        X_test = self.scaler.transform(X_test)

        return X_train, X_test, y_train, y_test


    ################################################
    # MODEL BUILDER
    ################################################

    def build_model(self, hp):
        """Build and compile one candidate model from the hyperparameters ``hp``.

        Requires ``load`` to have run first (reads ``self.input_dim``,
        ``self.task`` and ``self.num_classes``).  Returns a compiled
        ``keras.Model``.
        """
        inputs = keras.Input(shape=(self.input_dim,))
        x = inputs

        regularizer = regularizers.l2(
            hp.Float("l2", 1e-6, 1e-2, sampling="log")
        )

        for i in range(hp.Int("layers", 1, 8)):

            units = hp.Int(f"units_{i}", 16, 1024, step=16)

            # "activation", "initializer", "batch_norm", "dropout" and
            # "residual" use the same name on every iteration, so one value
            # is shared by all hidden layers.
            dense = layers.Dense(
                units,
                activation=hp.Choice(
                    "activation",
                    ["relu", "gelu", "selu", "swish"]
                ),
                kernel_initializer=hp.Choice(
                    "initializer",
                    ["he_normal", "glorot_uniform"]
                ),
                kernel_regularizer=regularizer
            )(x)

            if hp.Boolean("batch_norm"):
                dense = layers.BatchNormalization()(dense)

            dense = layers.Dropout(
                hp.Float("dropout", 0.0, 0.5, step=0.05)
            )(dense)

            # optional residual (only possible when the widths match)
            if hp.Boolean("residual") and dense.shape[-1] == x.shape[-1]:
                x = layers.Add()([x, dense])
            else:
                x = dense

        ###################################
        # OUTPUT
        ###################################

        if self.task == "regression":

            outputs = layers.Dense(1)(x)
            loss = "mse"
            metrics = ["mae"]

        else:

            if self.num_classes == 2:
                outputs = layers.Dense(1, activation="sigmoid")(x)
                loss = keras.losses.BinaryCrossentropy(
                    label_smoothing=hp.Float(
                        "label_smooth", 0, 0.1
                    )
                )
            else:
                outputs = layers.Dense(
                    self.num_classes,
                    activation="softmax"
                )(x)

                # SparseCategoricalCrossentropy has no `label_smoothing`
                # argument (only the one-hot CategoricalCrossentropy does),
                # so smoothing is tuned for the binary case only.
                loss = keras.losses.SparseCategoricalCrossentropy()

            metrics = ["accuracy"]

        ###################################
        # OPTIMIZER
        ###################################

        lr = hp.Float("lr", 1e-5, 1e-2, sampling="log")

        # hp.Choice only accepts int/float/str/bool values, so choose the
        # optimizer by name and instantiate it afterwards.
        optimizer_name = hp.Choice("optimizer", ["adam", "adamw", "rmsprop"])

        if optimizer_name == "adam":
            optimizer = keras.optimizers.Adam(
                lr,
                clipnorm=hp.Float("clipnorm", 0.5, 5)
            )
        elif optimizer_name == "adamw":
            optimizer = keras.optimizers.AdamW(lr)
        else:
            optimizer = keras.optimizers.RMSprop(lr)

        model = keras.Model(inputs, outputs)

        model.compile(
            optimizer=optimizer,
            loss=loss,
            metrics=metrics
        )

        return model


    ################################################
    # TRAIN
    ################################################

    def train(self):
        """Run the Hyperband search, then retrain the best configuration.

        Side effects: sets ``self.best_hp``, ``self.model`` and
        ``self.test_data`` (the held-out split used by ``evaluate``).
        """
        X_train, X_test, y_train, y_test = self.load()

        build_model = self.build_model

        # Defined here (not at class level) so `kt` is only needed when
        # training actually runs.  Overriding `fit` is what makes
        # "batch_size" part of the tuner's search space; a value drawn from a
        # throwaway `kt.HyperParameters()` is never tuned or recorded.
        class _BatchSizeHyperModel(kt.HyperModel):

            def build(self, hp):
                return build_model(hp)

            def fit(self, hp, model, *args, **kwargs):
                return model.fit(
                    *args,
                    batch_size=hp.Choice(
                        "batch_size", [16, 32, 64, 128, 256]
                    ),
                    **kwargs
                )

        def make_callbacks():
            # Fresh callback objects per training run.
            return [
                keras.callbacks.EarlyStopping(
                    patience=10,
                    restore_best_weights=True
                ),
                keras.callbacks.ReduceLROnPlateau(
                    patience=5,
                    factor=0.3
                ),
            ]

        tuner = kt.Hyperband(
            _BatchSizeHyperModel(),
            objective="val_loss",
            max_epochs=50,
            factor=3,
            overwrite=True
        )

        # Hyperband schedules the epoch count of every trial itself (bounded
        # by `max_epochs`), so no `epochs` argument is passed here.
        tuner.search(
            X_train,
            y_train,
            validation_split=0.2,
            callbacks=make_callbacks()
        )

        self.best_hp = tuner.get_best_hyperparameters(1)[0]

        self.model = tuner.hypermodel.build(self.best_hp)

        self.model.fit(
            X_train,
            y_train,
            validation_split=0.2,
            epochs=100,
            callbacks=make_callbacks(),
            batch_size=self.best_hp.get("batch_size")
        )

        self.test_data = (X_test, y_test)


    def evaluate(self):
        """Return ``model.evaluate`` on the held-out test split (after ``train``)."""
        return self.model.evaluate(*self.test_data)


    def save(self, name="tabular_model"):
        """Save the trained model to ``name``.

        Keras 3 refuses paths without a recognised extension, so ``.keras``
        is appended unless ``name`` already ends in ``.keras``, ``.h5`` or
        ``.hdf5``.
        """
        path = str(name)
        if not path.endswith((".keras", ".h5", ".hdf5")):
            path += ".keras"
        self.model.save(path)

IMAGE AUTO DL

In [None]:
import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras import layers


class AutoDLImage:
    """Hyperband-tuned CNN classifier for a directory of images.

    ``data_dir`` is handed to ``image_dataset_from_directory``, which derives
    the class names from the directory layout.

    Typical use: ``train()`` -> ``evaluate()`` -> ``save()``.
    """

    def __init__(self, data_dir, image_size=128, batch_size=32):
        """Store the dataset location and loading options.

        Args:
            data_dir: directory passed to ``image_dataset_from_directory``.
            image_size: side length (pixels) images are resized to; also the
                model's input height and width.
            batch_size: batch size of the training and validation datasets.
        """
        self.data_dir = data_dir
        self.image_size = image_size
        self.batch_size = batch_size


    #########################################
    # LOAD
    #########################################

    def load(self):
        """Return ``(train, val)`` datasets from an 80/20 split of ``data_dir``.

        Side effect: sets ``self.num_classes`` from the training dataset's
        class names.
        """
        # Both subsets must share the split fraction and seed so that they
        # do not overlap.
        shared = dict(
            validation_split=0.2,
            seed=123,
            image_size=(self.image_size, self.image_size),
            batch_size=self.batch_size
        )

        train = tf.keras.utils.image_dataset_from_directory(
            self.data_dir,
            subset="training",
            **shared
        )

        val = tf.keras.utils.image_dataset_from_directory(
            self.data_dir,
            subset="validation",
            **shared
        )

        # Read before prefetch(): the object returned by prefetch() is not
        # the one `class_names` was read from in the original code.
        self.num_classes = len(train.class_names)

        return train.prefetch(tf.data.AUTOTUNE), val.prefetch(tf.data.AUTOTUNE)


    #########################################
    # MODEL
    #########################################

    def build_model(self, hp):
        """Build and compile one candidate CNN from the hyperparameters ``hp``.

        Requires ``load`` to have run first (reads ``self.num_classes``).
        Returns a compiled ``keras.Model``.
        """
        inputs = keras.Input(shape=(self.image_size, self.image_size, 3))

        x = layers.Rescaling(1./255)(inputs)

        if hp.Boolean("augment"):

            x = keras.Sequential([
                layers.RandomFlip("horizontal"),
                layers.RandomRotation(0.1),
                layers.RandomZoom(0.2),
            ])(x)

        ###################################
        # CONV BLOCKS
        ###################################

        for i in range(hp.Int("conv_blocks",1,5)):

            filters = hp.Int(f"filters_{i}",32,512,32)

            # "separable", "kernel", "bn" and "spatial_dropout" use the same
            # name on every iteration, so one value is shared by all blocks.
            if hp.Boolean("separable"):
                x = layers.SeparableConv2D(
                    filters,
                    kernel_size=hp.Choice(
                        "kernel",[3,5]
                    ),
                    activation="relu",
                    padding="same"
                )(x)
            else:
                x = layers.Conv2D(
                    filters,
                    3,
                    activation="relu",
                    padding="same"
                )(x)

            if hp.Boolean("bn"):
                x = layers.BatchNormalization()(x)

            x = layers.MaxPooling2D()(x)

            if hp.Boolean("spatial_dropout"):
                x = layers.SpatialDropout2D(0.2)(x)

        ###################################
        # HEAD
        ###################################

        if hp.Boolean("global_pool"):
            x = layers.GlobalAveragePooling2D()(x)
        else:
            x = layers.Flatten()(x)

        for i in range(hp.Int("dense",1,3)):
            x = layers.Dense(
                hp.Int(f"dense_units_{i}",64,512,64),
                activation="relu"
            )(x)

            x = layers.Dropout(
                hp.Float("drop",0,0.6,0.1)
            )(x)

        outputs = layers.Dense(
            self.num_classes,
            activation="softmax"
        )(x)

        lr = hp.Float("lr",1e-5,1e-3,sampling="log")

        # hp.Choice only accepts int/float/str/bool values, so choose the
        # optimizer by name and instantiate it afterwards.
        if hp.Choice("opt", ["adam", "adamw"]) == "adam":
            optimizer = keras.optimizers.Adam(lr)
        else:
            optimizer = keras.optimizers.AdamW(lr)

        model = keras.Model(inputs, outputs)

        model.compile(
            optimizer=optimizer,
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"]
        )

        return model


    #########################################
    # TRAIN
    #########################################

    def train(self):
        """Run the Hyperband search, then retrain the best configuration.

        Side effects: sets ``self.best_hp``, ``self.model`` and ``self.val``
        (the validation dataset later used by ``evaluate``).
        """
        train, val = self.load()

        def make_early_stopping():
            # Fresh callback object per training run.
            return keras.callbacks.EarlyStopping(
                patience=7,
                restore_best_weights=True
            )

        tuner = kt.Hyperband(
            self.build_model,
            objective="val_accuracy",
            max_epochs=30,
            overwrite=True
        )

        # Hyperband schedules the epoch count of every trial itself (bounded
        # by `max_epochs`), so no `epochs` argument is passed here.
        tuner.search(
            train,
            validation_data=val,
            callbacks=[make_early_stopping()]
        )

        self.best_hp = tuner.get_best_hyperparameters(1)[0]

        self.model = tuner.hypermodel.build(self.best_hp)

        self.model.fit(
            train,
            validation_data=val,
            epochs=50,
            callbacks=[make_early_stopping()]
        )

        self.val = val


    def evaluate(self):
        """Return ``model.evaluate`` on the validation dataset.

        NOTE: this is the same data that guided the hyperparameter search and
        early stopping, so the score is optimistic; no separate test split
        is created by this class.
        """
        return self.model.evaluate(self.val)


    def save(self, name="image_model"):
        """Save the trained model to ``name``.

        Keras 3 refuses paths without a recognised extension, so ``.keras``
        is appended unless ``name`` already ends in ``.keras``, ``.h5`` or
        ``.hdf5``.
        """
        path = str(name)
        if not path.endswith((".keras", ".h5", ".hdf5")):
            path += ".keras"
        self.model.save(path)


In [None]:
# ⭐ HOW TO USE
# 1️⃣ Tabular / Text
# NOTE(review): this notebook defines AutoDLTabular and AutoDLImage but no
# `AutoDL`. This cell presumably relies on a separate `auto_dl` module that
# exports an `AutoDL` class -- confirm it exists; otherwise the class defined
# above, AutoDLTabular("data.csv"), offers the same train/evaluate/save calls.
from auto_dl import AutoDL

# Construct the trainer from the path of a CSV file.
trainer = AutoDL("data.csv")

trainer.train()

trainer.evaluate()

trainer.save("my_model")

In [None]:
# ⭐ 2️⃣ Image
# NOTE(review): `AutoDL` is not imported in this cell, so it depends on the
# import in the previous cell having run. Presumably `AutoDL` dispatches on
# the path type (directory vs. CSV) -- confirm; the class defined above for
# image folders is AutoDLImage.
trainer = AutoDL("images/")
trainer.train()