In [None]:
#| default_exp ml_model

In [None]:
#| export
from relax.import_essentials import *
from relax.data_module import DataModule, DEFAULT_DATA_CONFIGS
from relax.utils import validate_configs
from relax.base import *
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from urllib.request import urlretrieve

Using JAX backend.


An NVIDIA GPU may be present on this machine, but a CUDA-enabled jaxlib is not installed. Falling back to cpu.


## ML Module

In [None]:
#| export
@keras.saving.register_keras_serializable()
class MLPBlock(keras.layers.Layer):
    """MLP block with leaky relu activation and dropout/batchnorm.

    The block applies, in order: a dense layer with `leaky_relu` activation
    (He-normal kernel initializer), dropout, and -- only when
    `use_batch_norm` is true -- batch normalization.

    `output_size` is the number of units of the dense layer, `dropout_rate`
    is forwarded to the dropout layer, and any extra keyword arguments
    (e.g. `name`, `dtype`) are forwarded to `keras.layers.Layer`.
    """

    def __init__(
        self, 
        output_size: int, 
        dropout_rate: float = 0.3,
        use_batch_norm: bool = False,
        **kwargs
    ):
        # Forward the standard layer kwargs so the block can be named and
        # re-created from the dictionary returned by `get_config`.
        super().__init__(**kwargs)
        self.output_size = output_size
        self.dropout_rate = dropout_rate
        self.use_batch_norm = use_batch_norm
        if use_batch_norm and dropout_rate > 0:
            warnings.warn("Batch normalization and dropout are usually mutually exclusive.")

    def build(self, input_shape):
        """Create the sub-layers; called by Keras before the first `call`."""
        self.dense = keras.layers.Dense(
            self.output_size, activation='leaky_relu', 
            kernel_initializer='he_normal'
        )
        self.dropout = keras.layers.Dropout(self.dropout_rate)
        # The batch-norm layer only exists when it was requested.
        if self.use_batch_norm:
            self.batch_norm = keras.layers.BatchNormalization()

    def call(self, x, training=False):
        """Apply dense -> dropout -> (optional) batch norm to `x`."""
        x = self.dense(x)
        x = self.dropout(x, training=training)
        if self.use_batch_norm:
            x = self.batch_norm(x, training=training)
        return x

    def get_config(self):
        """Return the constructor arguments on top of the base layer config."""
        config = super().get_config()
        config.update({
            'output_size': self.output_size,
            'dropout_rate': self.dropout_rate,
            'use_batch_norm': self.use_batch_norm,
        })
        return config

@keras.saving.register_keras_serializable()
class MLP(keras.Model):
    """MLP model with multiple MLP blocks and a dense layer at the end.

    One `MLPBlock` is created per entry of `sizes`; every block shares the
    same `dropout_rate` and `use_batch_norm` setting. The final dense layer
    has `output_size` units and uses `last_activation`. Extra keyword
    arguments are forwarded to `keras.Model`.
    """
    
    def __init__(
        self, 
        sizes: list, 
        output_size: int = 2,
        dropout_rate: float = 0.3,
        use_batch_norm: bool = False,
        last_activation: str = 'softmax',
        **kwargs
    ):
        super().__init__(**kwargs)
        # Keep the block hyper-parameters on the model itself so that
        # `get_config` does not have to read them from `self.blocks[0]`,
        # which does not exist when `sizes` is empty.
        self.dropout_rate = dropout_rate
        self.use_batch_norm = use_batch_norm
        self.blocks = []
        for size in sizes:
            self.blocks.append(MLPBlock(size, dropout_rate, use_batch_norm))
        self.dense = keras.layers.Dense(output_size, activation=last_activation)

    def call(self, x, training=False):
        """Run `x` through every block in order, then the final dense layer."""
        for block in self.blocks:
            x = block(x, training=training)
        return self.dense(x)

    def get_config(self):
        """Return the constructor arguments needed to rebuild this model.

        `use_batch_norm` is part of the config: without it a model saved with
        batch normalization would be rebuilt without those layers on load.
        """
        return {
            'sizes': [block.output_size for block in self.blocks],
            'output_size': self.dense.units,
            'dropout_rate': self.dropout_rate,
            'use_batch_norm': self.use_batch_norm,
            'last_activation': self.dense.activation.__name__,
        }

In [None]:
#| export
class MLModuleConfig(BaseConfig):
    """Configurator of `MLModule`.

    Bundles the network settings (`sizes`, `output_size`, `dropout_rate`)
    with the compile settings (`lr`, `opt_name`, `loss`, `metrics`).
    """

    # --- network architecture ---
    sizes: List[int] = Field(
        [64, 32, 16],
        description="List of hidden layer sizes.",
    )
    output_size: int = Field(
        2,
        description="The number of output classes.",
    )
    dropout_rate: float = Field(
        0.3,
        description="Dropout rate.",
    )

    # --- optimizer / loss / metrics passed to `model.compile` ---
    lr: float = Field(
        1e-3,
        description="Learning rate.",
    )
    opt_name: str = Field(
        "adam",
        description="Optimizer name.",
    )
    loss: str = Field(
        "sparse_categorical_crossentropy",
        description="Loss function name.",
    )
    metrics: List[str] = Field(
        ["accuracy"],
        description="List of metrics names.",
    )


In [None]:
#| export
class MLModule(BaseModule, TrainableMixedin, PredFnMixedin):
    """Module wrapping a compiled Keras model together with its configuration.

    The module builds (or receives) a Keras model, compiles it from the
    settings in `MLModuleConfig`, and exposes training, prediction and
    save/load helpers.
    """

    def __init__(self, config: MLModuleConfig = None, *, model: keras.Model = None, name: str = None):
        """Create the module.

        `config` defaults to `MLModuleConfig()` and is passed through
        `validate_configs`. When `model` is `None` an `MLP` is built from
        the config; otherwise the given model is used (and compiled).
        """
        if config is None:
            config = MLModuleConfig()
        config = validate_configs(config, MLModuleConfig)
        self.model = self._init_model(config, model)
        self._is_trained = False
        super().__init__(config, name=name)

    def _init_model(self, config: MLModuleConfig, model: keras.Model):
        """Build the default `MLP` if no model is given, then compile the model."""
        if model is None:
            model = MLP(
                sizes=config.sizes,
                output_size=config.output_size,
                dropout_rate=config.dropout_rate
            )
        # A user-supplied model is compiled here as well, using the optimizer,
        # loss and metrics named in the config.
        model.compile(
            optimizer=keras.optimizers.get({
                'class_name': config.opt_name, 
                'config': {'learning_rate': config.lr}
            }),
            loss=config.loss,
            metrics=config.metrics
        )
        return model
            
    def train(
        self, 
        data: DataModule, 
        batch_size: int = 128,
        epochs: int = 10,
        **fit_kwargs
    ):
        """Fit the model and return `self`.

        `data` is either a `DataModule` (its `'train'` entry is used) or
        anything that unpacks into an `(X_train, y_train)` pair. Extra keyword
        arguments are forwarded to `keras.Model.fit`.
        """
        if isinstance(data, DataModule):
            X_train, y_train = data['train']
        else:
            X_train, y_train = data
        self.model.fit(
            X_train, y_train, 
            batch_size=batch_size, 
            epochs=epochs,
            **fit_kwargs
        )
        self._is_trained = True
        return self
    
    @property
    def is_trained(self) -> bool:
        """Whether `train` has completed or the module was loaded from disk."""
        return self._is_trained
    
    def save(self, path):
        """Write `model.keras` and `config.json` into the directory `path`."""
        path = Path(path)
        # `exist_ok=True` avoids the check-then-create race of a separate
        # `exists()` test and creates missing parent directories as well.
        path.mkdir(parents=True, exist_ok=True)
        self.model.save(path / "model.keras")
        with open(path / "config.json", "w") as f:
            json.dump(self.config.dict(), f)

    @classmethod
    def load_from_path(cls, path):
        """Rebuild a module from a directory written by `save`.

        The returned module is flagged as trained.
        """
        path = Path(path)
        # Read the config inside a context manager so the file handle is
        # closed deterministically.
        with open(path / "config.json") as f:
            config = MLModuleConfig(**json.load(f))
        # Load without the saved compile state; `cls(...)` compiles the model
        # again from `config`.
        model = keras.saving.load_model(path / "model.keras", compile=False)
        module = cls(config, model=model)
        module._is_trained = True
        return module
    
    def pred_fn(self, x):
        """Return the model output for `x` in inference mode.

        Raises `ValueError` when the module has not been trained.
        """
        if not self.is_trained:
            raise ValueError("Model is not trained.")
        return self.model(x, training=False)

In [None]:
# Synthetic classification data: 5000 samples with 10 features, 5 of them informative.
X, y = make_classification(
    n_samples=5000,
    n_features=10,
    n_informative=5,
    random_state=42,
)

# Seeded train/test split (split size left at the scikit-learn default).
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42
)

In [None]:
# Train an MLModule with three hidden layers on the synthetic training split.
model = MLModule(MLModuleConfig(sizes=[64, 32, 16]))
model.train((X_train, y_train), epochs=5)
# `train` must flag the module as trained.
assert model.is_trained

Epoch 1/5
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.5163 - loss: 1.2818 
Epoch 2/5
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7131 - loss: 0.6371
Epoch 3/5
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7410 - loss: 0.5505
Epoch 4/5
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7581 - loss: 0.5286
Epoch 5/5
[1m30/30[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.7812 - loss: 0.4631


In [None]:
# Persist the trained module: writes `model.keras` and `config.json` under `tmp/model`.
model.save('tmp/model')

In [None]:
# Round-trip check: a module restored from disk is flagged as trained and
# reproduces the predictions of the module it was saved from.
model_1 = MLModule.load_from_path('tmp/model')
assert model_1.is_trained
assert np.allclose(
    model_1.pred_fn(X_test),
    model.pred_fn(X_test),
)

In [None]:
#| hide
# Clean up: delete the `tmp` directory written by `model.save('tmp/model')`.
# `ignore_errors=True` keeps this cell from failing when the directory is already gone.
shutil.rmtree('tmp', ignore_errors=True)

In [None]:
#| hide
from relax.data_module import DataModule, DEFAULT_DATA_CONFIGS, load_data
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

In [None]:
#| hide
#| eval: false
# def train_ml_model_and_rf(data, config={}):
#     dm = load_data(data)
#     file_path = f"assets/{data}/model/model.keras"
#     conf_path = f"assets/{data}/model/config.json"
#     ckpt_cb = keras.callbacks.ModelCheckpoint(
#         filepath=file_path,
#         monitor='val_accuracy',
#         mode='max',
#         save_best_only=True
#     )
#     train_xs, train_ys = dm['train']
#     test_xs, test_ys = dm['test']
#     model = MLModule(config).train(
#         dm, validation_data=dm['test'], callbacks=[ckpt_cb]
#     )
#     model.config.save(conf_path)
#     # Load the best model
#     model = MLModule.load_from_path(f"assets/{data}/model")

#     rf = RandomForestClassifier().fit(train_xs, train_ys.reshape(-1))
#     rf_acc = accuracy_score(test_ys, rf.predict(test_xs))
#     model_acc = accuracy_score(test_ys, model.pred_fn(test_xs).argmax(axis=1))
#     return rf_acc, model_acc


In [None]:
# models = []
# for data in DEFAULT_DATA_CONFIGS.keys():
#     rf_acc, model_acc = train_ml_model_and_rf(data)
#     if rf_acc > model_acc:
#         models.append((data, rf_acc, model_acc))

In [None]:
# data = "dummy"
# dm = load_data(data)
# file_path = f"assets/{data}/model/model.keras"
# conf_path = f"assets/{data}/model/config.json"
# ckpt_cb = keras.callbacks.ModelCheckpoint(
#     filepath=file_path,
#     monitor='val_accuracy',
#     mode='max',
#     save_best_only=True
# )
# train_xs, train_ys = dm['train']
# test_xs, test_ys = dm['test']
# model = MLModule({
#     'sizes': [128, 64, 32, 16],
#     'dropout_rate': 0.3, 'lr': 0.001,
#     'opt_name': 'adamw'
# }).train(
#     dm, validation_data=dm['test'], callbacks=[ckpt_cb], batch_size=64, epochs=10
# )
# model.config.save(conf_path)
# # Load the best model
# model = MLModule.load_from_path(f"assets/{data}/model")


# rf = RandomForestClassifier().fit(train_xs, train_ys.reshape(-1))
# rf_acc = accuracy_score(test_ys, rf.predict(test_xs))
# model_acc = accuracy_score(test_ys, model.pred_fn(test_xs).argmax(axis=1))

# rf_acc, model_acc

## Load ML Module

**TODO:** Add test cases for `download_ml_module` and `load_ml_module`.

In [None]:
#| export
def download_ml_module(name: str, path: str = None):
    """Download the pretrained model files for `name` unless already present.

    Fetches `model.keras` and `config.json` from the ReLax-Assets repository
    on Hugging Face into `path` (default: `relax-assets/<name>/model`). The
    directory is created when missing, and a file that already exists is not
    downloaded again.

    Each file is first written to a temporary `<filename>.part` and renamed
    to its final name only after the download finished. An interrupted
    download therefore never leaves a truncated file that a later call would
    mistake for a complete one. Errors raised by `urlretrieve` propagate to
    the caller.
    """
    if path is None:
        target_dir = Path('relax-assets') / name / 'model'
    else:
        target_dir = Path(path)
    target_dir.mkdir(parents=True, exist_ok=True)

    base_url = f"https://huggingface.co/datasets/birkhoffg/ReLax-Assets/resolve/main/{name}/model"
    for filename in ("model.keras", "config.json"):
        target = target_dir / filename
        if target.exists():
            continue
        partial = target_dir / f"{filename}.part"
        try:
            urlretrieve(f"{base_url}/{filename}", filename=str(partial))
            partial.replace(target)
        finally:
            # After a successful rename the partial file is gone; after a
            # failure, remove whatever was written so far.
            if partial.exists():
                partial.unlink()
    

def load_ml_module(name: str) -> MLModule:
    """Download (if needed) and load the pretrained `MLModule` for `name`.

    `name` must be a key of `DEFAULT_DATA_CONFIGS`; otherwise a `ValueError`
    is raised. The model files are stored under `relax-assets/<name>/model`.
    """

    if name not in DEFAULT_DATA_CONFIGS.keys():
        raise ValueError(f'`name` must be one of {list(DEFAULT_DATA_CONFIGS.keys())}, '
            f'but got name={name}.')

    # Compute the directory once so the download target and the load source
    # cannot drift apart.
    model_dir = Path('relax-assets') / name / 'model'
    download_ml_module(name, model_dir)
    return MLModule.load_from_path(model_dir)

In [None]:
# Load every pretrained module and score it on the test split of its dataset.
for name in DEFAULT_DATA_CONFIGS.keys():
    dm = load_data(name)
    ml_model = load_ml_module(name)
    # Use loop-local names: rebinding the notebook-level `X_train` / `X_test`
    # here would silently change the data used by the AutoEncoder cells below.
    test_xs, test_ys = dm['test']
    model_acc = accuracy_score(test_ys, ml_model.pred_fn(test_xs).argmax(axis=1))

## AutoEncoder

In [None]:
#| export
class AutoEncoder(keras.Model):
    """Autoencoder made of an encoder stack, a decoder stack and an output layer.

    The encoder and decoder are `keras.Sequential` stacks holding one
    `MLPBlock` per entry of `enc_sizes` / `dec_sizes`, all sharing
    `dropout_rate`. The output is produced by a dense layer with
    `output_size` units and `last_activation`.
    """

    def __init__(
        self,
        enc_sizes: List[int],
        dec_sizes: List[int],
        output_size: int,
        dropout_rate: float = 0.2,
        last_activation: str = "sigmoid",
        name: str = "autoencoder",
        **kwargs
    ):
        super().__init__(name=name, **kwargs)

        # Encoder: one block per requested width.
        encoder_blocks = []
        for width in enc_sizes:
            encoder_blocks.append(MLPBlock(width, dropout_rate=dropout_rate))
        self.encoder = keras.Sequential(encoder_blocks)

        # Decoder: same construction, driven by `dec_sizes`.
        decoder_blocks = []
        for width in dec_sizes:
            decoder_blocks.append(MLPBlock(width, dropout_rate=dropout_rate))
        self.decoder = keras.Sequential(decoder_blocks)

        self.output_layer = keras.layers.Dense(output_size, activation=last_activation)

    def call(self, x, training=False):
        """Encode `x`, decode the result and return the output layer's reconstruction."""
        latent = self.encoder(x, training=training)
        decoded = self.decoder(latent, training=training)
        return self.output_layer(decoded, training=training)

In [None]:
# Small autoencoder with a 5-unit bottleneck and no activation on the output
# layer, trained with a mean-squared-error loss.
ae = AutoEncoder(
    enc_sizes=[10, 5],
    dec_sizes=[5, 10],
    output_size=10,
    last_activation=None,
)
ae.compile(optimizer='adam', loss='mse')

In [None]:
# Inputs double as targets: the autoencoder is fitted to reconstruct `X_train`.
ae.fit(
    X_train, X_train,
    epochs=5,
    batch_size=128,
)

Epoch 1/5
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 161ms/step - loss: 0.3980
Epoch 2/5
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.3583
Epoch 3/5
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.3352
Epoch 4/5
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.3157
Epoch 5/5
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.2967


<keras_core.src.callbacks.history.History>