# Debugging autoreload

In [ ]:
# Enable IPython's autoreload extension in mode 2, so that imported modules
# are re-imported automatically when their source changes.
%load_ext autoreload
%autoreload 2

# Load packages

In [None]:
from pytorch_tabular.utils import load_covertype_dataset
from rich.pretty import pprint
from sklearn.model_selection import train_test_split
import numpy as np
from pytorch_tabular.utils import make_mixed_dataset, print_metrics
from pytorch_tabular import available_models
from pytorch_tabular import TabularModel
from pytorch_tabular.models import CategoryEmbeddingModelConfig, GANDALFConfig
from pytorch_tabular.config import DataConfig, OptimizerConfig, TrainerConfig
from pytorch_tabular.models.common.heads import LinearHeadConfig
from pytorch_lightning.callbacks import DeviceStatsMonitor

In [None]:
# Synthetic regression data: 10000 samples, 20 features, zero categorical
# features requested and a single target.
target_col = "target"
data, cat_col_names, num_col_names = make_mixed_dataset(
    task="regression",
    n_samples=10000,
    n_features=20,
    n_categories=0,
    n_targets=1,
    random_state=42,
)

# Two successive seeded splits: the first holds out the test frame, the second
# divides what is left into training and validation frames.
train_and_val, test = train_test_split(data, random_state=42)
train, val = train_test_split(train_and_val, random_state=42)

# DataConfig

In [None]:
# Data settings are collected in a plain dict first and then expanded into
# DataConfig, which keeps the individual options easy to tweak while debugging.
data_config_options = {
    "target": [target_col],
    "continuous_cols": num_col_names,
    "categorical_cols": cat_col_names,
    "validation_split": 0.2,
    "continuous_feature_transform": "quantile_normal",
    "normalize_continuous_features": True,
    "num_workers": 0,
    "pin_memory": True,
}
data_config = DataConfig(**data_config_options)

# OptimizerConfig

In [None]:
# Adam optimizer with a CosineAnnealingWarmRestarts learning-rate schedule;
# the scheduler's own parameters are named separately for readability.
scheduler_params = {"T_0": 10, "T_mult": 1, "eta_min": 1e-5}
optimizer_config = OptimizerConfig(
    optimizer="Adam",
    lr_scheduler="CosineAnnealingWarmRestarts",
    lr_scheduler_params=scheduler_params,
    lr_scheduler_monitor_metric="valid_loss",
)

# ModelConfig

In [None]:
# Pretty-print whatever available_models() reports.
model_names = available_models()
pprint(model_names)

In [None]:
# Head: an empty `layers` string means no additional layer in the head, just a
# mapping layer to output_dim.
head_settings = LinearHeadConfig(
    layers="",
    activation="ReLU",
    dropout=0.1,
    use_batch_norm=False,
    initialization="kaiming",
)
# The model config takes the head settings as a dict (OmegaConf doesn't accept
# objects), so hand over the instance's attribute dictionary.
head_config = vars(head_settings)

# Output range for the single target, taken from the training frame.
target_values = train[target_col]
target_bounds = [(float(target_values.min()), float(target_values.max()))]

model_config = CategoryEmbeddingModelConfig(
    # Task, head and training objective.
    task="regression",
    head="LinearHead",
    head_config=head_config,
    learning_rate=1e-3,
    loss="L1Loss",
    metrics=["mean_absolute_error", "pearson_corrcoef"],
    target_range=target_bounds,
    seed=42,
    # Backbone settings.
    layers="64-32-16",
    activation="LeakyReLU",
    use_batch_norm=False,
    initialization="kaiming",
    dropout=0.1,
)

# TabularModel

In [None]:
# Model and trainer settings are passed as paths to YAML files, while the data
# and optimizer settings are passed as in-memory config objects.
# NOTE(review): both paths are absolute and machine-specific, and the
# `model_config` object built earlier in this notebook is not passed here --
# confirm that loading from YAML is intended.
model_config_path = "D:/Work/bbs/notebooks/immunology/001_pytorch_tabular_SImAge_log/CategoryEmbeddingModelConfig.yaml"
trainer_config_path = "D:/Work/bbs/notebooks/immunology/001_pytorch_tabular_SImAge_log/TrainerConfig.yaml"

tabular_model = TabularModel(
    data_config=data_config,
    model_config=model_config_path,
    optimizer_config=optimizer_config,
    trainer_config=trainer_config_path,
    verbose=True,
    suppress_lightning_logger=False,
)

# Training the model

In [None]:
# Fit on the explicit train/validation frames with a DeviceStatsMonitor
# callback attached.
# Currently disabled option: target_transform=[np.log, np.exp]
training_callbacks = [DeviceStatsMonitor()]
tabular_model.fit(train=train, validation=val, callbacks=training_callbacks)

In [None]:
# Run prediction on the held-out test frame, requesting the "rich" progress bar.
prediction = tabular_model.predict(
    test,
    progress_bar="rich",
)

In [None]:
# Evaluate on the test frame; the bare name on the last line keeps the result
# as the cell's displayed output.
test_metrics = tabular_model.evaluate(test, verbose=True)
test_metrics

In [None]:
# Show the path of the best checkpoint recorded by the trainer's checkpoint callback.
checkpoint_callback = tabular_model.trainer.checkpoint_callback
checkpoint_callback.best_model_path

In [None]:
# Evaluate on the test frame again, this time passing ckpt_path="best";
# the bare name on the last line keeps the result as the cell's output.
best_ckpt_metrics = tabular_model.evaluate(
    test,
    verbose=True,
    ckpt_path="best",
)
best_ckpt_metrics

In [None]:
# Produce the model summary; the bare name keeps any returned value as the
# cell's displayed output.
model_summary = tabular_model.summary()
model_summary

In [None]:
# Save the model's configuration into the directory named by its own
# `checkpoints_path` config entry.
checkpoints_dir = tabular_model.config["checkpoints_path"]
tabular_model.save_config(checkpoints_dir)

In [None]:
# Load a previously saved TabularModel from disk.
# NOTE(review): absolute, machine-specific path -- it differs from the
# checkpoints_path used when saving the config above; confirm it is the
# intended directory.
saved_model_dir = "D:/YandexDisk/Work/pydnameth/datasets/GPL21145/GSEUNN/special/060_EpiSImAge/SImAge_log/wtf"
model = TabularModel.load_model(saved_model_dir)