In [1]:
# notebook configuration
COLAB = False # will trigger manual installation of packages
USE_GPU = True 

In [2]:
# basic imports
from pathlib import Path
import os
import math
from datetime import datetime
import random

In [3]:
%matplotlib inline
%config Completer.use_jedi = False
os.environ['WANDB_NOTEBOOK_NAME'] = f"cleanlab_widedeep_{datetime.now().strftime('%Y%m%d')}c.ipynb"

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm

# general ML tooling
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold
from sklearn.metrics import log_loss, roc_auc_score
import wandb
from wandb.xgboost import wandb_callback
from wandb.lightgbm import wandb_callback
from sklearn.impute import SimpleImputer #, KNNImputer

import seaborn as sns

from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
# from sklearn.ensemble import StackingClassifier, RandomForestClassifier
from sklearn.preprocessing import RobustScaler #StandardScaler #, MinMaxScaler, MaxAbsScaler, RobustScaler, PolynomialFeatures
from sklearn.linear_model import LogisticRegression
# from sklearn.feature_selection import SelectKBest, f_regression
from joblib import dump, load
# feature engineering tools
# from sklearn.feature_selection import mutual_info_regression
# import featuretools as ft

# from BorutaShap import BorutaShap
import optuna
from optuna.integration.wandb import WeightsAndBiasesCallback
from optuna.samplers import TPESampler
from sklearn.utils import resample

In [5]:
from cleanlab.classification import LearningWithNoisyLabels

In [6]:
from gauss_rank_scaler import GaussRankScaler

In [7]:
from pytorch_widedeep import Trainer
from pytorch_widedeep.preprocessing import WidePreprocessor, TabPreprocessor
from pytorch_widedeep.models import Wide, TabMlp, WideDeep#, SAINT, TabTransformer, TabNet, TabFastFormer, TabResnet
from pytorch_widedeep.metrics import Accuracy
from torchmetrics import AUROC
import torch
from torch.optim import Adam, AdamW, Adagrad, SGD, RMSprop, LBFGS
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingWarmRestarts, CyclicLR, OneCycleLR, StepLR, CosineAnnealingLR
from pytorch_widedeep.callbacks import EarlyStopping, LRHistory, ModelCheckpoint

In [8]:
# import category_encoders as ce

In [9]:
if COLAB:
    # mount Google Drive
    from google.colab import drive
    drive.mount('/content/drive')
    
    # handling datapath
    datapath = Path('/content/drive/MyDrive/kaggle/tabular_playgrounds/nov2021/')
    
else:
    # if on local machine
#     datapath = Path('/media/sf/easystore/kaggle_data/tabular_playgrounds/sep2021/')  
    root = Path('/media/sf/easystore/kaggle_data/tabular_playgrounds/nov2021/')
    datapath = root/'datasets'
    # edapath = root/'EDA'
    # modelpath = Path('/media/sf/easystore/kaggle_data/tabular_playgrounds/oct2021/models/')
    predpath = root/'preds'
    subpath = root/'submissions'
    
    for pth in [datapath, predpath, subpath]:
        pth.mkdir(exist_ok=True)
    

In [10]:
SEED = 42

# Function to seed everything
def seed_everything(seed):
    random.seed(seed)
    np.random.seed(seed)
    os.environ['PYTHONHASHSEED'] = str(seed)

seed_everything(seed=SEED)

In [11]:
def reduce_memory_usage(df, verbose=True):
    numerics = ["int8", "int16", "int32", "int64", "float16", "float32", "float64"]
    start_mem = df.memory_usage().sum() / 1024 ** 2
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type in numerics:
            c_min = df[col].min()
            c_max = df[col].max()
            if str(col_type)[:3] == "int":
                if c_min > np.iinfo(np.int8).min and c_max < np.iinfo(np.int8).max:
                    df[col] = df[col].astype(np.int8)
                elif c_min > np.iinfo(np.int16).min and c_max < np.iinfo(np.int16).max:
                    df[col] = df[col].astype(np.int16)
                elif c_min > np.iinfo(np.int32).min and c_max < np.iinfo(np.int32).max:
                    df[col] = df[col].astype(np.int32)
                elif c_min > np.iinfo(np.int64).min and c_max < np.iinfo(np.int64).max:
                    df[col] = df[col].astype(np.int64)
            else:
                if (
                    c_min > np.finfo(np.float16).min
                    and c_max < np.finfo(np.float16).max
                ):
                    df[col] = df[col].astype(np.float16)
                elif (
                    c_min > np.finfo(np.float32).min
                    and c_max < np.finfo(np.float32).max
                ):
                    df[col] = df[col].astype(np.float32)
                else:
                    df[col] = df[col].astype(np.float64)
    end_mem = df.memory_usage().sum() / 1024 ** 2
    if verbose:
        print(
            "Mem. usage decreased to {:.2f} Mb ({:.1f}% reduction)".format(
                end_mem, 100 * (start_mem - end_mem) / start_mem
            )
        )
    return df

In [12]:
# dataset_params will initially include either trivial class instances or loaded, precomputed artifacts
dataset_params = {
    # 'train_source': str(datapath/'X-RobustScaled-pca_mle-umap_embedding_20211107-n_comp10-n_neighbors15-rs42.joblib'),
    'train_source': str(datapath/'X_orig.feather'),
    'target_source': str(datapath/'y_orig.joblib'),
    # 'test_source': str(datapath/'X_test-RobustScaled-pca_mle-umap_embedding_20211107-n_comp10-n_neighbors15-rs42.joblib'),
    'test_source': str(datapath/'X_test_orig-no_scaling.feather'),
    # 'scaler': str(RobustScaler()),
    # 'pca': str(load(datapath/'pca_mle-RobustScaled_orig_trainset.joblib')),
    # 'umap': str(load(datapath/'umap_reducer-20211107-n_comp10-n_neighbors15-rs42-pca_mle-RobustScaled_orig_trainset.joblib')),
}   

# referring back to the already-entered attributes, specify how the pipeline was sequenced
# dataset_params['preprocessing_pipeline'] = str([dataset_params['scaler'], dataset_params['pca'], dataset_params['umap']]) # ACTUALLY this is unwieldy
# dataset_params['preprocessing_pipeline'] = '[scaler, pca, umap]' # more fragile, but also more readable

# now, load the datasets and generate more metadata from them
# X = load(dataset_params['train_source'])
X = pd.read_feather(dataset_params['train_source'])
y = load(dataset_params['target_source'])
# X_test = load(dataset_params['test_source'])
X_test = pd.read_feather(dataset_params['test_source'])

dataset_params['feature_count'] = X.shape[1]
dataset_params['instance_count'] = X.shape[0]
    

In [13]:
# meta-config for preprocessing and cross-validation, but NOT for model parameters
# in the sweep version, this includes both ex-model parameters and defaults for model parameters
exmodel_config = {
    "arch": 'widedeep-TabMLP',
    # "type": 'sweep',
    # "denoising": "cleanlab",
    "level": 1,
    'random_state': SEED,
    # 'tuner': "Optuna",
    'cross_val_strategy': None, # None for holdout, or the relevant sklearn class
    'kfolds': 1, # if 1, that means just doing holdout
    'test_size': 0.2,
    'scaler': str(GaussRankScaler()),
    **dataset_params
}

wandb_config = {
    # wandb config
    'name': f"{os.environ['WANDB_NOTEBOOK_NAME'][:-6]}_{datetime.now().strftime('%H%M%S')}", # just removes the .ipynb extension, leaving the notebook filename's stem
    'project': '202111_Kaggle_tabular_playground',
    'tags': ['experiment'],
    'notes': "Testing to see if Cleanlab works with WideDeep models sans wrappers; also, comparing a baseline.",
    'config': exmodel_config,
}

In [14]:
# h/t Laurent Pourchot https://www.kaggle.com/pourchot/in-python-tabular-denoising-residual-network/

# 100 bins for the bins head of the NN (i.e. percentiles):
X_bins = np.zeros((X.shape[0],X.shape[1])) # he used all available data for the first tuple entry, but I'll start like this
X_test_bins = np.zeros((X_test.shape[0], X_test.shape[1]))

In [15]:
pd.qcut?

In [16]:
for i in range(X.shape[1]): # assumes X is a pd.DataFrame
    X_bins[:,i] = pd.qcut(X.iloc[:,i],X.shape[1],labels=False)#,duplicates = 'drop')
    
for i in range(X_test.shape[1]): # assumes X_test is a pd.DataFrame
    X_test_bins[:,i] = pd.qcut(X_test.iloc[:,i],X_test.shape[1], labels=False)#,duplicates = 'drop')
# blabeled = X_bins[:X.shape[0],:]
# bunlabeled = X_ins[X.shape[0]:,:]

In [17]:
np.isnan(X_test_bins).any()

False

In [18]:
# X_bins = X_bins.astype(np.int8)
# X_test_bins = X_test_bins.astype(np.int8)

In [19]:
X_bins = pd.DataFrame(X_bins, index=X.index, columns=[f'rkd_f{col}' for col in range(100)])
X_test_bins = pd.DataFrame(X_test_bins, index=X_test.index, columns=[f'rkd_f{col}' for col in range(100)])

In [20]:
scaler = GaussRankScaler(n_jobs=-1, epsilon=0.005)
X_gauss = scaler.fit_transform(X)
X_test_gauss = scaler.transform(X_test)

In [21]:
np.where(np.isnan(X_test_gauss))

(array([], dtype=int64), array([], dtype=int64))

In [22]:
X_gauss = pd.DataFrame(X_gauss, columns=X.columns, index=X.index)
X_test_gauss = pd.DataFrame(X_test_gauss, columns=X_test.columns, index=X_test.index)

In [23]:
X_pre = X_gauss.join(X_bins)
X_test_pre = X_test_gauss.join(X_test_bins)

In [24]:
cont_cols = X_pre.iloc[:,:100].columns
wide_cols = X_pre.iloc[:, 100:].columns

In [25]:
wide_preprocessor = WidePreprocessor(wide_cols=wide_cols)
X_wide = wide_preprocessor.fit_transform(X_pre)
X_test_wide = wide_preprocessor.transform(X_test_pre)

In [26]:
tab_preprocessor = TabPreprocessor(continuous_cols=cont_cols, scale=False, for_transformer=False,embed_cols=wide_cols, already_standard=True)
X_tab = tab_preprocessor.fit_transform(X_pre)
X_test_tab = tab_preprocessor.transform(X_test_pre)

In [27]:
wide = Wide(wide_dim=np.unique(X_wide).shape[0], pred_dim=1)

In [28]:
# deeptabular = TabMlp(continuous_cols=X_gauss.columns, column_idx=tab_preprocessor.column_idx)
deeptabular = TabMlp(continuous_cols=cont_cols, column_idx=tab_preprocessor.column_idx)

In [29]:
model = WideDeep(wide=wide, deeptabular=deeptabular)

In [30]:
# del X_deep_train, X_deep_valid
X_wide_train, X_wide_valid, y_train, y_valid = train_test_split(X_wide, y, test_size=0.2, random_state=42)
X_tab_train, X_tab_valid, _, _ = train_test_split(X_tab, y, test_size=0.2, random_state=42)

In [31]:
y_train = np.array(y_train)
y_valid = np.array(y_valid)

In [32]:
n_epochs = 4

wide_opt = AdamW(model.wide.parameters(), lr=0.1)
deep_opt = AdamW(model.deeptabular.parameters(), lr=0.1)

wide_sch = OneCycleLR(optimizer=wide_opt, max_lr=0.01, steps_per_epoch=X_wide_train.shape[0], epochs=n_epochs)
deep_sch = OneCycleLR(optimizer=deep_opt, max_lr=0.01, steps_per_epoch=X_tab_train.shape[0], epochs=n_epochs)

optimizers = {'wide': wide_opt, 'deeptabular': deep_opt }
lr_schedulers = {'wide': wide_sch, 'deeptabular': deep_sch }


callbacks = [
    LRHistory(n_epochs=n_epochs), 
]

# trainer
trainer = Trainer(model=model, 
                  objective='binary', 
                  metrics=[Accuracy], # with AUROC got TypeError: '>' not supported between instances of 'NoneType' and 'int' 
                  seed=42, 
                  optimizers=optimizers,
                  callbacks=callbacks
                 )

#             print(f"type(X_train_wide) is {type(X_train_wide)} and type(X_train_tab) is {type(X_train_tab)}")
trainer.fit( # this is where problem is beginning
    X_wide=X_wide_train,
    X_tab=X_tab_train,
    target=y_train,
    n_epochs=n_epochs,
    batch_size=1024, # default value is 32
#                 val_split=0.2, # no need for this
)

y_valid_preds = trainer.predict_proba(X_wide=X_wide_valid, X_tab=X_tab_valid, batch_size=1024)[:,1]
           
    

In [33]:
dirty_preds = trainer.predict_proba(X_wide=X_test_wide, X_tab=X_test_tab, batch_size=1024,)[:,1]
np.isnan(dirty_preds).any()

False

In [34]:
np.isnan(y_valid_preds).any()

False

In [35]:
np.isnan(X_test_wide).any()

False

In [36]:
np.isnan(X_tab).any()

False

In [37]:
for i in range(X.shape[1]): # assumes X is a pd.DataFrame
    X_bins[:,i] = pd.qcut(X.iloc[:,i],X.shape[1],labels=False)#,duplicates = 'drop')
    
for i in range(X_test.shape[1]): # assumes X_test is a pd.DataFrame
    X_test_bins[:,i] = pd.qcut(X_test.iloc[:,i],X_test.shape[1], labels=False)#,duplicates = 'drop')
# blabeled = X_bins[:X.shape[0],:]
# bunlabeled = X_ins[X.shape[0]:,:]

In [38]:
np.isnan(y_valid_preds).any()

False

In [39]:
np.isnan(X_test_wide).any()

False

In [40]:
np.isnan(X_tab).any()

False

In [41]:
np.isnan(X_test_tab).any()

False

In [42]:
dump(dirty_preds, predpath/'cleanlab_widedeep_20211123-TabMLP-dirtydata-holdout-baseline_preds.joblib')

['/media/sf/easystore/kaggle_data/tabular_playgrounds/nov2021/preds/cleanlab_widedeep_20211123-TabMLP-dirtydata-holdout-baseline_preds.joblib']

In [43]:
sample_df = pd.read_csv(datapath/'sample_submission.csv')

In [44]:
sample_df.head()

       id  target
0  600000     0.5
1  600001     0.5
2  600002     0.5
3  600003     0.5
4  600004     0.5

In [45]:
sample_df.loc[:, 'target'] = dirty_preds

In [46]:
sample_df.head()

       id    target
0  600000  0.999271
1  600001  0.998434
2  600002  0.508877
3  600003  0.993828
4  600004  0.958983

In [47]:
sample_df.to_csv(subpath/f"{wandb_config['name']}-TabMLP-dirtydata-holdout-baseline_preds.csv", index=False)
# sample_df.to_csv(subpath/f"{wandb_config['name']}_3level-X_orig+KMeans8+synth-GBM-stack_ensemble_preds.csv", index=False)

In [48]:
n_epochs = 50

wide_opt = AdamW(model.wide.parameters(), lr=0.1)
deep_opt = AdamW(model.deeptabular.parameters(), lr=0.1)

wide_sch = OneCycleLR(optimizer=wide_opt, max_lr=0.01, steps_per_epoch=X_wide_train.shape[0], epochs=n_epochs)
deep_sch = OneCycleLR(optimizer=deep_opt, max_lr=0.01, steps_per_epoch=X_tab_train.shape[0], epochs=n_epochs)

optimizers = {'wide': wide_opt, 'deeptabular': deep_opt }
lr_schedulers = {'wide': wide_sch, 'deeptabular': deep_sch }


callbacks = [
    LRHistory(n_epochs=n_epochs), 
]

# trainer
trainer = Trainer(model=model, 
                  objective='binary', 
                  metrics=[Accuracy], # with AUROC got TypeError: '>' not supported between instances of 'NoneType' and 'int' 
                  seed=42, 
                  optimizers=optimizers,
                  callbacks=callbacks
                 )

#             print(f"type(X_train_wide) is {type(X_train_wide)} and type(X_train_tab) is {type(X_train_tab)}")
trainer.fit( # this is where problem is beginning
    X_wide=X_wide_train,
    X_tab=X_tab_train,
    target=y_train,
    n_epochs=n_epochs,
    batch_size=1024, # default value is 32
#                 val_split=0.2, # no need for this
)

y_valid_preds = trainer.predict_proba(X_wide=X_wide_valid, X_tab=X_tab_valid, batch_size=1024)[:,1]
           
    

In [49]:
dirty_preds = trainer.predict_proba(X_wide=X_test_wide, X_tab=X_test_tab, batch_size=1024,)[:,1]
np.isnan(dirty_preds).any()

False

In [50]:
dump(dirty_preds, predpath/'cleanlab_widedeep_20211123-TabMLP-dirtydata-holdout-baseline_preds.joblib')

['/media/sf/easystore/kaggle_data/tabular_playgrounds/nov2021/preds/cleanlab_widedeep_20211123-TabMLP-dirtydata-holdout-baseline_preds.joblib']

In [51]:
sample_df = pd.read_csv(datapath/'sample_submission.csv')

In [52]:
sample_df.head()

       id  target
0  600000     0.5
1  600001     0.5
2  600002     0.5
3  600003     0.5
4  600004     0.5

In [53]:
sample_df.loc[:, 'target'] = dirty_preds

In [54]:
sample_df.head()

       id    target
0  600000  0.745735
1  600001  0.543516
2  600002  0.799951
3  600003  0.382965
4  600004  0.499106

In [55]:
sample_df.to_csv(subpath/f"{wandb_config['name']}-TabMLP-dirtydata-holdout-baseline_preds.csv", index=False)
# sample_df.to_csv(subpath/f"{wandb_config['name']}_3level-X_orig+KMeans8+synth-GBM-stack_ensemble_preds.csv", index=False)

In [56]:
trainer.history

{'train_loss': [1.5782640417501617,
  1.4658180739579678,
  1.3636410360905662,
  1.2640171056108942,
  1.169310306181023,
  1.0811104722368692,
  0.9988063434039606,
  0.9238579072423582,
  0.8566404775515802,
  0.7971974780310446,
  0.7463966960083447,
  0.7034459214474855,
  0.6698591036837238,
  0.6441153502667637,
  0.6251680727706534,
  0.6110981505816934,
  0.6017060001521731,
  0.5949167211426855,
  0.5902555257018441,
  0.5876799241057845,
  0.5852626996762209,
  0.5839717616912907,
  0.5829337349832694,
  0.5815511997828859,
  0.5811689098252416,
  0.5802307953712529,
  0.5796380260351625,
  0.579643978365957,
  0.5785929005283282,
  0.5785237953606953,
  0.5782156373137859,
  0.5775741117595355,
  0.5770682386243775,
  0.5766099651993465,
  0.5763754135510052,
  0.5759396197190926,
  0.5757148849176191,
  0.5756011860711234,
  0.5746617186298248,
  0.5745866466432746,
  0.5743073116995887,
  0.5735866446484889,
  0.5734975879380444,
  0.5732190976264888,
  0.5729572251915678

In [57]:
len(trainer.history['train_loss'])

50

In [58]:
del X_gauss, X_test_gauss, X_pre, X_test_pre

In [59]:
# trainer
clean_trainer = Trainer(model=model, 
                  objective='binary', 
                  metrics=[Accuracy], # with AUROC got TypeError: '>' not supported between instances of 'NoneType' and 'int' 
                  seed=42, 
                  optimizers=optimizers,
                  callbacks=callbacks
                 )

In [60]:
wandb.init(
    project="202111_Kaggle_tabular_playground",
    save_code=True,
    tags=wandb_config['tags'],
    name=wandb_config['name'],
    notes=wandb_config['notes'],
    config=exmodel_config
) 

<wandb.sdk.wandb_run.Run at 0x7f5ad414a850>

In [61]:
model.parameters

<bound method Module.parameters of WideDeep(
  (wide): Wide(
    (wide_linear): Embedding(10001, 1, padding_idx=0)
  )
  (deeptabular): Sequential(
    (0): TabMlp(
      (cat_embed_and_cont): CatEmbeddingsAndCont(
        (cont_norm): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (tab_mlp): MLP(
        (mlp): Sequential(
          (dense_layer_0): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=100, out_features=200, bias=True)
            (2): ReLU(inplace=True)
          )
          (dense_layer_1): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=200, out_features=100, bias=True)
            (2): ReLU(inplace=True)
          )
        )
      )
    )
    (1): Linear(in_features=100, out_features=1, bias=True)
  )
)>

In [62]:
model.parameters()

<generator object Module.parameters at 0x7f58c6fba660>

In [63]:
model.parameters

<bound method Module.parameters of WideDeep(
  (wide): Wide(
    (wide_linear): Embedding(10001, 1, padding_idx=0)
  )
  (deeptabular): Sequential(
    (0): TabMlp(
      (cat_embed_and_cont): CatEmbeddingsAndCont(
        (cont_norm): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (tab_mlp): MLP(
        (mlp): Sequential(
          (dense_layer_0): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=100, out_features=200, bias=True)
            (2): ReLU(inplace=True)
          )
          (dense_layer_1): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=200, out_features=100, bias=True)
            (2): ReLU(inplace=True)
          )
        )
      )
    )
    (1): Linear(in_features=100, out_features=1, bias=True)
  )
)>

In [64]:
str(model.parameters)

'<bound method Module.parameters of WideDeep(\n  (wide): Wide(\n    (wide_linear): Embedding(10001, 1, padding_idx=0)\n  )\n  (deeptabular): Sequential(\n    (0): TabMlp(\n      (cat_embed_and_cont): CatEmbeddingsAndCont(\n        (cont_norm): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)\n      )\n      (tab_mlp): MLP(\n        (mlp): Sequential(\n          (dense_layer_0): Sequential(\n            (0): Dropout(p=0.1, inplace=False)\n            (1): Linear(in_features=100, out_features=200, bias=True)\n            (2): ReLU(inplace=True)\n          )\n          (dense_layer_1): Sequential(\n            (0): Dropout(p=0.1, inplace=False)\n            (1): Linear(in_features=200, out_features=100, bias=True)\n            (2): ReLU(inplace=True)\n          )\n        )\n      )\n    )\n    (1): Linear(in_features=100, out_features=1, bias=True)\n  )\n)>'

In [65]:
model.parameters

<bound method Module.parameters of WideDeep(
  (wide): Wide(
    (wide_linear): Embedding(10001, 1, padding_idx=0)
  )
  (deeptabular): Sequential(
    (0): TabMlp(
      (cat_embed_and_cont): CatEmbeddingsAndCont(
        (cont_norm): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (tab_mlp): MLP(
        (mlp): Sequential(
          (dense_layer_0): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=100, out_features=200, bias=True)
            (2): ReLU(inplace=True)
          )
          (dense_layer_1): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=200, out_features=100, bias=True)
            (2): ReLU(inplace=True)
          )
        )
      )
    )
    (1): Linear(in_features=100, out_features=1, bias=True)
  )
)>

In [66]:
model.parameters(recurse=True)

<generator object Module.parameters at 0x7f58c6fbac10>

In [67]:
model.parameters

<bound method Module.parameters of WideDeep(
  (wide): Wide(
    (wide_linear): Embedding(10001, 1, padding_idx=0)
  )
  (deeptabular): Sequential(
    (0): TabMlp(
      (cat_embed_and_cont): CatEmbeddingsAndCont(
        (cont_norm): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      )
      (tab_mlp): MLP(
        (mlp): Sequential(
          (dense_layer_0): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=100, out_features=200, bias=True)
            (2): ReLU(inplace=True)
          )
          (dense_layer_1): Sequential(
            (0): Dropout(p=0.1, inplace=False)
            (1): Linear(in_features=200, out_features=100, bias=True)
            (2): ReLU(inplace=True)
          )
        )
      )
    )
    (1): Linear(in_features=100, out_features=1, bias=True)
  )
)>

In [68]:
import torchinfo

In [69]:
torchinfo.summary(model)

Layer (type:depth-idx)                        Param #
WideDeep                                      --
├─Wide: 1-1                                   --
│    └─Embedding: 2-1                         10,001
├─Sequential: 1-2                             --
│    └─TabMlp: 2-2                            --
│    │    └─CatEmbeddingsAndCont: 3-1         200
│    │    └─MLP: 3-2                          40,300
│    └─Linear: 2-3                            101
Total params: 50,602
Trainable params: 50,602
Non-trainable params: 0

In [70]:
summary = torchinfo.summary(model)

In [71]:
?summary

In [72]:
str(summary)



In [73]:
wandb.log({'overall_valid_auc': roc_auc_score(y_true=y_valid, y_score=y_valid_preds),
           'model_params': model.parameters,
           'model_summary': torchinfo.summary(model),
           'model_seed': 42,
           'leadboard_auc': 0.72913,
          })

In [74]:
wandb.log({'overall_valid_auc': roc_auc_score(y_true=y_valid, y_score=y_valid_preds),
           'model_params': model.parameters,
           'model_summary': str(torchinfo.summary(model)),
           'model_seed': 42,
           'leadboard_auc': 0.72913,
          })

In [75]:
wandb.finish()