In [None]:
# Convert `ProductSize` to an ordered categorical with an explicit level order.

# Inspect the raw values. Only the last expression of a cell is echoed, so this
# line displays nothing unless it is run on its own.
df['ProductSize'].unique()

# Category levels in the order they should be ranked (first = lowest).
sizes = 'Large','Large / Medium','Medium','Small','Mini','Compact'

df['ProductSize'] = df['ProductSize'].astype('category')
# `set_categories(..., inplace=True)` was deprecated in pandas 1.3 and removed
# in pandas 2.0; assigning the returned Series back works on every version.
df['ProductSize'] = df['ProductSize'].cat.set_categories(sizes, ordered=True)

#### **PyTorch WideDeep (`pytorch-widedeep`)**

**TabMLP**

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test split (and everything trained on it) is the
# same on every "Restart Kernel -> Run All".
RANDOM_STATE = 42

# Load the adult dataset and replace hyphens in column names with underscores.
adult = pd.read_csv("data/adult/adult.csv.zip")
adult.columns = [c.replace("-", "_") for c in adult.columns]

# Binary target: 1 when the raw income string contains ">50K", otherwise 0.
adult["income_label"] = adult["income"].apply(lambda x: ">50K" in x).astype(int)
adult = adult.drop(columns="income")

# Normalise object (string) columns: "?" becomes "unknown", then everything
# is lower-cased.
for c in adult.columns:
    if adult[c].dtype == 'O':
        adult[c] = adult[c].replace("?", "unknown").str.lower()

# 80/20 split, stratified on the target so both parts keep the same class mix.
adult_train, adult_test = train_test_split(
    adult,
    test_size=0.2,
    stratify=adult.income_label,
    random_state=RANDOM_STATE,
)

from pytorch_widedeep.preprocessing import TabPreprocessor

# Supervised target: the binary income label of the training rows.
target = adult_train["income_label"].values

# Numeric features handed to the model as continuous inputs.
cont_cols = ["age", "hours_per_week", "fnlwgt", "educational_num"]

# Categorical features to embed, as (column name, embedding dimension) pairs.
embed_cols = [
    ("workclass", 6),
    ("education", 8),
    ("marital_status", 6),
    ("occupation", 8),
    ("relationship", 6),
    ("race", 6),
]

# Fit the tabular preprocessor on the training split and encode it in one go.
tab_preprocessor = TabPreprocessor(continuous_cols=cont_cols, embed_cols=embed_cols)
X_tab = tab_preprocessor.fit_transform(adult_train)


from pytorch_widedeep.models import TabMlp, WideDeep

# Deep tabular component: an MLP over the embedded categorical columns and the
# batch-normalised continuous columns, with hidden layers of 200 and 100 units.
tabmlp = TabMlp(
    column_idx=tab_preprocessor.column_idx,
    embed_input=tab_preprocessor.embeddings_input,
    continuous_cols=cont_cols,
    batchnorm_cont=True,
    mlp_hidden_dims=[200, 100],
)

# WideDeep with only the `deeptabular` component set.
model = WideDeep(deeptabular=tabmlp)

# Model Training
from pytorch_widedeep import Trainer
from pytorch_widedeep.metrics import Accuracy

# Binary-classification training run for the TabMlp model, tracking accuracy.
trainer = Trainer(model, objective="binary", metrics=[Accuracy])
trainer.fit(
    X_tab=X_tab,
    target=target,
    n_epochs=5,
    batch_size=256,
    val_split=0.2,  # hold out 20% of the training rows for validation
)

**TabResnet**

In [None]:
from pytorch_widedeep.models import TabResnet

# Deep tabular component: TabResnet with block dims [200, 100, 100] and MLP
# hidden dims [100, 50]. It reuses the preprocessor fitted for the TabMlp model.
tabresnet = TabResnet(
    blocks_dims=[200, 100, 100],
    mlp_hidden_dims=[100, 50],
    column_idx=tab_preprocessor.column_idx,
    embed_input=tab_preprocessor.embeddings_input,
    continuous_cols=cont_cols,
    batchnorm_cont=True,
)
model = WideDeep(deeptabular=tabresnet)

# Same training setup as for the TabMlp model: binary objective, accuracy metric.
trainer = Trainer(model, objective="binary", metrics=[Accuracy])
trainer.fit(
    X_tab=X_tab,
    target=target,
    n_epochs=5,
    batch_size=256,
    val_split=0.2,
)

**TabTransformer**

In [None]:
# NOTE: this cell rebinds `embed_cols`, `tab_preprocessor` and `X_tab`, which
# the TabMlp cell above also assigns. Re-running an earlier model cell after
# this one will therefore pick up the TabTransformer versions.

# Categorical columns as plain names (no per-column integer this time).
embed_cols = [
    "workclass",
    "education",
    "marital_status",
    "occupation",
    "relationship",
    "race",
]

# Preprocessor configured with the `for_tabtransformer` flag, fitted on the
# training split.
tab_preprocessor = TabPreprocessor(
    continuous_cols=cont_cols,
    embed_cols=embed_cols,
    for_tabtransformer=True,
)
X_tab = tab_preprocessor.fit_transform(adult_train)


from pytorch_widedeep.models import TabTransformer

# Deep tabular component: TabTransformer with three blocks and shared embeddings.
tabtransformer = TabTransformer(
    num_blocks=3,
    shared_embed=True,
    column_idx=tab_preprocessor.column_idx,
    embed_input=tab_preprocessor.embeddings_input,
    continuous_cols=cont_cols,
)
model = WideDeep(deeptabular=tabtransformer)

# Same training setup as the other models: binary objective, accuracy metric.
trainer = Trainer(model, objective="binary", metrics=[Accuracy])
trainer.fit(
    X_tab=X_tab,
    target=target,
    n_epochs=5,
    batch_size=256,
    val_split=0.2,
)