In [None]:
# 1. FASTAI 
# 2. EMB + LIGHTGBM 
# 3. LOGISTIC
# 4. deepfm
# 5. catboost
# 6. h2o

In [None]:
from fastai.tabular import * 
from fastai.callbacks import EarlyStoppingCallback, ReduceLROnPlateauCallback
from fastai.callbacks import *
from ranger import Ranger

In [None]:
# Read the training and test tables and index both by their 'id' column;
# the sample submission file is loaded unchanged.
df = pd.read_csv('train.csv').set_index('id', drop=True)
test_df = pd.read_csv('test.csv').set_index('id', drop=True)
sample = pd.read_csv('sample_submission.csv')

In [None]:
# Name of the label column.
dep_var = 'target'

# Categorical feature columns, generated per prefix group:
# bin_0..bin_4, nom_0..nom_9, ord_0..ord_5, followed by 'day' and 'month'.
cat_names = [
    f'{prefix}_{idx}'
    for prefix, count in (('bin', 5), ('nom', 10), ('ord', 6))
    for idx in range(count)
] + ['day', 'month']
# Preprocessing steps handed to TabularList.from_df via `procs` when the training list is built.
# NOTE(review): no cont_names are passed anywhere in this notebook, so FillMissing/Normalize
# may have nothing to act on — confirm.
procs = [FillMissing, Categorify, Normalize]

In [None]:
# Per-column embedding size passed to tabular_learner as `emb_szs`:
# the column's number of distinct values in the training frame, capped at 50.
embed = {col: min(50, df[col].nunique()) for col in cat_names}

In [None]:
# Wrap the test frame as a TabularList over the same categorical columns (no procs are
# given here); it is attached to the training data through add_test when the databunch is built.
test = TabularList.from_df(test_df, cat_names=cat_names)

In [None]:
# Hold out the last 20% of the training rows (by position) as the validation set.
# The boundary is computed from `df` -- the frame the list is built from -- because no
# `train` variable is defined in this notebook, and split_by_idx is given a flat list
# of integer row positions rather than a list wrapping a range object.
valid_idx = list(range(int(len(df)*0.8), len(df)))

data_fold = (TabularList.from_df(df, cat_names=cat_names, procs=procs)
                 .split_by_idx(valid_idx)
                 .label_from_df(cols=dep_var)
                 .add_test(test)
                 .databunch(bs=32))

In [None]:
# Display a sample batch from the databunch as a quick sanity check of the inputs and labels.
data_fold.show_batch()

In [None]:
class Mish(nn.Module):
    """Mish activation module: computes ``x * tanh(softplus(x))``."""

    def __init__(self):
        super().__init__()
        # Announces every instantiation on stdout.
        print("Mish activation loaded...")

    def forward(self, x):
        """Return ``x`` multiplied by ``tanh(softplus(x))``."""
        gate = torch.tanh(F.softplus(x))
        return x * gate

In [None]:
class TabularModel(Module):
    """Basic model for tabular data, using `Mish` as the hidden-layer activation.

    One embedding per categorical column is concatenated with the batch-normalised
    continuous features and passed through a stack of linear blocks.
    """
    def __init__(self, emb_szs:ListSizes, n_cont:int, out_sz:int, layers:Collection[int], ps:Collection[float]=None,
                 emb_drop:float=0., y_range:OptRange=None, use_bn:bool=True, bn_final:bool=False):
        """Build the embeddings and the linear stack.

        emb_szs:  (n_categories, embedding_dim) pairs, one per categorical column.
        n_cont:   number of continuous input features.
        out_sz:   size of the final output layer.
        layers:   sizes of the hidden linear layers.
        ps:       dropout probability per hidden layer (defaults to 0 for every layer).
        emb_drop: dropout applied to the concatenated embeddings.
        y_range:  optional (low, high) pair; when set, the output is squashed into it.
        use_bn:   add batch norm to every linear block except the first.
        bn_final: append a batch norm layer after the output layer.
        """
        super().__init__()
        ps = ifnone(ps, [0]*len(layers))
        ps = listify(ps, layers)
        self.embeds = nn.ModuleList([embedding(ni, nf) for ni,nf in emb_szs])
        self.emb_drop = nn.Dropout(emb_drop)
        self.bn_cont = nn.BatchNorm1d(n_cont)
        n_emb = sum(e.embedding_dim for e in self.embeds)
        self.n_emb,self.n_cont,self.y_range = n_emb,n_cont,y_range
        sizes = self.get_sizes(layers, out_sz)
        # Mish.__init__ takes no arguments, so it is constructed without `inplace`.
        # One activation per hidden layer; the output layer gets none.
        actns = [Mish() for _ in range(len(sizes)-2)] + [None]
        # Collected in a separate list so the `layers` argument is not shadowed.
        blocks = []
        # The first block gets neither dropout nor batch norm.
        for i,(n_in,n_out,dp,act) in enumerate(zip(sizes[:-1],sizes[1:],[0.]+ps,actns)):
            blocks += bn_drop_lin(n_in, n_out, bn=use_bn and i!=0, p=dp, actn=act)
        if bn_final: blocks.append(nn.BatchNorm1d(sizes[-1]))
        self.layers = nn.Sequential(*blocks)

    def get_sizes(self, layers, out_sz):
        "Layer widths from input (embeddings + continuous features) through `layers` to `out_sz`."
        return [self.n_emb + self.n_cont] + layers + [out_sz]

    def forward(self, x_cat:Tensor, x_cont:Tensor) -> Tensor:
        "Embed column `i` of `x_cat` with the `i`-th embedding, append `x_cont`, and run the linear stack."
        if self.n_emb != 0:
            x = [e(x_cat[:,i]) for i,e in enumerate(self.embeds)]
            x = torch.cat(x, 1)
            x = self.emb_drop(x)
        if self.n_cont != 0:
            x_cont = self.bn_cont(x_cont)
            x = torch.cat([x, x_cont], 1) if self.n_emb != 0 else x_cont
        x = self.layers(x)
        if self.y_range is not None:
            # Rescale the sigmoid output from (0, 1) to (y_range[0], y_range[1]).
            x = (self.y_range[1]-self.y_range[0]) * torch.sigmoid(x) + self.y_range[0]
        return x

In [None]:
# Tabular learner over `data_fold`: two hidden layers of 300 units, the Ranger optimizer,
# and two callbacks that both monitor validation AUROC (learning-rate reduction on
# plateau, and early stopping).
# NOTE(review): tabular_learner constructs its model inside fastai, so the TabularModel/Mish
# classes defined in this notebook are presumably not the model trained here — confirm.
learn = tabular_learner(
    data_fold,
    emb_szs=embed,
    layers=[300,300],
    ps=[0.001,0.01],
    emb_drop=0.04,
    metrics=[accuracy, AUROC()],
    opt_func=Ranger,
    callback_fns=[
        partial(
            ReduceLROnPlateauCallback,
            monitor='auroc',
            min_delta=0.01,
            patience=1,
            min_lr=1e-06,
            factor=0.10,
            mode='max',
        ),
        partial(
            EarlyStoppingCallback,
            monitor='auroc',
            min_delta=0.001,
            patience=3,
            mode='max',
        ),
    ],
)
# Replace the learner's loss function with plain cross-entropy.
learn.loss_func = nn.CrossEntropyLoss()

In [None]:
# Run the learning-rate finder; its results are plotted from the recorder in the next cell.
learn.lr_find()

In [None]:
# Plot what the recorder captured during lr_find to help choose a learning rate.
learn.recorder.plot()

In [None]:
# Train for 4 epochs at learning rate 1e-3 with weight decay 0.2
# (the early-stopping callback configured on the learner may end training sooner).
learn.fit(4, 1e-3, wd=0.2)

In [None]:
# Predictions on the validation split.
# NOTE(review): get_preds presumably returns a (predictions, targets) pair — confirm before indexing.
preds_val = learn.get_preds(DatasetType.Valid)

In [None]:
# Predictions on the test set that was attached with add_test.
# NOTE(review): the test set has no labels, so any targets returned alongside are presumably placeholders — confirm.
preds_test = learn.get_preds(DatasetType.Test)