## Load data

In [45]:
import pandas as pd

In [46]:
# Read the engineered training features and discard the "Unnamed: 0" column.
new_data = pd.read_csv("feature_eng_cat.csv").drop(columns=["Unnamed: 0"])

In [47]:
# Display the column index of the loaded frame.
new_data.columns

Index(['LEN', 'CALL_TYPE_A', 'CALL_TYPE_B', 'CALL_TYPE_C', 'DAY_TYPE_A',
       'DAY_TYPE_B', 'DAY_TYPE_C', 'MISSING', 'CALL_A_DAY_A', 'CALL_A_DAY_B',
       'CALL_A_DAY_C', 'CALL_B_DAY_A', 'CALL_B_DAY_B', 'CALL_B_DAY_C',
       'MORNING', 'AFTERNOON', 'EVENING', 'NIGHT', 'S1', 'S2', 'S3', 'S4',
       'HR_SIN', 'HR_COS', 'DAY_SIN', 'DAY_COS', 'WK_SIN', 'WK_COS', 'MON_SIN',
       'MON_COS', 'YR_DUMMY', 'CALL_A_MORN', 'CALL_A_AFTER', 'CALL_A_EVE',
       'CALL_A_NIT', 'CALL_B_MORN', 'CALL_B_AFTER', 'CALL_B_EVE', 'CALL_B_NIT',
       'CALL_C_MORN', 'CALL_C_AFTER', 'CALL_C_EVE', 'CALL_C_NIT', 'CALL_A_S1',
       'CALL_A_S2', 'CALL_A_S3', 'CALL_A_S4', 'CALL_B_S1', 'CALL_B_S2',
       'CALL_B_S3', 'CALL_B_S4', 'CALL_C_S1', 'CALL_C_S2', 'CALL_C_S3',
       'CALL_C_S4', 'DAY_A_MORN', 'DAY_A_AFTER', 'DAY_A_EVE', 'DAY_A_NIT',
       'DAY_B_MORN', 'DAY_B_AFTER', 'DAY_B_EVE', 'DAY_B_NIT', 'DAY_C_MORN',
       'DAY_C_AFTER', 'DAY_C_EVE', 'DAY_C_NIT', 'DAY_A_S1', 'DAY_A_S2',
       'DAY_A_S3', 'DA

## Split train and test set

In [48]:
# 'LEN' is the prediction target; every remaining column is a model input.
y = new_data['LEN']
X = new_data.drop(columns=['LEN'])

In [49]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The seed is fixed so that the same
# rows land in the held-out set on every kernel restart; without it, validation
# scores from different runs are not comparable.
x_train,x_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=42)

## Apply Deep Learning Model

In [50]:
import os
import torch
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm
import torch.nn as nn
import numpy as np
import torch.optim as optim

In [51]:

# Columns fed to the model as categorical ids. The ORDER matters: the list of
# per-column cardinalities passed to the model is built positionally from this
# list, and treats the final three entries separately from the rest.
category_names = [
    'CALL_TYPE_A', 'CALL_TYPE_B', 'CALL_TYPE_C',
    'DAY_TYPE_A', 'DAY_TYPE_B', 'DAY_TYPE_C',
    'MISSING',
    'CALL_A_DAY_A', 'CALL_A_DAY_B', 'CALL_A_DAY_C',
    'CALL_B_DAY_A', 'CALL_B_DAY_B', 'CALL_B_DAY_C',
    'MORNING', 'AFTERNOON', 'EVENING', 'NIGHT',
    'S1', 'S2', 'S3', 'S4',
    'YR_DUMMY',
    # CALL_* crossed with MORN / AFTER / EVE / NIT
    'CALL_A_MORN', 'CALL_A_AFTER', 'CALL_A_EVE', 'CALL_A_NIT',
    'CALL_B_MORN', 'CALL_B_AFTER', 'CALL_B_EVE', 'CALL_B_NIT',
    'CALL_C_MORN', 'CALL_C_AFTER', 'CALL_C_EVE', 'CALL_C_NIT',
    # CALL_* crossed with S1..S4
    'CALL_A_S1', 'CALL_A_S2', 'CALL_A_S3', 'CALL_A_S4',
    'CALL_B_S1', 'CALL_B_S2', 'CALL_B_S3', 'CALL_B_S4',
    'CALL_C_S1', 'CALL_C_S2', 'CALL_C_S3', 'CALL_C_S4',
    # DAY_* crossed with MORN / AFTER / EVE / NIT
    'DAY_A_MORN', 'DAY_A_AFTER', 'DAY_A_EVE', 'DAY_A_NIT',
    'DAY_B_MORN', 'DAY_B_AFTER', 'DAY_B_EVE', 'DAY_B_NIT',
    'DAY_C_MORN', 'DAY_C_AFTER', 'DAY_C_EVE', 'DAY_C_NIT',
    # DAY_* crossed with S1..S4
    'DAY_A_S1', 'DAY_A_S2', 'DAY_A_S3', 'DAY_A_S4',
    'DAY_B_S1', 'DAY_B_S2', 'DAY_B_S3', 'DAY_B_S4',
    'DAY_C_S1', 'DAY_C_S2', 'DAY_C_S3', 'DAY_C_S4',
    # *_MISS columns
    'CALL_A_MISS', 'CALL_B_MISS', 'CALL_C_MISS',
    'DAY_A_MISS', 'DAY_B_MISS', 'DAY_C_MISS',
    # the three *_CAT columns must stay last (see note above)
    'TAXI_ID_CAT', 'ORIGIN_CALL_CAT', 'ORIGIN_STAND_CAT',
]

# Columns fed to the model as continuous (float) inputs.
continous_names = [
    'HR_SIN', 'HR_COS',
    'DAY_SIN', 'DAY_COS',
    'WK_SIN', 'WK_COS',
    'MON_SIN', 'MON_COS',
]

* Load Dataset

In [52]:
def _as_tensor_dataset(features, targets):
    """Wrap a feature frame and its targets as a TensorDataset.

    Each item of the returned dataset is a triple
    ``(categorical ids, continuous values, target)``.

    Categorical columns are cast to an explicit ``np.int64``. The previous
    ``np.compat.long`` was only an alias of the builtin ``int`` (so its width
    depended on the platform), and the ``np.compat`` namespace is deprecated in
    recent NumPy releases.
    """
    return TensorDataset(
        torch.from_numpy(features[category_names].values.astype(np.int64)),
        torch.from_numpy(features[continous_names].values.astype(np.float32)),
        torch.from_numpy(targets.values),
    )


train_dataset = _as_tensor_dataset(x_train, y_train)
test_dataset = _as_tensor_dataset(x_test, y_test)


In [53]:
# Batches of 1024 rows; only the training loader reshuffles between epochs.
train_dataloader = DataLoader(train_dataset, batch_size=1024, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=1024, shuffle=False)

In [54]:
# Number of distinct ORIGIN_STAND_CAT values (used as that column's cardinality in the model).
len(np.unique(new_data['ORIGIN_STAND_CAT']))

64

## Apply FT-Transformer (`FTTransformer` from `tab_transformer_pytorch`)

In [55]:
from tab_transformer_pytorch import FTTransformer


# Number of distinct values per categorical column, positionally aligned with
# `category_names`: every column except the last three gets cardinality 2, and
# the last three (TAXI_ID_CAT, ORIGIN_CALL_CAT, ORIGIN_STAND_CAT) get
# 448, 57106 and 64 respectively.
# NOTE(review): 448 and 57106 are hard-coded — confirm they match the data.
categoryies = [2] * (len(category_names) - 3) + [448, 57106, 64]
num_continuous = len(continous_names)

model = FTTransformer(
    categories=tuple(categoryies),   # number of unique values within each categorical column
    num_continuous=num_continuous,   # number of continuous input columns
    dim=128,                         # embedding dimension
    depth=10,                        # number of transformer layers
    heads=8,                         # attention heads
    dim_out=1,                       # single output value (trained against LEN with MSE)
    attn_dropout=0.2,                # post-attention dropout
    ff_dropout=0.2,                  # feed forward dropout
)

In [56]:
def train(model,optimizer,lr_scheduler,train_loader,val_loader,epochs,output_path,device):
    """Train ``model`` with an MSE objective and checkpoint it on validation improvement.

    Args:
        model: module called as ``model(x, x1)``; its output is squeezed on dim 1
            before being compared with the labels.
        optimizer: optimizer stepped once per training batch.
        lr_scheduler: scheduler stepped once per epoch.
        train_loader: iterable yielding ``(x, x1, label)`` training batches.
        val_loader: iterable yielding ``(x, x1, label)`` validation batches.
        epochs: number of passes over ``train_loader``.
        output_path: directory receiving the checkpoints; created if missing.
        device: device the model and every batch are moved to.

    Side effects:
        Each time the validation MSE improves, writes
        ``Taxi_Epoch{epoch}_rmse{val_rmse}_withid.pth`` (the model ``state_dict``)
        into ``output_path``.

    Notes:
        Epoch losses are averaged per sample (each batch weighted by its size),
        so a smaller final batch does not skew the reported RMSE.
    """
    model = model.to(device)
    # torch.save does not create missing directories, so make sure the target exists
    # before hours of training are spent.
    os.makedirs(output_path, exist_ok=True)

    loss_fn = nn.MSELoss()
    best_loss = np.inf

    def _forward_loss(x, x1, label):
        """Run one batch through the model; return (mean MSE tensor, batch size)."""
        x = x.to(device)
        x1 = x1.to(device)
        label = label.to(device).float()
        model_output = model(x, x1).squeeze(1)
        return loss_fn(model_output, label), label.size(0)

    def _per_sample_mean(weighted_sum, n_samples):
        """Mean loss per sample; NaN when the loader produced no samples."""
        return weighted_sum / n_samples if n_samples else float("nan")

    # The former `postfix=dict` argument handed the `dict` class itself to tqdm;
    # the postfix is populated through set_postfix() below instead.
    with tqdm(total=epochs, desc='Training', mininterval=0.3) as pbar:
        for epoch in range(epochs):
            # ---- training pass ----
            model.train()
            train_sum, train_seen = 0.0, 0
            for x, x1, label in train_loader:
                loss, batch_size = _forward_loss(x, x1, label)

                optimizer.zero_grad()
                loss.backward()
                # torch.nn.utils.clip_grad_norm_(parameters=model.parameters(), max_norm=5, norm_type=2)
                optimizer.step()

                train_sum += loss.item() * batch_size
                train_seen += batch_size
            avg_train_loss = _per_sample_mean(train_sum, train_seen)

            # ---- validation pass ----
            model.eval()
            val_sum, val_seen = 0.0, 0
            # No gradients are needed for evaluation; disabling autograd avoids
            # building a graph for every validation batch.
            with torch.no_grad():
                for x, x1, label in val_loader:
                    loss, batch_size = _forward_loss(x, x1, label)
                    val_sum += loss.item() * batch_size
                    val_seen += batch_size
            avg_val_loss = _per_sample_mean(val_sum, val_seen)

            pbar.set_postfix(**{'train_rmse_loss': round(np.sqrt(avg_train_loss),5),
                                'val_rmse_loss': round(np.sqrt(avg_val_loss),5)})
            pbar.update(1)

            # Keep a checkpoint only when the validation loss improves.
            if best_loss > avg_val_loss:
                best_loss = avg_val_loss
                save_name = os.path.join(output_path,f"Taxi_Epoch{epoch}_rmse{round(np.sqrt(avg_val_loss),5)}_withid.pth")
                torch.save(model.state_dict(),save_name)

            lr_scheduler.step()

In [57]:
from fvcore.nn import FlopCountAnalysis, parameter_count_table

# Profile the model on CPU using one real batch from the training loader.
# The batch is unpacked into dedicated names: the previous `for x,x1,y in ...`
# loop rebound the global `y` (the training target) to a label tensor.
model.cpu()
flop_cat_batch, flop_cont_batch, _ = next(iter(train_dataloader))
dummy_input = (flop_cat_batch, flop_cont_batch)

# FLOPs (reported in units of 1e9, for a single batch of the loader's batch size)
flops = FlopCountAnalysis(model, dummy_input)
print("FLOPs: ", flops.total()/1e9)

# parameters
print(parameter_count_table(model))

Unsupported operator aten::add encountered 22 time(s)
Unsupported operator aten::embedding encountered 1 time(s)
Unsupported operator aten::mul encountered 145 time(s)
Unsupported operator aten::softmax encountered 10 time(s)
Unsupported operator aten::gelu encountered 10 time(s)


FLOPs:  257.677421312
| name                         | #elements or shape   |
|:-----------------------------|:---------------------|
| model                        | 10.0M                |
|  cls_token                   |  (1, 1, 128)         |
|  categorical_embeds          |  7.4M                |
|   categorical_embeds.weight  |   (57772, 128)       |
|  numerical_embedder          |  2.0K                |
|   numerical_embedder.weights |   (8, 128)           |
|   numerical_embedder.biases  |   (8, 128)           |
|  transformer                 |  2.6M                |
|   transformer.layers         |   2.6M               |
|    transformer.layers.0      |    0.3M              |
|    transformer.layers.1      |    0.3M              |
|    transformer.layers.2      |    0.3M              |
|    transformer.layers.3      |    0.3M              |
|    transformer.layers.4      |    0.3M              |
|    transformer.layers.5      |    0.3M              |
|    transformer.layers.6 

In [59]:
# Use the second GPU when the machine has one (the original setting), otherwise
# fall back to the first GPU or the CPU instead of failing on "cuda:1".
if torch.cuda.device_count() > 1:
    device = torch.device("cuda:1")
elif torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# NOTE(review): lr=0.1 is very high for Adam on a transformer and may explain the
# plateau in the logged RMSE — confirm this value is intentional.
optimizer = optim.Adam(model.parameters(), lr=0.1,weight_decay=5e-4)
# Multiply the learning rate by 0.92 every 50 epochs.
lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.92)

epochs = 500
output_path = "./model/"

# Set to True to continue from a previously saved state_dict.
resume = False
if resume:
    # map_location="cpu" keeps the load independent of the GPU the checkpoint
    # was saved from; train() moves the model to `device` afterwards.
    ckpt = torch.load("/data/aaron/Homework/Taxi/model/Taxi_Epoch191_rmse662.93517_withid.pth", map_location="cpu")
    model.load_state_dict(ckpt)

train(model,optimizer,lr_scheduler,train_dataloader,test_dataloader,epochs,output_path,device)

Training:  25%|██▍       | 123/500 [13:45:44<42:10:56, 402.80s/it, train_rmse_loss=658, val_rmse_loss=671]


KeyboardInterrupt: 

* Test data

In [None]:
# NOTE: this rebinds `new_data`, replacing the training frame loaded at the top.
new_data = pd.read_csv("test_public_features_cat.csv")
new_data.drop(["Unnamed: 0"],axis=1,inplace=True)

# Category ids must be cast to int64, exactly as for the training data.
# The previous int8 cast wraps every id above 127, and the model is configured
# with cardinalities of 448 and 57106 for two of these columns, so those ids
# would silently turn into wrong embedding indices.
public_test_dataset = TensorDataset(torch.from_numpy(new_data[category_names].values.astype(np.int64)),
                                    torch.from_numpy(new_data[continous_names].values.astype(np.float32)))

# Order is preserved (shuffle=False) so predictions line up with the input rows.
public_test_dataloader = DataLoader(public_test_dataset, batch_size=320, shuffle=False)

In [60]:
# Restore the weights of a saved checkpoint. map_location="cpu" makes the load
# independent of the GPU the checkpoint was written from; load_state_dict then
# copies the values into the model's existing parameters.
ckpt = torch.load("/data/aaron/Homework/Taxi/model/Taxi_Epoch104_rmse660.11802_withid.pth", map_location="cpu")
model.load_state_dict(ckpt)

<All keys matched successfully>

In [None]:
# predict the results
model.to(device)
model.eval()
# Collect the predictions of EVERY batch. Previously `output` was overwritten on
# each iteration, so only the last batch survived; that is only correct when the
# whole test set fits into a single batch.
batch_outputs = []
with torch.no_grad():
    for x, x1 in public_test_dataloader:
        x = x.to(device)
        x1 = x1.to(device)
        batch_outputs.append(model(x,x1).squeeze(1).cpu())

# One prediction per test row, in loader order.
output = torch.cat(batch_outputs)

In [None]:
# load sample

# Reuse an existing submission file as the template (indexed by TRIP_ID) and
# overwrite its TRAVEL_TIME column with the model predictions.
sample = pd.read_csv(
    "/data/aaron/Homework/Taxi/sample_xgboost.csv",
    index_col="TRIP_ID",
)
sample['TRAVEL_TIME'] = output.detach().cpu().numpy()

In [None]:
# Write the submission frame (index included) to the working directory.
sample.to_csv("sample_transformer.csv")