# Density estimation
In this case, we model $p(t|x,y)$ by a neural network. In the pet adoption case, $x$ is the feature of the pet and $y$ is the adoption speed. The $t$ is the type of the pet, i.e., cat or dog.

## 0.Data preparation

In [54]:
import pandas as pd
path = '/app/code'
# path='.'

In [55]:
# Load data from csv
data_df = pd.read_csv(path+'/data/train/train.csv')
# data_df.columns
cols_to_drop = ['Name','RescuerID','VideoAmt','Description','PetID','PhotoAmt']
data_df.drop(cols_to_drop, axis=1, inplace=True)
data_df['Type'] -= 1

In [56]:
# data_df = data_df[data_df['Type'] == 1]

In [57]:
data_df.head()

Unnamed: 0,Type,Age,Breed1,Breed2,Gender,Color1,Color2,Color3,MaturitySize,FurLength,Vaccinated,Dewormed,Sterilized,Health,Quantity,Fee,State,AdoptionSpeed
0,1,3,299,0,1,1,7,0,1,1,2,2,2,1,1,100,41326,2
1,1,1,265,0,1,1,2,0,2,2,3,3,3,1,1,0,41401,0
2,0,1,307,0,1,2,7,0,2,2,1,1,2,1,1,0,41326,3
3,0,4,307,0,2,1,2,0,2,1,1,1,2,1,1,150,41401,2
4,0,1,307,0,1,1,0,0,2,1,2,2,2,1,1,0,41326,2


## 1. Neural network
Now we have the data to estimate $p(t|x,y)$ where $t$ is the type of the pet, $y$ is the adoption speed and $x$ is the remaining columns in data_df. We use a neural network to model $p(t|x,y)$.

In [58]:
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.optim as torch_optim
import torch.nn as nn
import torch.nn.functional as F

In [59]:
# Embedding the categorical variables using nn.Embedding
cat_cols = ['Breed1','Breed2','Color1','Color2','Color3','State']
from sklearn.preprocessing import LabelEncoder

label_encoders = {}
for cat_col in cat_cols:
    label_encoders[cat_col] = LabelEncoder()
    data_df[cat_col] = label_encoders[cat_col].fit_transform(data_df[cat_col])
    
emb_c = {n: len(col.unique()) for n,col in data_df.items() if n in cat_cols}
emb_cols = emb_c.keys() # names of columns chosen for embedding
emb_szs = [(c, min(10, (c+1)//2)) for _,c in emb_c.items()] #embedding sizes for the chosen columns


In [60]:
# Split data into train and validation
train_df = data_df.iloc[:len(data_df)*4//5, :]
valid_df = data_df.iloc[len(data_df)*4//5:, :]
train_df.shape, valid_df.shape

X_train = train_df.drop(columns='Type')
y_train = train_df['Type']
X_valid = valid_df.drop(columns='Type')
y_valid = valid_df['Type']

n_cont = len(X_train.columns)-len(emb_cols) # number of continuous columns

In [61]:
class PetFinderData(Dataset):
    def __init__(self, X, Y, emb_cols):
        X = X.copy()
        self.X1 = torch.tensor(X.loc[:,emb_cols].copy().values).long() #categorical columns
        self.X2 = torch.tensor(X.drop(columns=emb_cols).copy().values).float() #numerical columns
        self.y = torch.tensor(Y.values).to(torch.float32)
    def __len__(self):
        return len(self.y)
    
    def __getitem__(self, idx):
        return self.X1[idx], self.X2[idx], self.y[idx]

In [62]:
def get_default_device():
    """Pick GPU if available, else CPU"""
    if torch.cuda.is_available():
        return torch.device('cuda')
    else:
        return torch.device('cpu')
def to_device(data, device):
    """Move tensor(s) to chosen device"""
    if isinstance(data, (list,tuple)):
        return [to_device(x, device) for x in data]
    return data.to(device, non_blocking=True)

class DeviceDataLoader():
    """Wrap a dataloader to move data to a device"""
    def __init__(self, dl, device):
        self.dl = dl
        self.device = device
        
    def __iter__(self):
        """Yield a batch of data after moving it to device"""
        for b in self.dl: 
            yield to_device(b, self.device)

    def __len__(self):
        """Number of batches"""
        return len(self.dl)

In [63]:
class PetFinderModel(nn.Module):
    def __init__(self, embedding_sizes, n_cont):
        super().__init__()
        self.embeddings = nn.ModuleList([nn.Embedding(categories, size) for categories,size in embedding_sizes])
        n_emb = sum(e.embedding_dim for e in self.embeddings) #length of all embeddings combined
        self.n_emb, self.n_cont = n_emb, n_cont
        self.lin1 = nn.Linear(self.n_emb + self.n_cont, 200)
        self.lin2 = nn.Linear(200, 30)
        self.lin3 = nn.Sequential(nn.Linear(30, 1), nn.Softmax())
        self.bn1 = nn.BatchNorm1d(self.n_cont)
        self.bn2 = nn.BatchNorm1d(200)
        self.bn3 = nn.BatchNorm1d(30)
        self.emb_drop = nn.Dropout(0.6)
        self.drops = nn.Dropout(0.3)
        

    def forward(self, x_cat, x_cont):
        x = [e(x_cat[:,i]) for i,e in enumerate(self.embeddings)]
        x = torch.cat(x, 1)
        # x = self.emb_drop(x)
        x2 = self.bn1(x_cont)
        x = torch.cat([x, x2], 1)
        x = F.relu(self.lin1(x))
        # x = self.drops(x)
        # x = self.bn2(x)
        x = F.relu(self.lin2(x))
        # x = self.drops(x)
        # x = self.bn3(x)
        x = self.lin3(x)
        # x = self.lin4(x)
        return x

In [64]:
def get_optimizer(model, lr = 0.0001, wd = 0.0):
    optim = torch_optim.Adam(model.parameters(), lr=lr, weight_decay=wd)
    return optim

In [65]:
def train_model(model, optim, train_dl):
    model.train()
    total = 0
    sum_loss = 0
    for x1, x2, y in train_dl:
        batch = y.shape[0]
        output = model(x1, x2)
        # loss = F.mse_loss(output, y.view(-1,1))
        # print(torch.sigmoid(output))
        # print(y.view(-1,1))
        # print(output)
        loss = F.mse_loss(output, y.view(-1,1))
        optim.zero_grad()
        loss.backward()
        optim.step()
        total += batch
        sum_loss += batch*(loss.item())
    return sum_loss/total

In [66]:
def val_loss(model, valid_dl):
    model.eval()
    total = 0
    sum_loss = 0
    correct = 0
    for x1, x2, y in valid_dl:
        current_batch_size = y.shape[0]
        output = model(x1, x2)
        # loss = F.BCELoss(out, y.view(-1,1))
        loss = F.mse_loss(output, y.view(-1,1))
        sum_loss += current_batch_size*(loss.item())
        total += current_batch_size
        # print(output)
        pred = torch.max(output, 1)[1]
        # print(pred)
        correct += (pred == y).float().sum().item()
        # print(correct)
        break
    print("valid loss %.3f and accuracy %.3f" % (sum_loss/total, correct/total))
    return sum_loss/total, correct/total

In [67]:
def train_loop(model, epochs, lr=0.01, wd=0.01, train_dl=None, valid_dl=None):
    optim = get_optimizer(model, lr = lr, wd = wd)
    for i in range(epochs): 
        loss = train_model(model, optim, train_dl)
        print("training loss: ", loss)
        val_loss(model, valid_dl)

In [68]:
model = PetFinderModel(emb_szs, n_cont)
device = get_default_device()
to_device(model, device)

PetFinderModel(
  (embeddings): ModuleList(
    (0): Embedding(176, 10)
    (1): Embedding(135, 10)
    (2-3): 2 x Embedding(7, 4)
    (4): Embedding(6, 3)
    (5): Embedding(14, 7)
  )
  (lin1): Linear(in_features=49, out_features=200, bias=True)
  (lin2): Linear(in_features=200, out_features=30, bias=True)
  (lin3): Sequential(
    (0): Linear(in_features=30, out_features=1, bias=True)
    (1): Softmax(dim=None)
  )
  (bn1): BatchNorm1d(11, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn2): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (bn3): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (emb_drop): Dropout(p=0.6, inplace=False)
  (drops): Dropout(p=0.3, inplace=False)
)

In [69]:

train_ds = PetFinderData(X_train, y_train, emb_cols)
valid_ds = PetFinderData(X_valid, y_valid, emb_cols)

# Get data into device
batch_size = 128
train_dl = DataLoader(train_ds, batch_size=batch_size,shuffle=True)
valid_dl = DataLoader(valid_ds, batch_size=batch_size,shuffle=True)

train_dl = DeviceDataLoader(train_dl, device)
valid_dl = DeviceDataLoader(valid_dl, device)



In [70]:
train_loop(model, epochs=10, lr=0.0005, wd=0.00001, train_dl=train_dl, valid_dl=valid_dl)

  return self._call_impl(*args, **kwargs)


training loss:  0.5432716358377871
valid loss 0.555 and accuracy 0.555
training loss:  0.5432716360266295
valid loss 0.516 and accuracy 0.516
training loss:  0.5432716357781526
valid loss 0.562 and accuracy 0.562
training loss:  0.5432716359968124
valid loss 0.578 and accuracy 0.578
training loss:  0.543271635827848
valid loss 0.531 and accuracy 0.531
training loss:  0.5432716360266295
valid loss 0.555 and accuracy 0.555
training loss:  0.5432716358079699
valid loss 0.602 and accuracy 0.602
training loss:  0.5432716358179089
valid loss 0.461 and accuracy 0.461
training loss:  0.5432716358328176
valid loss 0.523 and accuracy 0.523
training loss:  0.5432716358079699
valid loss 0.578 and accuracy 0.578


In [71]:
F.cross_entropy(torch.rand(100,1), torch.ones(100,1))

tensor(-0.)