In [2]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import torch
import numpy as np
from time import time
import json

from torch import nn
from tqdm import tqdm
import joblib
import pandas as pd
from datasets import WordNetDataset

device = 'cuda:0' if torch.cuda.is_available() else 'cpu:0'
with open(f'exps/rf_Attrition/config_feat.json','r') as f:
    config = json.loads(f.read())
num_clf = 35


poincard_embs = []
for i in range(num_clf):
    root_exp = f'exps/rf_Attrition_feat/{i:03d}'
    os.makedirs(root_exp, exist_ok=True)
    model = torch.load(os.path.join(root_exp, 'model.pt'))
    poincard_embs.append(model.embedding.weight.cpu().detach().numpy())
    
rf_clf = joblib.load("random_forest/my_random_forest.joblib")


In [3]:
X_train = np.load('data/Attrition/X_train.npy')
X_test = np.load('data/Attrition/X_test.npy')
y_train = np.load('data/Attrition/y_train.npy')
y_test = np.load('data/Attrition/y_test.npy')

In [4]:
len(rf_clf.estimators_)

35

In [4]:


# data = WordNetDataset(filename=config['data'],neg_samples=config['neg_samples'])
# data.root_idx,config['data'],data.raw_data

# raw_data = data.raw_data
# A,B = zip(*raw_data) #get As and Bs from raw data 
# items = tuple(set(A+B)) #individual items from dataset
# items

In [6]:

embs_test = [[[] for _ in range(len(rf_clf.estimators_))] for _ in range(len(X_test))]

for i,tree_clf in enumerate(rf_clf.estimators_[:]):
    # print(i)
    config['data'] = f'data/rf_Attrition/{i:03d}/data_closure_feat.tsv'
    data = WordNetDataset(filename=config['data'],neg_samples=config['neg_samples'])
    nodeid2embid = {int(k.split('_')[1]):v for k,v in data.item2id.items()}
    
    paths = tree_clf.decision_path(X_test).toarray()
    
    embs = []
    for j,path in enumerate(paths):
        nodeid = np.where(path==1)[0]
        embid = [nodeid2embid[id] for id in nodeid]

        emb = poincard_embs[i][embid]
        embs_test[j][i] = emb

In [35]:
# i = 2
# config['data'] = f'data/rf_Attrition/{i:03d}/data_closure.tsv'
# data = WordNetDataset(filename=config['data'],neg_samples=config['neg_samples'])
# nodeid2embid = {int(k.split('_')[1]):v for k,v in data.item2id.items()}
# poincard_embs[i][nodeid2embid[0]]

In [7]:
i = 1
config['data'] = f'data/rf_Attrition/{i:03d}/data_closure_feat.tsv'
data = WordNetDataset(filename=config['data'],neg_samples=config['neg_samples'])
nodeid2embid = {int(k.split('_')[1]):v for k,v in data.item2id.items()}

paths = tree_clf.decision_path(X_train).toarray()

embs_train[1][i],embs_train[2][i]

path = paths[2]
nodeid = np.where(path==1)[0]
embid = [nodeid2embid[id] for id in nodeid]
emb = poincard_embs[i][embid]
emb

NameError: name 'embs_train' is not defined

In [8]:

embs_train = [[[] for _ in range(len(rf_clf.estimators_))] for _ in range(len(X_train))]

for i,tree_clf in enumerate(rf_clf.estimators_[:]):
    # print(i)
    config['data'] = f'data/rf_Attrition/{i:03d}/data_closure_feat.tsv'
    data = WordNetDataset(filename=config['data'],neg_samples=config['neg_samples'])
    nodeid2embid = {int(k.split('_')[1]):v for k,v in data.item2id.items()}
    
    paths = tree_clf.decision_path(X_train).toarray()
    
    embs = []
    for j,path in enumerate(paths):
        nodeid = np.where(path==1)[0]
        embid = [nodeid2embid[id] for id in nodeid]

        emb = poincard_embs[i][embid]
        embs_train[j][i] = emb

In [9]:

import torch.nn as nn
import torch.nn.functional as F
class Attention(nn.Module):

    def __init__(self, input_size, hidden_size):
        super(Attention, self).__init__()
        self.i2h = nn.Linear(input_size, hidden_size, bias=True)
        self.score = nn.Linear(hidden_size, 1, bias=False)
        self.hidden_size = hidden_size

    def forward(self, batch_H):
        # [batch_size x num_encoder_step x num_channel] -> [batch_size x num_encoder_step x hidden_size]
        batch_H_proj = self.i2h(batch_H)
        e = self.score(torch.tanh(batch_H_proj))  # batch_size x num_encoder_step * 1

        alpha = F.softmax(e, dim=1)
        context = torch.bmm(alpha.permute(0, 2, 1), batch_H).squeeze(1)  
        return context

    
class PoincareTreeEmbedding(nn.Module):
    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.rnn = nn.LSTM(input_size, hidden_size, bidirectional=True, batch_first=True)
        self.attention = Attention(hidden_size*2, hidden_size)
        
    def forward(self, x):
        x,_ = self.rnn(x)
        x = self.attention(x)
        # print(x).shape
        return x
    
class PoincareClassifier(nn.Module):
    def __init__(self,input_size, hidden_size, num_tree = 35, num_classes=2):
        super().__init__()
        self.poincare_tree_embedding = PoincareTreeEmbedding(input_size = input_size, hidden_size = hidden_size)
        self.classifier = nn.Sequential(
            nn.Linear(hidden_size * 2 * num_tree, hidden_size),
            nn.BatchNorm1d(hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, num_classes)
        )
        
    def forward(self, X):
        # X: tree x image x path x 2
        Xs = []
        for x in X:
            xs = []
            for path in x:
                path = torch.from_numpy(path[None]).float().cuda()
                # print(path.shape)
                x = self.poincare_tree_embedding(path)
                xs.append(x)
            xs = torch.cat(xs, dim=1)
            Xs.append(xs)
        Xs = torch.cat(Xs, dim=0)
        # print(Xs)
        out = self.classifier(Xs)
        return out
        
        
num_tree = 35
model = PoincareClassifier(input_size = 2, hidden_size = 4).cuda()
input = np.random.randn(2,num_tree, 5,2)
y = torch.ones(1)
x = model(input)
x.shape

torch.Size([2, 2])

In [10]:
import torch.optim as optim
import sklearn
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

num_epochs = 20
batch_size = 64
for epoch in range(num_epochs):  # loop over the dataset multiple times

    running_loss = None
    running_acc = None
    idxes = np.arange(len(X_train))
    np.random.shuffle(idxes)
    
    pbar = tqdm(np.array_split(idxes, len(X_train) // batch_size))
    model.train()
    for i, idx in enumerate(pbar):
        inputs = [embs_train[j] for j in idx]
        labels = torch.from_numpy(y_train[idx]).cuda().long()

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        prob = F.softmax(outputs, dim=1)[:,1]
        pred = (prob > 0.5).float()
        acc = torch.mean(torch.eq(labels,pred).float())
        
        # print statistics
        running_loss = 0.8*running_loss + 0.2* loss.item() if running_loss is not None else loss.item()
        running_acc = 0.8*running_acc + 0.2* acc.item() if running_acc is not None else acc.item()
        
        if i % 1 == 0:    # print every 2000 mini-batches
            s = f'{epoch + 1}| {i + 1:3d} loss: {running_loss / 1:.5f} acc: {running_acc / 1:.4f} avg_acc: {1-labels.float().mean().item() / 1:.4f}'
            running_loss = 0.0
            pbar.set_description(s)
            
    
    idxes = np.arange(len(X_test))
    
    model.eval()
    pbar = tqdm(np.array_split(idxes, len(X_test) // batch_size))
    targets = []
    probs = []
    for i, idx in enumerate(pbar):
        inputs = [embs_test[j] for j in idx]
        labels = torch.from_numpy(y_test[idx]).cuda().long()
        
        with torch.no_grad():
            outputs = model(inputs)
        prob = F.softmax(outputs, dim=1)[:,1]
        
        targets.extend(y_train[idx])
        probs.extend(prob.cpu().detach().numpy())
        
    targets = np.array(targets)
    preds = np.array(probs)>0.5
    acc = np.mean(preds == targets)
    auc = sklearn.metrics.roc_auc_score(targets, probs)
    confusion_matrix = sklearn.metrics.confusion_matrix(targets, preds)
    print(f'acc: {acc:.3f} auc: {auc:.3f}')
    print(confusion_matrix)

print('Finished Training')

1|  16 loss: 0.13219 acc: 0.6791 avg_acc: 0.8906: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.839 auc: 0.486
[[370   0]
 [ 71   0]]


2|  16 loss: 0.12238 acc: 0.7756 avg_acc: 0.8281: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.09s/it]


acc: 0.837 auc: 0.500
[[369   1]
 [ 71   0]]


3|  16 loss: 0.10861 acc: 0.8237 avg_acc: 0.8750: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.819 auc: 0.510
[[356  14]
 [ 66   5]]


4|  16 loss: 0.10377 acc: 0.8331 avg_acc: 0.8594: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.794 auc: 0.509
[[342  28]
 [ 63   8]]


5|  16 loss: 0.10529 acc: 0.8512 avg_acc: 0.7969: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.07s/it]


acc: 0.789 auc: 0.508
[[341  29]
 [ 64   7]]


6|  16 loss: 0.09423 acc: 0.8662 avg_acc: 0.7188: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.09s/it]


acc: 0.798 auc: 0.508
[[345  25]
 [ 64   7]]


7|  16 loss: 0.08606 acc: 0.8668 avg_acc: 0.8750: 100%|██████████| 16/16 [00:44<00:00,  2.77s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.812 auc: 0.510
[[351  19]
 [ 64   7]]


8|  16 loss: 0.08212 acc: 0.8759 avg_acc: 0.8906: 100%|██████████| 16/16 [00:44<00:00,  2.79s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.814 auc: 0.512
[[353  17]
 [ 65   6]]


9|  16 loss: 0.08138 acc: 0.8634 avg_acc: 0.7812: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.816 auc: 0.514
[[354  16]
 [ 65   6]]


10|  16 loss: 0.07733 acc: 0.8830 avg_acc: 0.8281: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.814 auc: 0.517
[[354  16]
 [ 66   5]]


11|  16 loss: 0.08899 acc: 0.8604 avg_acc: 0.7969: 100%|██████████| 16/16 [00:44<00:00,  2.77s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.814 auc: 0.518
[[354  16]
 [ 66   5]]


12|  16 loss: 0.08598 acc: 0.8540 avg_acc: 0.7969: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.07s/it]


acc: 0.816 auc: 0.519
[[355  15]
 [ 66   5]]


13|  16 loss: 0.08377 acc: 0.8572 avg_acc: 0.8125: 100%|██████████| 16/16 [00:44<00:00,  2.78s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.816 auc: 0.520
[[355  15]
 [ 66   5]]


14|  16 loss: 0.08593 acc: 0.8634 avg_acc: 0.7969: 100%|██████████| 16/16 [00:44<00:00,  2.77s/it]
100%|██████████| 6/6 [00:06<00:00,  1.07s/it]


acc: 0.814 auc: 0.519
[[354  16]
 [ 66   5]]


15|  16 loss: 0.07563 acc: 0.8795 avg_acc: 0.8594: 100%|██████████| 16/16 [00:43<00:00,  2.75s/it]
100%|██████████| 6/6 [00:06<00:00,  1.06s/it]


acc: 0.816 auc: 0.519
[[354  16]
 [ 65   6]]


16|  16 loss: 0.07713 acc: 0.8898 avg_acc: 0.8281: 100%|██████████| 16/16 [00:43<00:00,  2.74s/it]
100%|██████████| 6/6 [00:06<00:00,  1.07s/it]


acc: 0.816 auc: 0.519
[[354  16]
 [ 65   6]]


17|  16 loss: 0.06590 acc: 0.8695 avg_acc: 0.8438: 100%|██████████| 16/16 [00:44<00:00,  2.76s/it]
100%|██████████| 6/6 [00:06<00:00,  1.08s/it]


acc: 0.814 auc: 0.522
[[354  16]
 [ 66   5]]


18|  16 loss: 0.05784 acc: 0.8852 avg_acc: 0.9375: 100%|██████████| 16/16 [00:44<00:00,  2.76s/it]
100%|██████████| 6/6 [00:06<00:00,  1.06s/it]


acc: 0.816 auc: 0.521
[[354  16]
 [ 65   6]]


19|  16 loss: 0.07744 acc: 0.8680 avg_acc: 0.8750: 100%|██████████| 16/16 [00:44<00:00,  2.76s/it]
100%|██████████| 6/6 [00:06<00:00,  1.06s/it]


acc: 0.816 auc: 0.523
[[354  16]
 [ 65   6]]


20|  16 loss: 0.06535 acc: 0.8791 avg_acc: 0.8281: 100%|██████████| 16/16 [00:43<00:00,  2.75s/it]
100%|██████████| 6/6 [00:06<00:00,  1.06s/it]

acc: 0.814 auc: 0.520
[[354  16]
 [ 66   5]]
Finished Training



