In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
import torch
import numpy as np
from time import time
import json

from datasets import WordNetDataset
from poincare import PoincareDistance, PoincareEmbedding, RiemannianSGD
# from vis import generate_report
from torch import nn
from tqdm import tqdm

# Fixed palette of 400 random "#RRGGBB" colours, keyed by integer index.
# Each colour is six hex digits drawn one at a time from the global NumPy RNG.
_HEX_DIGITS = list('0123456789ABCDEF')
index2color = {
    idx: "#" + ''.join(np.random.choice(_HEX_DIGITS) for _ in range(6))
    for idx in range(400)
}
    
def generate_poincare(model,data, root_exp, root_data, show_feat=False, epoch=0, loss=0):
    """Write a PNG snapshot of the current embedding under ``{root_exp}/vis``.

    Every pair listed in ``{root_data}/data_hierarchy.tsv`` is drawn as a grey
    line segment between the two items' embedding coordinates, every entry of
    ``data.items`` is drawn as a marker, and the unit circle is overlaid.
    The image is saved as ``{root_exp}/vis/poincare embedding_{epoch:05d}.png``.

    Args:
        model: object exposing ``embedding.weight``; each row is unpacked as
            an ``(x, y)`` pair, so the embedding must be two-dimensional.
        data: object exposing ``items`` (item names) and ``item2id``
            (name -> row index). Names are split on ``_``: the first field is
            the display name, the second-to-last is parsed as an integer
            depth and the last is shown as the threshold.
        root_exp: experiment directory; the ``vis`` sub-directory is created
            if missing.
        root_data: directory containing ``data_hierarchy.tsv`` (and
            ``data_feat.tsv`` when ``show_feat`` is true).
        show_feat: when true, also draw the pairs from ``data_feat.tsv`` in
            red. The file is only read in that case.
        epoch: epoch number used in the output file name.
        loss: value shown in the figure title; anything ``float()`` accepts
            (plain number or one-element tensor).
    """
    # Imported locally because the file only imports these names further
    # down, after this definition; the function would otherwise depend on
    # later code having been executed first.
    import os
    import plotly.graph_objs as go

    def read_pairs(path):
        # Skip blank lines (e.g. a trailing newline): they would produce a
        # one-element row and break the pair unpacking in segment_trace.
        with open(path, 'r') as f:
            return [line.strip().split('\t') for line in f if line.strip()]

    def segment_trace(pairs, color):
        xs, ys = [], []
        for s0, s1 in pairs:
            x0, y0 = vis[data.item2id[s0]]
            x1, y1 = vis[data.item2id[s1]]
            # The trailing None separates this segment from the next one so
            # consecutive pairs are not joined into a single polyline.
            xs.extend([x0, x1, None])
            ys.extend([y0, y1, None])
        return go.Scatter(
            x=xs,
            y=ys,
            line=dict(width=0.5, color=color),
            hoverinfo='none',
            mode='lines')

    # detach + cpu so this works wherever the model currently lives.
    vis = model.embedding.weight.detach().cpu().numpy()

    edge_trace = segment_trace(read_pairs(f'{root_data}/data_hierarchy.tsv'), '#888')

    # Marker size shrinks geometrically with depth, rescaled so the largest
    # marker has size ``max_size``.
    depths = np.array([int(name.split('_')[-2]) for name in data.items])
    sizes = 1.5**(np.max(depths) - depths) + 2
    max_size = 100
    sizes = sizes / np.max(sizes) * max_size

    # Wrap around the palette so more items than palette entries cannot
    # raise a KeyError; colours simply repeat.
    palette_size = len(index2color)

    node_traces = []
    for i, name in enumerate(data.items):
        x, y = vis[data.item2id[name]]
        fields = name.split('_')
        info_node = f'{fields[0]}, depth: {depths[i]}, threshold: {fields[-1]}'

        node_traces.append(go.Scatter(
            x=[x],
            y=[y],
            text=info_node,
            mode='markers',
            hoverinfo='text',
            marker=dict(
                showscale=False,
                reversescale=True,
                color=index2color[i % palette_size],
                size=sizes[i]),
            legendgroup=str(i)))
    # Reverse so that the first items are added last to the figure.
    node_traces = node_traces[::-1]

    data_trace = [edge_trace, *node_traces]
    if show_feat:
        data_trace.append(
            segment_trace(read_pairs(f'{root_data}/data_feat.tsv'), '#d43928'))

    fig = go.Figure(data=data_trace,
                 layout=go.Layout(
                    # ``title.font`` replaces the deprecated ``titlefont``.
                    title=dict(
                        text=f'Poincare Embedding of Decision Tree, Loss: {float(loss):.4f}',
                        font=dict(size=16)),
                    width=700,
                    height=700,
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20,l=5,r=5,t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
    # Unit circle outline.
    fig.add_shape(type="circle",
        xref="x", yref="y",
        x0=-1, y0=-1, x1=1, y1=1,
        line_color="LightSeaGreen",
    )
    dst = f'{root_exp}/vis/poincare embedding_{epoch:05d}.png'
    os.makedirs(os.path.dirname(dst), exist_ok=True)
    fig.write_image(dst)
    
    
import torch
import plotly.offline as plt
import plotly.graph_objs as go

def generate_report(model,data, root_exp, root_data, show_feat=True):
    """Write the loss curve and the final embedding plot as HTML files.

    Two files are produced with ``plotly.offline.plot``:
    ``{root_exp}/log_loss.html`` (``model.log`` plotted against its index) and
    ``{root_exp}/poincare embedding.html`` (pairs from
    ``{root_data}/data_hierarchy.tsv`` as grey segments, one marker per entry
    of ``data.items``, and the unit circle).

    Args:
        model: object exposing ``log`` (sequence of per-epoch loss values) and
            ``embedding.weight``; each row is unpacked as an ``(x, y)`` pair,
            so the embedding must be two-dimensional.
        data: object exposing ``items`` (item names) and ``item2id``
            (name -> row index). Names are split on ``_``: the first field is
            the display name (and selects the marker colour), the
            second-to-last is parsed as an integer depth and the last is
            shown as the threshold.
        root_exp: directory the HTML files are written to.
        root_data: directory containing ``data_hierarchy.tsv`` (and
            ``data_feat.tsv`` when ``show_feat`` is true).
        show_feat: when true, also draw the pairs from ``data_feat.tsv`` in
            red. The file is only read in that case.
    """
    print('Generating report...')

    def read_pairs(path):
        # Skip blank lines (e.g. a trailing newline): they would produce a
        # one-element row and break the pair unpacking in segment_trace.
        with open(path, 'r') as f:
            return [line.strip().split('\t') for line in f if line.strip()]

    def segment_trace(pairs, color):
        xs, ys = [], []
        for s0, s1 in pairs:
            x0, y0 = vis[data.item2id[s0]]
            x1, y1 = vis[data.item2id[s1]]
            # The trailing None separates this segment from the next one so
            # consecutive pairs are not joined into a single polyline.
            xs.extend([x0, x1, None])
            ys.extend([y0, y1, None])
        return go.Scatter(
            x=xs,
            y=ys,
            line=dict(width=0.5, color=color),
            hoverinfo='none',
            mode='lines')

    # --- Loss curve -------------------------------------------------------
    # Built from explicit lists so an empty log yields an empty plot instead
    # of failing while unpacking ``zip(*enumerate(...))``.
    losses = list(model.log)
    loss_trace = go.Scatter(x=list(range(len(losses))), y=losses)
    loss_layout = go.Layout(
                 yaxis=dict(
                      title= 'Loss',
                      ticklen= 5,
                      gridwidth= 2,
                      ),
                 xaxis=dict(
                      title= 'Epoch',
                      ticklen= 5,
                      gridwidth= 2,
                     ))
    plt.plot(go.Figure([loss_trace], layout=loss_layout),
             filename=f'{root_exp}/log_loss.html')

    # --- Embedding plot ---------------------------------------------------
    # detach + cpu so this also works when the model is still on a GPU.
    vis = model.embedding.weight.detach().cpu().numpy()

    edge_trace = segment_trace(read_pairs(f'{root_data}/data_hierarchy.tsv'), '#888')

    # Marker size shrinks geometrically with depth, rescaled so the largest
    # marker has size ``max_size``.
    depths = np.array([int(name.split('_')[-2]) for name in data.items])
    sizes = 1.5**(np.max(depths) - depths) + 2
    max_size = 100
    sizes = sizes / np.max(sizes) * max_size

    # One random "#RRGGBB" colour per distinct display name.
    name_nodes = list(set([name.split('_')[0] for name in data.items]))
    name2color = {
        node: "#" + ''.join([np.random.choice(list('0123456789ABCDEF')) for _ in range(6)])
        for node in name_nodes}

    node_traces = []
    for i, name in enumerate(data.items):
        x, y = vis[data.item2id[name]]
        fields = name.split('_')
        name_node = fields[0]
        info_node = f'{name_node}, depth: {depths[i]}, threshold: {fields[-1]}'

        node_traces.append(go.Scatter(
            x=[x],
            y=[y],
            text=info_node,
            mode='markers',
            hoverinfo='text',
            marker=dict(
                showscale=False,
                reversescale=True,
                color=name2color[name_node],
                size=sizes[i]),
            legendgroup=str(i)))
    # Reverse so that the first items are added last to the figure.
    node_traces = node_traces[::-1]

    data_trace = [edge_trace, *node_traces]
    if show_feat:
        data_trace.append(
            segment_trace(read_pairs(f'{root_data}/data_feat.tsv'), '#d43928'))

    fig = go.Figure(data=data_trace,
                 layout=go.Layout(
                    # ``title.font`` replaces the deprecated ``titlefont``.
                    title=dict(
                        text='Poincare Embedding of Decision Tree',
                        font=dict(size=16)),
                    width=700,
                    height=700,
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=20,l=5,r=5,t=40),
                    xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
                    yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)))
    # Unit circle outline.
    fig.add_shape(type="circle",
        xref="x", yref="y",
        x0=-1, y0=-1, x1=1, y1=1,
        line_color="LightSeaGreen",
    )
    plt.plot(fig, filename=f'{root_exp}/poincare embedding.html')

    # The files are written to ``root_exp``; say so in the message.
    print(f'Report saved as .html files in {root_exp}.')

In [None]:
import os
# Set before torch is imported so the values are already in the environment
# when CUDA is initialised.
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"   # see issue #152
os.environ["CUDA_VISIBLE_DEVICES"]="0"

import torch
import numpy as np
from time import time
import json

from datasets import WordNetDataset
from poincare import PoincareDistance, PoincareEmbedding, RiemannianSGD
# from vis import generate_report
from torch import nn
from tqdm import tqdm

device = 'cuda:0' if torch.cuda.is_available() else 'cpu:0'
with open('exps/rf_Attrition/config.json','r') as f:
    config = json.load(f)
num_clf = 35
print(config['n_epochs'])

# Train one embedding per classifier index; inputs come from
# data/rf_Attrition/<idx>/ and outputs go to exps/rf_Attrition/<idx>/.
for i in range(num_clf):
    print(i)
    root_exp = f'exps/rf_Attrition/{i:03d}'
    os.makedirs(root_exp, exist_ok=True)

    config['data'] = f'data/rf_Attrition/{i:03d}/data_closure.tsv'
    data = WordNetDataset(filename=config['data'],neg_samples=config['neg_samples'])
    dataloader = torch.utils.data.DataLoader(data,batch_size=config['batch_size'], shuffle=True)

    # Persist the dataset object next to the model it was used to train.
    torch.save(data,f'{root_exp}/data.pt')
    model = PoincareEmbedding(data.n_items, root_idx=data.root_idx)

    model.initialize_embedding()

    model = model.to(device)
    # model = nn.DataParallel(model)
    # model = model.to(device)

    optimizer = RiemannianSGD(model.parameters())

    total_time = 0
    n_epochs = config['n_epochs']
    pbar = tqdm(range(n_epochs))
    for epoch in pbar:
        epoch_loss = []
        start = time()

        # Burn-in: the first ``n_burn_in`` epochs use the learning rate
        # divided by ``c``.
        if epoch<config['n_burn_in']:
            lr = config['lr']/config['c']
        else:
            lr = config['lr']

        for batch in dataloader:
            optimizer.zero_grad()

            x,y = batch

            x = x.to(device)
            y = y.to(device)

            preds = model(x,y)
            loss = model.loss(preds)

            loss.backward()
            optimizer.step(lr=lr)

            epoch_loss.append(loss.item())


        time_per_epoch = time()-start
        total_time += time_per_epoch

        model.log.append(np.mean(epoch_loss))

        if epoch %500 == 0:
            # Report the epoch mean (the value that is logged and shown in
            # the progress bar) rather than the last batch's loss tensor.
            generate_poincare(model, data,root_exp, os.path.dirname(config['data']), epoch=epoch, loss=model.log[-1])

        if epoch %100 == 0:
            pbar.set_description(f'Epoch {epoch+1} / {n_epochs} | loss: {model.log[-1]:.4f} |')

    print('')
    print('Trainig finished!')
    torch.save(model,f'{root_exp}/model.pt')

    # The report is generated from the CPU copy of the model.
    generate_report(model.cpu(), data,root_exp, os.path.dirname(config['data']))

50000
0


Epoch 17201 / 50000 | loss: 2.1340 |:  34%|███▍      | 17207/50000 [01:18<02:15, 242.45it/s]