In [None]:
import os
import pandas as pd

import pyscisci.all as pyscisci

from pyscisci.embedding import Node2Vec

# Location of the local DBLP dump. Put your own DBLP path here, or set the
# DBLP_PATH environment variable to override it without editing the notebook.
path2dblp = os.environ.get('DBLP_PATH', '/users/hgt6rn/Documents/DataSets/DBLP')
mydblp = pyscisci.DBLP(path2database=path2dblp, keep_in_memory=False, show_progress=True)

# Author-to-publication table. It must be defined here: the co-authorship
# network cell below passes `a2p` to pyscisci.coauthorship_network and would
# otherwise raise NameError on a fresh kernel.
a2p = mydblp.author2pub

In [None]:
# Ego co-authorship network around two focal researchers:
# Albert-Laszlo Barabasi and Mark E. J. Newman.
author = mydblp.author
target_researcher = ['Albert-Laszlo Barabasi', 'Mark E. J. Newman']

# AuthorIds of every author row whose FullName matches one of the focal names.
target_index = list(author.loc[author['FullName'].isin(target_researcher), 'AuthorId'])

coauthornet, author2int = pyscisci.coauthorship_network(
    a2p,
    focus_author_ids=target_index,
    focus_constraint='ego',
    show_progress=True,
)

# 1. Get Node2Vec Embedding

In [None]:

# Fit Node2Vec on the co-authorship network and learn an embedding for its nodes.
# NOTE(review): `emb` is later indexed by the keys of `author2int`, so it is
# presumably a mapping from author id to embedding vector — confirm.
model = Node2Vec(coauthornet, author2int)
emb = model.learn_embedding()

In [None]:
# Author ids present in the network, in the iteration order of author2int.
ids = list(author2int)

# AuthorId -> FullName lookup built from the author table.
id_to_name = dict(zip(author['AuthorId'], author['FullName']))

# Names and embedding vectors, both ordered like `ids` so they stay aligned.
names = [id_to_name[author_id] for author_id in ids]
emb_array = [emb[author_id] for author_id in ids]

# 2. 2-D projection of embeddings 

In [None]:
import umap
# Project the embedding vectors with UMAP using the cosine metric; the first
# two columns of `u` are used as plot coordinates in the next cell.
# NOTE(review): no random_state is passed, so the layout presumably differs
# between runs — confirm whether a reproducible projection is needed.
fit = umap.UMAP(metric='cosine')
u = fit.fit_transform(emb_array)

In [None]:
import plotly.graph_objects as go
import plotly as py
import plotly.express as px

# One row per author: UMAP coordinates plus the name shown on hover.
df = pd.DataFrame({
    'x': u[:,0],
    'y': u[:,1],
    'name': names
})

fig = px.scatter(
    df, x="x", y="y", hover_name="name",
    title="UMAP projection of the Node2Vec co-authorship embedding",
)
fig.update_layout(
            autosize=False,
            width=1000,
            height=800,
        )
fig.update_traces(marker=dict(size=3),
                          selector=dict(mode='markers'))

# plotly does not create missing parent directories, so make sure the output
# folder exists before writing the standalone HTML file.
umap_html_path = "example_interactive_html/umap_fig.html"
os.makedirs(os.path.dirname(umap_html_path), exist_ok=True)
py.offline.plot(fig, filename=umap_html_path,  auto_open=False)

# 3. Sem_axis results

For details, please read the SemAxis paper: https://arxiv.org/abs/1806.05521. <br>
SemAxis is usually applied in a word-embedding space to characterize word semantics along many semantic axes, but it can also be applied to network embeddings.<br>
In this example, we define an axis from two people (Newman as the negative anchor and Barabasi as the positive anchor). 
We can then interpret researchers with negative values as closer to Newman, and researchers with positive values as closer to Barabasi.

In [None]:
# Anchor entities that define the two ends of the semantic axis.
# NOTE(review): these are hard-coded ids, presumably the AuthorIds of the two
# focal researchers in this particular DBLP dump — verify against `target_index`.
positive_entities = [245542] # Barabasi (positive anchor)
negative_entities = [301349] # Newman (negative anchor)

In [None]:
# Score the embedded entities along the axis defined by the positive and
# negative anchors.
# NOTE(review): the result is indexed by author id below, so it is presumably
# a dict of id -> score — confirm against pyscisci.sem_axis.
sem_aixs_dict = pyscisci.sem_axis(emb, positive_entities , negative_entities)
# Scores ordered like `ids`, which keeps them aligned with `names`.
sem_axis_array = [sem_aixs_dict[id_] for id_ in ids]

In [None]:
# One row per author: the semantic-axis score, a constant y so that all points
# sit on a single horizontal line, and the name shown on hover.
df = pd.DataFrame(dict(
    sem_axis_result=sem_axis_array,
    y=0,
    name=names,
))

In [None]:
# 1-D strip plot: each author is placed on the x axis by semantic-axis score.
fig = px.scatter(
    df, x="sem_axis_result", y="y", hover_name="name",
    title="SemAxis scores: Newman (negative) to Barabasi (positive)",
)
fig.update_layout(
    autosize=False,
    width=1200,
    height=300,
    yaxis={
        'range': [-0.1, 0.1],
        'showgrid': False, # hide the thin background grid lines
        'zeroline': False, # hide the thick line at y=0
        'visible': False,  # hide the y axis itself (ticks, labels, title)
    },
    xaxis={
        'showgrid': False, # hide the thin background grid lines
        'zeroline': False, # hide the thick line at x=0
    }

)
fig.update_traces(marker=dict(size=3),
                          selector=dict(mode='markers'))

# plotly does not create missing parent directories, so make sure the output
# folder exists before writing the standalone HTML file.
sem_axis_html_path = "example_interactive_html/sem_axis.html"
os.makedirs(os.path.dirname(sem_axis_html_path), exist_ok=True)
py.offline.plot(fig, filename=sem_axis_html_path,  auto_open=False)