In [1]:
import random

import numpy as np
import pandas as pd

from nltk.corpus import words

In [2]:
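# One-time setup: uncomment and run to download the NLTK `words` corpus
# that is used below to generate random text properties.
# import nltk

# nltk.download('words')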

In [3]:
from bluegraph import PandasPGFrame
from bluegraph.preprocess import ScikitLearnPGEncoder
from bluegraph.backends.stellargraph import StellarGraphNodeEmbedder

In [4]:
# Toy property graph: seven named nodes with numeric attributes, connected
# by edges given as (source, target) pairs.
nodes = [
    "Alice", "Bob", "Eric", "John", "Anna", "Laura", "Matt"
]
age = [25, 9, 70, 42, 26, 35, 36]
height = [180, 122, 173, 194, 172, 156, 177]
weight = [75, 43, 68, 82, 70, 59, 81]
sources = [
    "Alice", "Alice", "Bob", "Bob", "Bob", "Eric", "Anna", "Anna", "Matt"
]
targets = [
    "Bob", "Eric", "Eric", "John", "Anna", "Anna", "Laura", "John", "John"
]
# One value per (source, target) pair, in the same order as the lists above.
weights = [1.0, 2.2, 0.3, 4.1, 1.5, 21.0, 1.0, 2.5, 7.5]
edges = list(zip(sources, targets))
frame = PandasPGFrame(nodes=nodes, edges=edges)

# Add properties

# Each numeric node property is registered with its own call, keyed by "@id".
for prop_name, prop_values in (
        ("age", age), ("height", height), ("weight", weight)):
    frame.add_node_properties(
        {
            "@id": nodes,
            prop_name: prop_values
        }, prop_type="numeric")

# The per-edge values are stored under the "distance" property.
edge_weight = pd.DataFrame({
    "@source_id": sources,
    "@target_id": targets,
    "distance": weights
})
frame.add_edge_properties(edge_weight, prop_type="numeric")

In [5]:
# Numeric node property columns that are collected into feature vectors below.
props = ["age", "height", "weight"]

In [9]:
# Take an explicit copy: a column is added to `df` further down, and writing
# to a column selection of `frame._nodes` triggers pandas'
# SettingWithCopyWarning.
df = frame._nodes[props].copy()

In [11]:
# Preview the selected property columns.
df

Unnamed: 0_level_0,age,height,weight
@id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice,25,180,75
Bob,9,122,43
Eric,70,173,68
John,42,194,82
Anna,26,172,70
Laura,35,156,59
Matt,36,177,81


In [14]:
# Convert the table into one plain Python list of values per node.
df.to_numpy().tolist()

[[25, 180, 75],
 [9, 122, 43],
 [70, 173, 68],
 [42, 194, 82],
 [26, 172, 70],
 [35, 156, 59],
 [36, 177, 81]]

In [15]:
# Build the vectors from the `props` columns only, so that re-running this
# cell does not fold the previously generated column into the new vectors.
df["_generated_features"] = df[props].to_numpy().tolist()

In [16]:
# The table now also carries the per-node feature vector column.
df

Unnamed: 0_level_0,age,height,weight,_generated_features
@id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alice,25,180,75,"[25, 180, 75]"
Bob,9,122,43,"[9, 122, 43]"
Eric,70,173,68,"[70, 173, 68]"
John,42,194,82,"[42, 194, 82]"
Anna,26,172,70,"[26, 172, 70]"
Laura,35,156,59,"[35, 156, 59]"
Matt,36,177,81,"[36, 177, 81]"


In [4]:
def generate_targets(nodes, s, density=0.2):
    """Randomly select edges from `s` to the nodes that compare greater.

    Every `t` in `nodes` with `s < t` is visited in order, and the edge
    `[s, t]` is kept with probability `density`.

    Parameters
    ----------
    nodes : iterable
        Candidate target nodes; each must be comparable with `s`.
    s : object
        Source node of all generated edges.
    density : float, optional
        Probability of keeping each candidate edge (default 0.2).

    Returns
    -------
    list
        The selected edges as two-element lists `[s, t]`.
    """
    keep_probabilities = [1 - density, density]
    return [
        [s, candidate]
        for candidate in nodes
        # A random draw is made only for candidates that pass `s < candidate`.
        if s < candidate
        and np.random.choice([0, 1], p=keep_probabilities)
    ]


def random_pgframe(n_nodes, density):
    """Create a `PandasPGFrame` with integer nodes and random edges.

    Nodes are the integers `0 .. n_nodes - 1`. For every node, candidate
    edges are drawn with `generate_targets`, so each edge goes from a
    smaller to a larger node id.

    Parameters
    ----------
    n_nodes : int
        Number of nodes to create.
    density : float
        Probability of keeping each candidate edge.

    Returns
    -------
    PandasPGFrame
        Frame holding the nodes and the sampled edges.
    """
    nodes = list(range(n_nodes))

    # Flatten the per-source edge lists in a single pass; `sum(lists, [])`
    # copies the accumulated list again for every source node.
    edge_pairs = [
        pair
        for source in nodes
        for pair in generate_targets(nodes, source, density)
    ]
    edges_df = pd.DataFrame(
        edge_pairs, columns=["@source_id", "@target_id"]
    ).set_index(["@source_id", "@target_id"])
    return PandasPGFrame(nodes=nodes, edges=edges_df.index)

In [5]:
# Number of nodes and edge probability for the random graph.
N = 70
density = 0.13

# Seed both random number generators used in this notebook (`np.random` and
# `random`) so that the sampled graph, types and properties are reproducible
# after a kernel restart.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Generate a random graph

In [6]:
# Build the random graph from the node count and edge probability set above.
graph_frame = random_pgframe(N, density)

In [7]:
# List the node table as one record per node (only '@id' at this point).
graph_frame._nodes.reset_index().to_dict("records")

[{'@id': 0},
 {'@id': 1},
 {'@id': 2},
 {'@id': 3},
 {'@id': 4},
 {'@id': 5},
 {'@id': 6},
 {'@id': 7},
 {'@id': 8},
 {'@id': 9},
 {'@id': 10},
 {'@id': 11},
 {'@id': 12},
 {'@id': 13},
 {'@id': 14},
 {'@id': 15},
 {'@id': 16},
 {'@id': 17},
 {'@id': 18},
 {'@id': 19},
 {'@id': 20},
 {'@id': 21},
 {'@id': 22},
 {'@id': 23},
 {'@id': 24},
 {'@id': 25},
 {'@id': 26},
 {'@id': 27},
 {'@id': 28},
 {'@id': 29},
 {'@id': 30},
 {'@id': 31},
 {'@id': 32},
 {'@id': 33},
 {'@id': 34},
 {'@id': 35},
 {'@id': 36},
 {'@id': 37},
 {'@id': 38},
 {'@id': 39},
 {'@id': 40},
 {'@id': 41},
 {'@id': 42},
 {'@id': 43},
 {'@id': 44},
 {'@id': 45},
 {'@id': 46},
 {'@id': 47},
 {'@id': 48},
 {'@id': 49},
 {'@id': 50},
 {'@id': 51},
 {'@id': 52},
 {'@id': 53},
 {'@id': 54},
 {'@id': 55},
 {'@id': 56},
 {'@id': 57},
 {'@id': 58},
 {'@id': 59},
 {'@id': 60},
 {'@id': 61},
 {'@id': 62},
 {'@id': 63},
 {'@id': 64},
 {'@id': 65},
 {'@id': 66},
 {'@id': 67},
 {'@id': 68},
 {'@id': 69}]

In [8]:
# Inspect the sampled edges (indexed by source and target id).
graph_frame._edges

@source_id,@target_id
0,12
0,13
0,27
0,28
0,40
...,...
58,66
59,67
62,63
65,66


# Add node and edge types

In [9]:
# Node type labels to sample from.
types = ["Apple", "Orange", "Carrot"]

In [10]:
# Assign every node id in range(N) one type, drawn with fixed probabilities
# (0.5 / 0.4 / 0.1 in the order of `types`).
node_types = dict(
    (node_id, np.random.choice(types, p=[0.5, 0.4, 0.1]))
    for node_id in range(N)
)

In [11]:
# Attach the sampled types to the nodes.
graph_frame.add_node_types(node_types)

In [12]:
# Nodes now carry an '@type' column.
graph_frame._nodes

Unnamed: 0_level_0,@type
@id,Unnamed: 1_level_1
0,Orange
1,Carrot
2,Apple
3,Apple
4,Apple
...,...
65,Orange
66,Apple
67,Orange
68,Apple


In [13]:
# Edge type labels to sample from.
# NOTE: this rebinds `types`, which previously held the node type labels.
types = ["isFriend", "isEnemy"]

In [14]:
# Assign every edge one type, drawn with probabilities 0.8 / 0.2 in the
# order of `types`.
edge_types = dict(
    (edge, np.random.choice(types, p=[0.8, 0.2]))
    for edge in graph_frame.edges()
)

In [15]:
# Attach the sampled types to the edges.
graph_frame.add_edge_types(edge_types)

In [16]:
# Edges now carry an '@type' column.
graph_frame._edges

Unnamed: 0_level_0,Unnamed: 1_level_0,@type
@source_id,@target_id,Unnamed: 2_level_1
0,12,isFriend
0,13,isEnemy
0,27,isEnemy
0,28,isFriend
0,40,isFriend
...,...,...
58,66,isFriend
59,67,isEnemy
62,63,isFriend
65,66,isFriend


# Add node and edge properties

Three kinds of properties are added below: numerical, categorical, and text.

## Add node properties

In [17]:
# Draw one weight per node from a normal distribution (loc=35, scale=5).
weight_rows = [
    (node_id, np.random.normal(loc=35, scale=5))
    for node_id in graph_frame.nodes()
]
weight = pd.DataFrame(weight_rows, columns=["@id", "weight"])

In [18]:
# Register `weight` as a numeric node property.
graph_frame.add_node_properties(weight, prop_type="numeric")

In [19]:
# Color names to sample node colors from.
colors = ["red", "green", "blue"]

In [20]:
# Pick one color per node uniformly from the `colors` list.
# NOTE: `colors` is rebound from that list to a DataFrame here, so the cell
# defining the list has to be re-run before this cell is executed again.
color_rows = [
    (node_id, np.random.choice(colors))
    for node_id in graph_frame.nodes()
]
colors = pd.DataFrame(color_rows, columns=["@id", "color"])

In [21]:
# Register `color` as a categorical node property.
graph_frame.add_node_properties(colors, prop_type="category")

In [22]:
# Fetch the word list once instead of calling `words.words()` again for
# every node.
node_vocabulary = words.words()

# Each node gets a description made of 20 randomly sampled words.
desc = pd.DataFrame(
    [
        (n, ' '.join(random.sample(node_vocabulary, 20)))
        for n in graph_frame.nodes()
    ],
    columns=["@id", "desc"]
)

In [23]:
# Register `desc` as a text node property.
graph_frame.add_node_properties(desc, prop_type="text")

In [24]:
# Node table with the type, weight, color and desc columns.
graph_frame._nodes

Unnamed: 0_level_0,@type,weight,color,desc
@id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Orange,39.567907,green,hydrosulphurated Chiriguano winterwards tarbut...
1,Carrot,37.675663,blue,predestinator initiatrix sporangiferous esslin...
2,Apple,35.347256,green,nifle viragoish smokebush tacuacine acetylchol...
3,Apple,33.197314,blue,wringman volubilate spinobulbar noncontingent ...
4,Apple,32.910243,red,skiv inclementness Kipchak neuronic Menobranch...
...,...,...,...,...
65,Orange,32.447448,red,forecar cerasein psychomachy preboyhood chytri...
66,Apple,35.501827,green,seralbumin sagacious scotomatical ametabolous ...
67,Orange,32.744517,blue,safeguard anna pollinium baboodom conceptionis...
68,Apple,38.371299,red,lavatic exquisiteness Arcacea Hapi disheritmen...


## Add edge properties

In [25]:
# Draw one random integer per edge with np.random.randint(0, 20).
year_rows = [
    (source, target, np.random.randint(0, 20))
    for source, target in graph_frame.edges()
]
years = pd.DataFrame(
    year_rows, columns=["@source_id", "@target_id", "n_years"])

In [26]:
# Register `n_years` as a numeric edge property.
graph_frame.add_edge_properties(years, prop_type="numeric")

In [27]:
# Keep the option list under its own name so that `shapes` only ever refers
# to the resulting DataFrame.
shape_options = ["dashed", "dotted", "solid"]

# Pick one shape per edge uniformly from the options.
shapes = pd.DataFrame(
    [
        (s, t, np.random.choice(shape_options))
        for s, t in graph_frame.edges()
    ],
    columns=["@source_id", "@target_id", "shapes"]
)

In [28]:
# Register `shapes` as a categorical edge property.
graph_frame.add_edge_properties(shapes, prop_type="category")

In [29]:
# Fetch the word list once instead of calling `words.words()` again for
# every edge.
edge_vocabulary = words.words()

# Each edge gets a description made of 20 randomly sampled words.
# NOTE: this rebinds `desc`, which previously held the node descriptions.
desc = pd.DataFrame(
    [
        (s, t, ' '.join(random.sample(edge_vocabulary, 20)))
        for s, t in graph_frame.edges()
    ],
    columns=["@source_id", "@target_id", "desc"]
)

In [30]:
# Register `desc` as a text edge property.
graph_frame.add_edge_properties(desc, prop_type="text")

In [31]:
# Edge table with the type, n_years, shapes and desc columns.
graph_frame._edges

Unnamed: 0_level_0,Unnamed: 1_level_0,@type,n_years,shapes,desc
@source_id,@target_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,12,isFriend,14,dashed,safe hangmanship fruitade Mniaceae benzophenot...
0,13,isEnemy,16,dotted,coremium essoin lessen manganiferous Selbornia...
0,27,isEnemy,14,dashed,ophidious denudant delightfulness differentian...
0,28,isFriend,16,dotted,sparrowless microtypical vagabondage unplantab...
0,40,isFriend,3,solid,caseless Phoenicopteroideae haughtily oquassa ...
...,...,...,...,...,...
58,66,isFriend,11,dashed,stoichiological masculinity tavert exculpable ...
59,67,isEnemy,18,dotted,steepletop avowably pleonic pedomorphic boltup...
62,63,isFriend,11,solid,flaminica hopcrease ovatopyriform beastily phi...
65,66,isFriend,8,solid,Ephemeroptera anatomicochirurgical shou kataly...


In [32]:
# Mapping from edge property name to its declared property type.
graph_frame._edge_prop_types

{'@type': 'category',
 'n_years': 'numeric',
 'shapes': 'category',
 'desc': 'text'}

# Property encoding

In [33]:
# Configure the property encoder: non-heterogeneous mode, type encoding
# enabled, and TF-IDF for text properties.
hom_encoder = ScikitLearnPGEncoder(
    heterogeneous=False,
    encode_types=True,
    drop_types=True,
    text_encoding="tfidf",
)

In [34]:
# Re-check the edge property types (the output matches the earlier inspection).
graph_frame._edge_prop_types

{'@type': 'category',
 'n_years': 'numeric',
 'shapes': 'category',
 'desc': 'text'}

In [68]:
# Fit the encoder on the graph's properties and produce the encoded frame.
transformed_frame = hom_encoder.fit_transform(graph_frame)

Fitting for:  @type
Fitting for:  weight
Fitting for:  color
Fitting for:  desc
Fitting for:  @type
Fitting for:  n_years
Fitting for:  shapes
Fitting for:  desc


In [69]:
# Each node now has a single 'features' vector column.
transformed_frame._nodes

Unnamed: 0_level_0,features
@id,Unnamed: 1_level_1
0,"[0.0, 0.0, 1.0, 0.76633507437344, 0.0, 1.0, 0...."
1,"[0.0, 1.0, 0.0, 0.3998266913929289, 1.0, 0.0, ..."
2,"[1.0, 0.0, 0.0, -0.051162051616044556, 0.0, 1...."
3,"[1.0, 0.0, 0.0, -0.4675838610594931, 1.0, 0.0,..."
4,"[1.0, 0.0, 0.0, -0.5231865188868914, 0.0, 0.0,..."
...,...
65,"[0.0, 0.0, 1.0, -0.6128252940491069, 0.0, 0.0,..."
66,"[1.0, 0.0, 0.0, -0.021223134049023457, 0.0, 1...."
67,"[0.0, 0.0, 1.0, -0.5552859234669962, 1.0, 0.0,..."
68,"[1.0, 0.0, 0.0, 0.5345643077844714, 0.0, 0.0, ..."


In [70]:
# Encoders that were fitted for each node property.
hom_encoder._node_encoders

{'@type': MultiLabelBinarizer(),
 'weight': StandardScaler(),
 'color': MultiLabelBinarizer(),
 'desc': TfidfVectorizer(max_features=64, stop_words='english', sublinear_tf=True)}

In [73]:
# Encoded node features again (the output matches the earlier display).
transformed_frame._nodes

Unnamed: 0_level_0,features
@id,Unnamed: 1_level_1
0,"[0.0, 0.0, 1.0, 0.76633507437344, 0.0, 1.0, 0...."
1,"[0.0, 1.0, 0.0, 0.3998266913929289, 1.0, 0.0, ..."
2,"[1.0, 0.0, 0.0, -0.051162051616044556, 0.0, 1...."
3,"[1.0, 0.0, 0.0, -0.4675838610594931, 1.0, 0.0,..."
4,"[1.0, 0.0, 0.0, -0.5231865188868914, 0.0, 0.0,..."
...,...
65,"[0.0, 0.0, 1.0, -0.6128252940491069, 0.0, 0.0,..."
66,"[1.0, 0.0, 0.0, -0.021223134049023457, 0.0, 1...."
67,"[0.0, 0.0, 1.0, -0.5552859234669962, 1.0, 0.0,..."
68,"[1.0, 0.0, 0.0, 0.5345643077844714, 0.0, 0.0, ..."


In [74]:
# Each edge likewise has a single 'features' vector column.
transformed_frame._edges

Unnamed: 0_level_0,Unnamed: 1_level_0,features
@source_id,@target_id,Unnamed: 2_level_1
0,12,"[0.0, 1.0, 0.7956329696802654, 1.0, 0.0, 0.0, ..."
0,13,"[1.0, 0.0, 1.1411893801044022, 0.0, 1.0, 0.0, ..."
0,27,"[1.0, 0.0, 0.7956329696802654, 1.0, 0.0, 0.0, ..."
0,28,"[0.0, 1.0, 1.1411893801044022, 0.0, 1.0, 0.0, ..."
0,40,"[0.0, 1.0, -1.1049272876524865, 0.0, 0.0, 1.0,..."
...,...,...
58,66,"[0.0, 1.0, 0.27729835404406034, 1.0, 0.0, 0.0,..."
59,67,"[1.0, 0.0, 1.4867457905285388, 0.0, 1.0, 0.0, ..."
62,63,"[0.0, 1.0, 0.27729835404406034, 0.0, 0.0, 1.0,..."
65,66,"[0.0, 1.0, -0.24103626159214475, 0.0, 0.0, 1.0..."


In [77]:
import time

In [78]:
# Create an "attri2vec" node embedder and hand it the encoded graph, using
# the "features" property as node features.
embedder = StellarGraphNodeEmbedder(model_name="attri2vec")
embedder.set_graph(
    transformed_frame,
    directed=True,
    include_type=False,
    feature_prop="features",
)

In [81]:
# Time the training run; `perf_counter` is a monotonic clock meant for
# measuring elapsed intervals.
print("Training an embedding model...")
start = time.perf_counter()
embedder.fit_model(epochs=10)
# Nothing is saved in this cell, so the message reports training time only.
print("Total time to train {:.2f} s".format(
    time.perf_counter() - start))

Training an embedding model...
link_classification: using 'ip' method to combine node embeddings into edge embeddings
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Total time to train and save 1.40 s


In [82]:
from kgforge.core.resource import Resource

# Serialize the original (un-encoded) graph to JSON-LD; each node's edges
# are nested under the 'outEdges' key.
jsonld_repr = graph_frame.to_jsonld(edges_key="outEdges")

In [86]:
# Inspect the JSON-LD records.
jsonld_repr

[{'@id': '0',
  '@type': ['Orange'],
  'weight': 39.567907141073626,
  'color': 'green',
  'desc': 'hydrosulphurated Chiriguano winterwards tarbuttite fetishistic calpulli snowbird spermatocele redheadedness nonconductor inspectability fodient neuroplexus niggardly upbraidingly coauthor worriedness madefy unscheduled unitarism',
  'outEdges': [{'n_years': 14,
    'shapes': 'dashed',
    'desc': 'safe hangmanship fruitade Mniaceae benzophenothiazine falsifiable corrente mastitis homeopathist Archidamus voidly frisca unconsulting polycrystalline popadam cordierite duikerbok panties indevotional zyga',
    'isFriend': {'@id': '12'}},
   {'n_years': 16,
    'shapes': 'dotted',
    'desc': 'coremium essoin lessen manganiferous Selbornian sportance Ulmus postmillennialism adnexal acyanopsia mandibuliform myriotrichiaceous reattract transprocess ovoidal cheap baghouse Gothic salopian skomerite',
    'isEnemy': {'@id': '13'}},
   {'n_years': 14,
    'shapes': 'dashed',
    'desc': 'ophidious d

In [83]:
# Rebuild a frame from the JSON-LD records to check the round trip.
# NOTE(review): in the results displayed below, node ids sort as strings
# ('0', '1', '10', ...) and n_years comes back as float; confirm whether
# this is intended.
new_frame = PandasPGFrame()
new_frame.from_jsonld(jsonld_repr, types_from_relations=False)

In [84]:
# Nodes recovered from the JSON-LD representation.
new_frame._nodes

Unnamed: 0_level_0,@type,color,desc,weight
@id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Orange,green,hydrosulphurated Chiriguano winterwards tarbut...,39.567907
1,Carrot,blue,predestinator initiatrix sporangiferous esslin...,37.675663
10,Apple,blue,unindividuated adephagan onychauxis Graptolith...,41.632876
11,Orange,red,polybranch Dorking leukemic scaldberry Boutelo...,32.810433
12,Orange,blue,presprinkle quenchless Balaenidae anthophilian...,45.790499
...,...,...,...,...
68,Apple,red,lavatic exquisiteness Arcacea Hapi disheritmen...,38.371299
69,Apple,blue,syphilous groovy gorbellied tropophilous chiti...,37.738336
7,Orange,blue,sextonship skomerite sideromelane physicker qu...,34.906483
8,Orange,green,resinlike subtrist unaffirmed radiferous lube ...,32.001721


In [85]:
# Edges recovered from the JSON-LD representation.
new_frame._edges

Unnamed: 0_level_0,Unnamed: 1_level_0,@type,desc,n_years,shapes
@source_id,@target_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,12,isFriend,safe hangmanship fruitade Mniaceae benzophenot...,14.0,dashed
0,13,isEnemy,coremium essoin lessen manganiferous Selbornia...,16.0,dotted
0,27,isEnemy,ophidious denudant delightfulness differentian...,14.0,dashed
0,28,isFriend,sparrowless microtypical vagabondage unplantab...,16.0,dotted
0,40,isFriend,caseless Phoenicopteroideae haughtily oquassa ...,3.0,solid
...,...,...,...,...,...
9,35,isFriend,unburial unfirm room fetisheer redding monopyr...,6.0,dotted
9,54,isFriend,Peripatopsidae snur unempty archibenthic brach...,19.0,dashed
9,56,isEnemy,Tekke tropologically sirloiny autochromy Chilt...,15.0,solid
9,65,isFriend,Cashibo crisper cuprammonia reprimer syngnatho...,5.0,dotted
