In [3]:
import random

import numpy as np
import pandas as pd

from nltk.corpus import words

In [4]:
from bluegraph import PandasPGFrame
from bluegraph.preprocess import ScikitLearnPGEncoder
from bluegraph.backends.stellargraph import StellarGraphNodeEmbedder

In [5]:
# Toy graph: one record per person -> (name, age, height, weight).
node_records = [
    ("Alice", 25, 180, 75),
    ("Bob", 9, 122, 43),
    ("Eric", 70, 173, 68),
    ("John", 42, 194, 82),
    ("Anna", 26, 172, 70),
    ("Laura", 35, 156, 59),
    ("Matt", 36, 177, 81),
]
nodes, age, height, weight = map(list, zip(*node_records))

# One record per edge -> (source, target, edge weight).
edge_records = [
    ("Alice", "Bob", 1.0),
    ("Alice", "Eric", 2.2),
    ("Bob", "Eric", 0.3),
    ("Bob", "John", 4.1),
    ("Bob", "Anna", 1.5),
    ("Eric", "Anna", 21.0),
    ("Anna", "Laura", 1.0),
    ("Anna", "John", 2.5),
    ("Matt", "John", 7.5),
]
sources, targets, weights = map(list, zip(*edge_records))

# The frame itself is built from node ids and (source, target) pairs only;
# attributes are attached afterwards.
edges = list(zip(sources, targets))
frame = PandasPGFrame(nodes=nodes, edges=edges)

# Add properties

# Attach each numeric attribute as its own node property: every call passes
# the node ids under "@id" together with a single value column.
frame.add_node_properties(
    {
        "@id": nodes,
        "age": age
    }, prop_type="numeric")
frame.add_node_properties(
    {
        "@id": nodes,
        "height": height
    }, prop_type="numeric")
frame.add_node_properties(
    {
        "@id": nodes,
        "weight": weight
    }, prop_type="numeric")

# Edge properties are keyed by the (source, target) id pair.
edge_weight = pd.DataFrame({
    "@source_id": sources,
    "@target_id": targets,
    "distance": weights
})
frame.add_edge_properties(edge_weight, prop_type="numeric")

In [6]:
# Names of the numeric node properties that are gathered into one feature
# vector per node in the cells below.
props = ["age", "height", "weight"]

In [7]:
# Work on an explicit copy of the selected columns: a new column is assigned
# to `df` further down, and assigning into a subset of `frame._nodes` would
# otherwise trigger pandas' SettingWithCopyWarning.
df = frame._nodes[props].copy()

In [8]:
df

Unnamed: 0_level_0,age,height,weight
@id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Alice,25,180,75
Bob,9,122,43
Eric,70,173,68
John,42,194,82
Anna,26,172,70
Laura,35,156,59
Matt,36,177,81


In [9]:
df.to_numpy().tolist()

[[25, 180, 75],
 [9, 122, 43],
 [70, 173, 68],
 [42, 194, 82],
 [26, 172, 70],
 [35, 156, 59],
 [36, 177, 81]]

In [10]:
# Pack the numeric property columns into one list per row. Selecting `props`
# explicitly keeps the cell idempotent: on a re-run the already-present
# `_generated_features` column is not folded into the new vectors.
df["_generated_features"] = df[props].to_numpy().tolist()

In [11]:
df

Unnamed: 0_level_0,age,height,weight,_generated_features
@id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alice,25,180,75,"[25, 180, 75]"
Bob,9,122,43,"[9, 122, 43]"
Eric,70,173,68,"[70, 173, 68]"
John,42,194,82,"[42, 194, 82]"
Anna,26,172,70,"[26, 172, 70]"
Laura,35,156,59,"[35, 156, 59]"
Matt,36,177,81,"[36, 177, 81]"


In [12]:
def generate_targets(nodes, s, density=0.2):
    """Sample edges from node ``s`` to the nodes that compare greater than it.

    Each ``t`` in ``nodes`` with ``s < t`` is kept independently with
    probability ``density``; one ``np.random.choice`` draw is made per such
    candidate, in the order of ``nodes``.

    Returns a list of ``[s, t]`` pairs for the candidates that were kept.
    """
    keep_probabilities = [1 - density, density]
    return [
        [s, t]
        for t in nodes
        # `and` short-circuits, so a draw happens only for valid candidates.
        if s < t and np.random.choice([0, 1], p=keep_probabilities)
    ]


def random_pgframe(n_nodes, density):
    """Build a random ``PandasPGFrame`` on the integer nodes ``0..n_nodes-1``.

    For every source node, candidate targets are sampled with
    ``generate_targets`` (each pair ``s < t`` kept with probability
    ``density``), so edges always point from a smaller to a larger node id.

    Returns the frame constructed from the node list and the sampled
    (source, target) index.
    """
    nodes = list(range(n_nodes))

    # Flatten the per-source edge lists in a single pass. `sum(lists, [])`
    # copies the accumulated list once per source node, which is quadratic.
    edge_pairs = [
        pair
        for source in nodes
        for pair in generate_targets(nodes, source, density)
    ]
    edges = pd.DataFrame(
        edge_pairs, columns=["@source_id", "@target_id"])
    edges_df = edges.set_index(["@source_id", "@target_id"])
    return PandasPGFrame(nodes=nodes, edges=edges_df.index)

In [13]:
# Size and edge probability of the random graph generated below.
N = 70
density = 0.13

# Seed both random sources used in this notebook (`np.random` for graph
# structure, types and numeric/categorical values; `random` for the sampled
# text) so that a fresh "Restart & Run All" reproduces the same graph.
SEED = 42
np.random.seed(SEED)
random.seed(SEED)

# Generate a random graph

In [14]:
# Sample a random graph on N integer-labelled nodes; each pair (s, t) with
# s < t becomes an edge with probability `density`.
graph_frame = random_pgframe(N, density)

In [15]:
graph_frame._nodes.reset_index().to_dict("records")

[{'@id': 0},
 {'@id': 1},
 {'@id': 2},
 {'@id': 3},
 {'@id': 4},
 {'@id': 5},
 {'@id': 6},
 {'@id': 7},
 {'@id': 8},
 {'@id': 9},
 {'@id': 10},
 {'@id': 11},
 {'@id': 12},
 {'@id': 13},
 {'@id': 14},
 {'@id': 15},
 {'@id': 16},
 {'@id': 17},
 {'@id': 18},
 {'@id': 19},
 {'@id': 20},
 {'@id': 21},
 {'@id': 22},
 {'@id': 23},
 {'@id': 24},
 {'@id': 25},
 {'@id': 26},
 {'@id': 27},
 {'@id': 28},
 {'@id': 29},
 {'@id': 30},
 {'@id': 31},
 {'@id': 32},
 {'@id': 33},
 {'@id': 34},
 {'@id': 35},
 {'@id': 36},
 {'@id': 37},
 {'@id': 38},
 {'@id': 39},
 {'@id': 40},
 {'@id': 41},
 {'@id': 42},
 {'@id': 43},
 {'@id': 44},
 {'@id': 45},
 {'@id': 46},
 {'@id': 47},
 {'@id': 48},
 {'@id': 49},
 {'@id': 50},
 {'@id': 51},
 {'@id': 52},
 {'@id': 53},
 {'@id': 54},
 {'@id': 55},
 {'@id': 56},
 {'@id': 57},
 {'@id': 58},
 {'@id': 59},
 {'@id': 60},
 {'@id': 61},
 {'@id': 62},
 {'@id': 63},
 {'@id': 64},
 {'@id': 65},
 {'@id': 66},
 {'@id': 67},
 {'@id': 68},
 {'@id': 69}]

In [16]:
graph_frame._edges

@source_id,@target_id
0,12
0,16
0,30
0,34
0,37
...,...
62,68
62,69
64,68
67,68


# Add node and edge types

In [17]:
# Node type labels; one is sampled per node in the next cell.
types = ["Apple", "Orange", "Carrot"]

In [18]:
# Draw one type label per node id 0..N-1; the probabilities line up with the
# order of `types`.
type_probabilities = [0.5, 0.4, 0.1]
node_types = dict(
    (node_id, np.random.choice(types, p=type_probabilities))
    for node_id in range(N)
)

In [19]:
# Attach the sampled labels to the nodes (they show up in the `@type` column).
graph_frame.add_node_types(node_types)

In [20]:
graph_frame._nodes

Unnamed: 0_level_0,@type
@id,Unnamed: 1_level_1
0,Apple
1,Orange
2,Orange
3,Apple
4,Apple
...,...
65,Apple
66,Apple
67,Apple
68,Orange


In [21]:
# Edge type labels; one is sampled per edge in the next cell.
# NOTE: this rebinds `types`, which held the three node type labels before.
# The node-type sampling cell passes three probabilities, so it cannot be
# re-run after this cell without first re-running the node-label cell.
types = ["isFriend", "isEnemy"]

In [22]:
# Draw one type label per edge; the probabilities line up with the order of
# `types`.
edge_type_probabilities = [0.8, 0.2]
edge_types = dict(
    (edge, np.random.choice(types, p=edge_type_probabilities))
    for edge in graph_frame.edges()
)

In [23]:
# Attach the sampled labels to the edges (they show up in the `@type` column).
graph_frame.add_edge_types(edge_types)

In [24]:
graph_frame._edges

Unnamed: 0_level_0,Unnamed: 1_level_0,@type
@source_id,@target_id,Unnamed: 2_level_1
0,12,isFriend
0,16,isFriend
0,30,isFriend
0,34,isFriend
0,37,isFriend
...,...,...
62,68,isEnemy
62,69,isFriend
64,68,isFriend
67,68,isFriend


# Add node and edge properties

Three kinds of properties are added below: numerical, categorical, and text.

## Add node properties

In [25]:
# One normally distributed weight (mean 35, std 5) per node, as (id, value) rows.
weight_rows = [
    (node_id, np.random.normal(loc=35, scale=5))
    for node_id in graph_frame.nodes()
]
weight = pd.DataFrame(weight_rows, columns=["@id", "weight"])

In [26]:
# Register `weight` as a numeric node property.
graph_frame.add_node_properties(weight, prop_type="numeric")

In [27]:
# Values sampled for the categorical `color` node property.
colors = ["red", "green", "blue"]

In [28]:
# The palette gets its own name so that `colors` (the DataFrame handed to
# `add_node_properties`) never shadows the list being sampled from; this keeps
# the cell re-runnable.
color_choices = ["red", "green", "blue"]
colors = pd.DataFrame(
    [
        (n, np.random.choice(color_choices))
        for n in graph_frame.nodes()
    ],
    columns=["@id", "color"]
)

In [29]:
# Register `color` as a categorical node property.
graph_frame.add_node_properties(colors, prop_type="category")

In [30]:
# Fetch the nltk word list once instead of calling `words.words()` for every
# node; each description is 20 words sampled without replacement.
vocabulary = words.words()
desc = pd.DataFrame(
    [
        (n, ' '.join(random.sample(vocabulary, 20)))
        for n in graph_frame.nodes()
    ],
    columns=["@id", "desc"]
)

In [31]:
# Register `desc` as a text node property.
graph_frame.add_node_properties(desc, prop_type="text")

In [32]:
graph_frame._nodes

Unnamed: 0_level_0,@type,weight,color,desc
@id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Apple,30.858742,blue,emancipatist viviparity eyeseed uninfiltrated ...
1,Orange,30.372783,blue,febriferous noncarbonate vinegarist proseminar...
2,Orange,34.275146,green,burbank hygrophthalmic phonogrammically thief ...
3,Apple,38.791235,red,subclimax uncultivate albuminocholia cespitito...
4,Apple,32.624292,red,fasciolar dermoplasty nautch ergotaminine bewh...
...,...,...,...,...
65,Apple,35.052720,blue,ferroaluminum synantherologist sighingly splur...
66,Apple,35.497732,blue,nonimpressionist idiotype partymonger pignolia...
67,Apple,33.673322,blue,spectrochemical seaweedy Thackerayan inconspic...
68,Orange,30.845120,blue,deliver asexually Raymond colleen preacetabula...


## Add edge properties

In [33]:
# One random integer in [0, 20) per edge, as (source, target, value) rows.
year_rows = [
    (source, target, np.random.randint(0, 20))
    for source, target in graph_frame.edges()
]
years = pd.DataFrame(
    year_rows, columns=["@source_id", "@target_id", "n_years"])

In [34]:
# Register `n_years` as a numeric edge property.
graph_frame.add_edge_properties(years, prop_type="numeric")

In [35]:
# Candidate values live under their own name so that `shapes` only ever
# refers to the DataFrame handed to `add_edge_properties`.
shape_choices = ["dashed", "dotted", "solid"]
shapes = pd.DataFrame(
    [
        (s, t, np.random.choice(shape_choices))
        for s, t in graph_frame.edges()
    ],
    columns=["@source_id", "@target_id", "shapes"]
)

In [36]:
# Register `shapes` as a categorical edge property.
graph_frame.add_edge_properties(shapes, prop_type="category")

In [37]:
# Fetch the nltk word list once instead of calling `words.words()` for every
# edge; each description is 20 words sampled without replacement.
vocabulary = words.words()
desc = pd.DataFrame(
    [
        (s, t, ' '.join(random.sample(vocabulary, 20)))
        for s, t in graph_frame.edges()
    ],
    columns=["@source_id", "@target_id", "desc"]
)

In [38]:
# Register `desc` as a text edge property.
graph_frame.add_edge_properties(desc, prop_type="text")

In [39]:
graph_frame._edges

Unnamed: 0_level_0,Unnamed: 1_level_0,@type,n_years,shapes,desc
@source_id,@target_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,12,isFriend,1,dashed,anthropometry unlawfully gauging complain Powh...
0,16,isFriend,15,dashed,medicomoral incorporeally alose ethyne fisticu...
0,30,isFriend,14,dotted,planeness Galenian cavernal holocephalous igni...
0,34,isFriend,9,dashed,cabbagehead Palamedeidae postantennal separata...
0,37,isFriend,11,dotted,nearly rachidial risper sweepingness tailhead ...
...,...,...,...,...,...
62,68,isEnemy,12,dashed,actification flabbergast nonnegotiable droppin...
62,69,isFriend,8,dashed,thumby intrapial Cyperaceae niggardliness Hypo...
64,68,isFriend,5,solid,andron anthroic pharyngognath marshwort polyax...
67,68,isFriend,1,dotted,overdiversification semicubical cholagogic mou...


In [40]:
graph_frame._edge_prop_types

{'@type': 'category',
 'n_years': 'numeric',
 'shapes': 'category',
 'desc': 'text'}

# Property encoding

In [41]:
## Run this once if the `words.words()` calls above raise an nltk error (the 'words' corpus has to be downloaded first)
# import nltk
# nltk.download('words')

In [42]:
# Property encoder configured with heterogeneous=False, type encoding switched
# on (with drop_types=True) and tf-idf as the text encoding.
# NOTE(review): the outputs recorded further down show an empty `features`
# column and an empty `hom_encoder._node_encoders`, and no list of properties
# to encode is passed here. Presumably the encoder needs the node/edge
# property names given explicitly - confirm against the installed bluegraph
# version.
hom_encoder = ScikitLearnPGEncoder(
    heterogeneous=False,
    encode_types=True, drop_types=True, text_encoding="tfidf")

In [43]:
graph_frame._edge_prop_types

{'@type': 'category',
 'n_years': 'numeric',
 'shapes': 'category',
 'desc': 'text'}

In [54]:
# Fit the encoder on `graph_frame` and keep the encoded result as a separate
# frame; `graph_frame` is still used with its raw properties afterwards.
transformed_frame = hom_encoder.fit_transform(graph_frame)

In [55]:
transformed_frame._nodes

Unnamed: 0_level_0,features
@id,Unnamed: 1_level_1
0,
1,
2,
3,
4,
...,...
65,
66,
67,
68,


In [56]:
hom_encoder._node_encoders

{}

In [57]:
transformed_frame._nodes

Unnamed: 0_level_0,features
@id,Unnamed: 1_level_1
0,
1,
2,
3,
4,
...,...
65,
66,
67,
68,


In [58]:
transformed_frame._edges

@source_id,@target_id
0,12
0,16
0,30
0,34
0,37
...,...
62,68
62,69
64,68
67,68


In [59]:

# Export the property graph as a list of JSON-LD-style node records; each
# node's outgoing edges are nested under the "outEdges" key.
jsonld_repr = graph_frame.to_jsonld(edges_key="outEdges")

In [60]:
jsonld_repr

[{'@id': '0',
  '@type': ['Apple'],
  'weight': 30.858742268846534,
  'color': 'blue',
  'desc': 'emancipatist viviparity eyeseed uninfiltrated monocotyledonous Populism raggy entertainment Fulgora wowser entericoid latera plasticization uncaste photographize buttock deaeration west synergic pleurostict',
  'outEdges': [{'n_years': 1,
    'shapes': 'dashed',
    'desc': 'anthropometry unlawfully gauging complain Powhatan brewster lawish amphoriloquy granoblastic scalloping erection punctist interfederation coaudience rhombos Delhi preoffensiveness metaplasmic subseries unabsorb',
    'isFriend': {'@id': '12'}},
   {'n_years': 15,
    'shapes': 'dashed',
    'desc': 'medicomoral incorporeally alose ethyne fisticuffer disappointingly hyperparasitize enscroll schediasm relessee nonconstruable neuroclonic brushland vocalization hypermetropic disgusting tangence redundant Anthomyia crystallic',
    'isFriend': {'@id': '16'}},
   {'n_years': 14,
    'shapes': 'dotted',
    'desc': 'planeness

In [51]:
# Round trip: rebuild a frame from the JSON-LD records produced above.
# Per the recorded outputs, node ids come back as strings (rows are ordered
# 0, 1, 10, 11, ...) and `n_years` comes back as float (1.0, 15.0, ...).
new_frame = PandasPGFrame()
new_frame.from_jsonld(jsonld_repr, types_from_relations=False)

In [52]:
new_frame._nodes

Unnamed: 0_level_0,@type,color,desc,weight
@id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Apple,blue,emancipatist viviparity eyeseed uninfiltrated ...,30.858742
1,Orange,blue,febriferous noncarbonate vinegarist proseminar...,30.372783
10,Apple,red,virilist unfrivolous feeze wagaun astrography ...,38.990393
11,Apple,blue,recushion sagittarius scleroblastema crossroad...,35.571036
12,Orange,green,hissproof Maurandia mafic Chinookan noncon hor...,44.270586
...,...,...,...,...
68,Orange,blue,deliver asexually Raymond colleen preacetabula...,30.845120
69,Orange,blue,unsolidly jipper dripper gammacismus flamant Z...,32.040912
7,Orange,blue,Randy Philetaerus attendant escobita peladic b...,41.188153
8,Apple,blue,unjoyfully perineural amacrine dermatomycosis ...,34.405167


In [53]:
new_frame._edges

Unnamed: 0_level_0,Unnamed: 1_level_0,@type,desc,n_years,shapes
@source_id,@target_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,12,isFriend,anthropometry unlawfully gauging complain Powh...,1.0,dashed
0,16,isFriend,medicomoral incorporeally alose ethyne fisticu...,15.0,dashed
0,30,isFriend,planeness Galenian cavernal holocephalous igni...,14.0,dotted
0,34,isFriend,cabbagehead Palamedeidae postantennal separata...,9.0,dashed
0,37,isFriend,nearly rachidial risper sweepingness tailhead ...,11.0,dotted
...,...,...,...,...,...
9,45,isFriend,phrynin knutty hemstitcher Bittium agglutinant...,2.0,solid
9,49,isFriend,pantle resplit undauntable postamniotic ricksh...,0.0,solid
9,51,isFriend,sputum champagneless anodontia Ceratitidae tet...,8.0,solid
9,52,isEnemy,juxtamarine agio plague hemitropal Purkinjean ...,11.0,dotted
