# Script to load wordnet, conceptnet data as separate graphs with single edge.

## Install pyTigerGraph

In [1]:
# Setup
!pip install pyTigerGraph



## Add Imports and Establish Initial Connection

In [20]:
import pyTigerGraph as tg
import json
import pandas as pd

# Connection parameters
hostName = "https://thinkotb.i.tgcloud.io/"
userName = "tigergraph"
password = ""

conn = tg.TigerGraphConnection(host=hostName, username=userName, password=password)

print("Connected")



import pandas as pd 

root='C:/Users/steam/Documents/andy/ThinkOTB/main/csv_imports/'

Connected


## Peek into the data 

### ConceptNet - Word

In [8]:
# root='/content/drive/MyDrive/LanguageModel/Data_Processing/csv_imports/''
root='csv_imports/'
nodes = pd.read_csv(root+'words.csv')
nodes.head(1)

Unnamed: 0,id:ID,name,pos,conceptUri,:LABEL
0,able.a,able,a,/c/en/able/a,Lemma;Concept


### ConceptNet - Synset

In [9]:
synsets = pd.read_csv(root+'synsets.csv')
synsets.head(1)

Unnamed: 0,id:ID,pos:string,definition:string,:LABEL
0,able.a.01,a,(usually followed by `to') having the necessar...,Synset


### ConceptNet - Edges

In [10]:
relationships = pd.read_csv(root+'relationships.csv',index_col=[0])
relationships.head(5)

Unnamed: 0_level_0,:END_ID,dataset:string,weight:double,:TYPE
:START_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
able.a.01,ability.n.01,7,2.0,3
able.a.01,ability.n.02,7,2.0,3
able.a,able.a.01,7,2.0,26
unable.a.01,ability.n.01,7,2.0,3
unable.a,unable.a.01,7,2.0,26


### ConceptNet - Part of Speech

In [11]:
relationships = pd.read_csv(root+'encoded/pos_wn.csv',index_col=[0])
relationships.head()

Unnamed: 0,type,id
0,adjective,0
1,noun,3
2,adverb,2
3,verb,4
4,adjective_satellite,1


In [40]:
##Clear graph if already solution has the same graph

conn.gsql('''
USE GLOBAL
DROP ALL
''')

'Dropping all, about 1 minute ...\nAbort all active loading jobs\nTry to abort all loading jobs on graph WordNet, it may take a while ...\n[ABORT_SUCCESS] No active Loading Job to abort.\nTry to abort all loading jobs on graph ConceptNET, it may take a while ...\n[ABORT_SUCCESS] No active Loading Job to abort.\nResetting GPE...\nSuccessfully reset GPE and GSE\nStopping GPE GSE\nSuccessfully stopped GPE GSE in 0.004 seconds\nClearing graph store...\nSuccessfully cleared graph store\nStarting GPE GSE RESTPP\nSuccessfully started GPE GSE RESTPP in 0.098 seconds\nEverything is dropped.'

## Define and Publish the Schema

In [41]:
# DEFINE / CREATE ALL EDGES AND VERTICES 
results = conn.gsql('''
  USE GLOBAL
  CREATE VERTEX words(PRIMARY_ID id STRING, uri STRING, word STRING, pos STRING,definition STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE DIRECTED EDGE type(FROM words, TO words, typeOf STRING) WITH REVERSE_EDGE="reverse_type"
  CREATE VERTEX word(PRIMARY_ID id STRING, name STRING, pos STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE VERTEX synset(PRIMARY_ID id STRING, definition STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"
  CREATE DIRECTED EDGE is_a(FROM synset, TO word|FROM word, TO synset, type STRING) WITH REVERSE_EDGE="reverse_is_a"
''')
print(results)

Successfully created vertex types: [words].
Successfully created edge types: [type].
Successfully created reverse edge types: [reverse_type].
Successfully created vertex types: [word].
Successfully created vertex types: [synset].
Successfully created edge types: [is_a].
Successfully created reverse edge types: [reverse_is_a].


## WordNET

### Create WordNet Graph

In [42]:
results = conn.gsql('CREATE GRAPH WordNet(words,type,reverse_type)')
print(results)

Stopping GPE GSE RESTPP
Successfully stopped GPE GSE RESTPP in 1.413 seconds
Starting GPE GSE RESTPP
Successfully started GPE GSE RESTPP in 0.100 seconds
The graph WordNet is created.


In [3]:
conn.graphname="WordNet"
secret = conn.createSecret()
print(secret)
authToken = conn.getToken(secret)
authToken = authToken[0]
print(authToken)
# authToken = 'rc7reopbis1667ksgcppq5v5fb99p6s1'
conn = tg.TigerGraphConnection(host=hostName, graphname="WordNet", username=userName, password=password, apiToken=authToken)

def pprint(string):
  print(json.dumps(string, indent=2))

cnj1k6kmj8gm4ae285mja68et64v0gck
mm1l01nudqa9ue2to9qlucls1ss9qfre


### Create Loading Jobs

#### Word

Let's take a look at what one of our files looks like so we can write a loading job.

- Here it's important to note that the `$0`, `$1` values line up with the columns of your data.
  In this example:
  - `$0` is the `uri` column,
  - `$1` is `id`,
  - `$2` is `word`
  - and so on

In [44]:
results = conn.gsql('''
  USE GRAPH WordNet
  BEGIN
  CREATE LOADING JOB load_job_WN FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE type VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }
  END
  ''')
print(results)

Using graph 'WordNet'
Successfully created loading jobs: [load_job_WN].


#### Word Edges

In [45]:
results = conn.gsql('''
  USE GRAPH WordNet
  BEGIN
  CREATE LOADING JOB load_job_WN_nodes FOR GRAPH WordNet {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX words VALUES($1, $0, $2, $3, $4) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }
  END
  ''')
print(results)

Using graph 'WordNet'
Successfully created loading jobs: [load_job_WN_nodes].


### Load Data

#### Words

In [46]:
# Load the posts file wiht the 'load_words' job
load_words = root+'WN-nodes.csv'
results = conn.uploadFile(load_words, fileTag='MyDataSource', jobName='load_job_WN_nodes')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 117792,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [
        {
          "typeName": "words",
          "validObject": 117792,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "edge": [],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


#### Edges

In [47]:
# Load the posts file wiht the 'load_edges' job
load_edges = root+'WN-edges.csv'
results = conn.uploadFile(load_edges, fileTag='MyDataSource', jobName='load_job_WN')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 293652,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [],
      "edge": [
        {
          "typeName": "type",
          "validObject": 293652,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


### Exploring the Graph

#### Get Vertex and Edge Schema

In [4]:
results = conn.getVertexTypes()
print(f"Verticies: {results}")
vertices = results

results = conn.getEdgeTypes()
print(f"Edges: {results}")
edges = results

Verticies: ['words']
Edges: ['type']


In [49]:

print(f"Results for Post vertex")
pprint(conn.getVertexType("words"))

print("-----------------")
print(f"Results for liked edge")
pprint(conn.getEdgeType("type"))


Results for Post vertex
{
  "Config": {
    "TAGGABLE": false,
    "STATS": "OUTDEGREE_BY_EDGETYPE",
    "PRIMARY_ID_AS_ATTRIBUTE": true
  },
  "Attributes": [
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "uri",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    },
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "word",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    },
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "pos",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    },
    {
      "AttributeType": {
        "Name": "ST

### Counting Data

In [50]:
print("Vertex Counts")
for vertex in vertices:
  print(f"There are {conn.getVertexCount(vertex)} {vertex} vertices in the graph")

print("--------------")
print("Edge Counts")
for edge in edges:
  print(f"There are {conn.getEdgeCount(edge)} {edge} edges in the graph")

Vertex Counts
There are 117794 words vertices in the graph
--------------
Edge Counts
There are 293078 type edges in the graph


### Extracting Data

#### Vertex/Edge Set Format

##### Getting a Vertex

In [24]:
results = conn.getVerticesById("words", "15314760-n")
pprint(results)

[
  {
    "v_id": "15314760-n",
    "v_type": "words",
    "attributes": {
      "id": "15314760-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/15314760-n",
      "word": "lead_time",
      "pos": "noun",
      "definition": "the time interval between the initiation and the completion of a production process; &quot;the lead times for many publications can vary tremendously&quot;; &quot;planning is an area where lead time can be reduced&quot;@en"
    }
  }
]


In [31]:
results = conn.getVertices("words",select="",where='word=="affordable"',limit="7",sort="", timeout=0)
pprint(results)
#q = conn.gsql('select * from words where word=="affordable" limit 4')
#q

[
  {
    "v_id": "00938372-s",
    "v_type": "words",
    "attributes": {
      "id": "00938372-s",
      "uri": "http://wordnet-rdf.princeton.edu/id/00938372-s",
      "word": "affordable",
      "pos": "adjective_satellite",
      "definition": "that you have the financial means for; &quot;low-cost housing&quot;@en"
    }
  }
]


In [32]:
results = conn.getVertices("words",select="",where='word=="housing"',limit="7",sort="", timeout=0)
pprint(results)

[
  {
    "v_id": "03551520-n",
    "v_type": "words",
    "attributes": {
      "id": "03551520-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/03551520-n",
      "word": "housing",
      "pos": "noun",
      "definition": "structures collectively in which people are housed@en"
    }
  },
  {
    "v_id": "03551946-n",
    "v_type": "words",
    "attributes": {
      "id": "03551946-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/03551946-n",
      "word": "housing",
      "pos": "noun",
      "definition": "a protective cover designed to contain or support a mechanical component@en"
    }
  }
]


##### Or Multiple Vertices

In [33]:
results = conn.getVertices("words",select="",where='word=="homelessness"',limit="7",sort="", timeout=0)
pprint(results)

[
  {
    "v_id": "13966105-n",
    "v_type": "words",
    "attributes": {
      "id": "13966105-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/13966105-n",
      "word": "homelessness",
      "pos": "noun",
      "definition": "the state or condition of having no home (especially the state of living in the streets)@en"
    }
  }
]


In [25]:
tdf1 = conn.getVerticesById("words", ["15137796-n","15192825-n"])
pprint(tdf1)

[
  {
    "v_id": "15137796-n",
    "v_type": "words",
    "attributes": {
      "id": "15137796-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/15137796-n",
      "word": "period",
      "pos": "noun",
      "definition": "an amount of time; &quot;a time period of 30 years&quot;; &quot;hastened the period of time of his recovery&quot;; &quot;Picasso's blue period&quot;@en"
    }
  },
  {
    "v_id": "15192825-n",
    "v_type": "words",
    "attributes": {
      "id": "15192825-n",
      "uri": "http://wordnet-rdf.princeton.edu/id/15192825-n",
      "word": "eve",
      "pos": "noun",
      "definition": "the period immediately before something; &quot;on the eve of the French Revolution&quot;@en"
    }
  }
]


##### Count Edges Connected to a Vertex

In [26]:
results = conn.getEdgeCountFrom("words", "15192825-n")
pprint(results)

{
  "type": 1,
  "reverse_type": 1
}


##### Show all Edges Connected to a Vertex

In [19]:
results = conn.getEdges("words", "15192825-n")
pprint(results)

NameError: name 'conn' is not defined

#### As Pandas Dataframe
Supports all of the above in native Pandas Dataframe format.

##### All Vertices of one Type

In [30]:
df1 = conn.getVertexDataframe("words")
print(df1)

              v_id          id  \
0       15314760-n  15314760-n   
1       15290975-n  15290975-n   
2       15289702-n  15289702-n   
3       15287435-n  15287435-n   
4       15264089-n  15264089-n   
...            ...         ...   
117789  04743169-n  04743169-n   
117790  04311353-n  04311353-n   
117791  04748435-n  04748435-n   
117792  02754634-v  02754634-v   
117793  07771065-n  07771065-n   

                                                   uri               word  \
0       http://wordnet-rdf.princeton.edu/id/15314760-n          lead_time   
1       http://wordnet-rdf.princeton.edu/id/15290975-n     starting_point   
2       http://wordnet-rdf.princeton.edu/id/15289702-n         allegretto   
3       http://wordnet-rdf.princeton.edu/id/15287435-n          split_run   
4       http://wordnet-rdf.princeton.edu/id/15264089-n             season   
...                                                ...                ...   
117789  http://wordnet-rdf.princeton.edu/id/04743169

##### One or More Vertex

In [29]:
df2 = conn.getVertexDataframeById("words", ["15192825-n"])
print(df2)

         v_id          id                                             uri  \
0  15192825-n  15192825-n  http://wordnet-rdf.princeton.edu/id/15192825-n   

  word   pos                                         definition  
0  eve  noun  the period immediately before something; &quot...  


##### Convert Vertex/Edge Set to Dataframe
We'll use the results from the 'Or Multiple Vertices' cell. 

In [30]:
df3 = conn.vertexSetToDataFrame(tdf1)
print(df3)

         v_id          id                                             uri  \
0  15137796-n  15137796-n  http://wordnet-rdf.princeton.edu/id/15137796-n   
1  15192825-n  15192825-n  http://wordnet-rdf.princeton.edu/id/15192825-n   

     word   pos                                         definition  
0  period  noun  an amount of time; &quot;a time period of 30 y...  
1     eve  noun  the period immediately before something; &quot...  


##### Get Edges

In [31]:
df4 = conn.getEdgesDataframe("words", "15192825-n", limit=3)
print(df4)

  from_type     from_id to_type       to_id    typeOf
0     words  15192825-n   words  15137796-n  hypernym
1     words  15192825-n   words  15137796-n   hyponym


### Path Finding
Find paths between vertices.

Supported are:
- shortestPath - one shortest path between vertices
- allPaths - all paths within the specified edge limit

In [34]:
results = conn.shortestPath([("words", "03551946-n")], [("words", "13966105-n")])
pprint(results)

[{'type': 'words', 'id': '03551946-n'}]
[{'type': 'words', 'id': '13966105-n'}]
[
  {
    "vertices": [],
    "edges": []
  }
]


## ConceptNET

### Create WordNet Graph

In [51]:
results = conn.gsql('CREATE GRAPH ConceptNET(word, synset, is_a, reverse_is_a)')
print(results)

The graph ConceptNET is created.


In [21]:
conn.graphname="ConceptNET"
secret = conn.createSecret()
print(secret)
authToken = conn.getToken(secret)
authToken = authToken[0]
print(authToken)
conn = tg.TigerGraphConnection(host=hostName, graphname="ConceptNET", username=userName, password=password, apiToken=authToken)

def pprint(string):
  print(json.dumps(string, indent=2))

8lp3b74qhr9vnv8u9c9iop7bpl4gnmss
7oa75nt9td99pvi8ssgalclm7rkmfimo


#### Create Loading Jobs

#### Concept/ Words Edges

Let's take a look at what one of our files looks like so we can write a loading job. 

- has_synset(FROM rootWord, TO synset)
is_a_synset_of(FROM synset, TO rootWord)


Here it's important to note that the `$0`, `$1` values line up with the columns of your data.
In this example:
- `$0` is the `uri` column,
- `$1` is `id`,
- `$2` is `word`
- and so on

In [53]:
results = conn.gsql('''
  USE GRAPH ConceptNET
  BEGIN
  CREATE LOADING JOB load_job_words FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX word VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }

    CREATE LOADING JOB load_job_synsets FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX synset VALUES($0, $2) USING SEPARATOR=",", HEADER="true", EOL="", QUOTE="double";
    }


    CREATE LOADING JOB load_job_relationships FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE is_a VALUES($0 word, $1 synset, $4) USING SEPARATOR=",", HEADER="true", EOL="";
      LOAD MyDataSource TO EDGE is_a VALUES($0 synset, $1 word, $4) USING SEPARATOR=",", HEADER="true", EOL="";
    }

  END
  ''')
print(results)

Using graph 'ConceptNET'
Successfully created loading jobs: [load_job_words].
Successfully created loading jobs: [load_job_synsets].
Successfully created loading jobs: [load_job_relationships].


#### Concept/ Word Root


In [54]:
results = conn.gsql('''
  USE GRAPH ConceptNET
  BEGIN
  CREATE LOADING JOB load_job_words_csv_1644515929297 FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX word VALUES($0, $1, $2) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }

  CREATE LOADING JOB load_job_relationships_csv_1644515956497 FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO EDGE is_a VALUES($0 synset, $1 word, $4) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
      LOAD MyDataSource TO EDGE is_a VALUES($0 word, $1 synset, $4) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }

  CREATE LOADING JOB load_job_synsets_csv_1644515968121 FOR GRAPH ConceptNET {
      DEFINE FILENAME MyDataSource;
      LOAD MyDataSource TO VERTEX synset VALUES($0, $2) USING SEPARATOR=",", HEADER="true", EOL="\n", QUOTE="double";
    }
  END
  ''')
print(results)

Using graph 'ConceptNET'
Successfully created loading jobs: [load_job_words_csv_1644515929297].
Successfully created loading jobs: [load_job_relationships_csv_1644515956497].
Successfully created loading jobs: [load_job_synsets_csv_1644515968121].


#### Concept/ Synset 
- synset(PRIMARY_ID id STRING, pos STRING, definition STRING, label STRING) WITH STATS="OUTDEGREE_BY_EDGETYPE", PRIMARY_ID_AS_ATTRIBUTE="true"


### Load Data

#### Words

In [55]:
# Load the posts file wiht the 'load_words' job
load_words = root+'words.csv'
results = conn.uploadFile(load_words, timeout='100000',fileTag='MyDataSource', jobName='load_job_words_csv_1644515929297')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 1530137,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [
        {
          "typeName": "word",
          "validObject": 1530137,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "edge": [],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


#### Synsets

In [56]:
# Load the posts file wiht the 'load_synsets' job
load_synsets = root+'synsets.csv'
results = conn.uploadFile(load_synsets, timeout='100000', fileTag='MyDataSource', jobName='load_job_synsets_csv_1644515968121')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 117660,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [
        {
          "typeName": "synset",
          "validObject": 117660,
          "noIdFound": 0,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "edge": [],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


#### Edges

In [57]:
# Load the posts file wiht the 'load_edges' job
load_edges = root+'relationships.csv'
results = conn.uploadFile(load_edges, timeout='0', fileTag='MyDataSource', jobName='load_job_relationships_csv_1644515956497')
print(json.dumps(results, indent=2))

[
  {
    "sourceFileName": "Online_POST",
    "statistics": {
      "validLine": 2574404,
      "rejectLine": 0,
      "failedConditionLine": 0,
      "notEnoughToken": 0,
      "invalidJson": 0,
      "oversizeToken": 0,
      "vertex": [],
      "edge": [
        {
          "typeName": "is_a",
          "validObject": 2574363,
          "noIdFound": 41,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        },
        {
          "typeName": "is_a",
          "validObject": 2574363,
          "noIdFound": 41,
          "invalidAttribute": 0,
          "invalidVertexType": 0,
          "invalidPrimaryId": 0,
          "invalidSecondaryId": 0,
          "incorrectFixedBinaryLength": 0
        }
      ],
      "deleteVertex": [],
      "deleteEdge": []
    }
  }
]


### Exploring the Graph

#### Get Vertex and Edge Schema

In [58]:
results = conn.getVertexTypes()
print(f"Verticies: {results}")
vertices = results

results = conn.getEdgeTypes()
print(f"Edges: {results}")
edges = results

Verticies: ['word', 'synset']
Edges: ['is_a']


In [59]:

# print(f"Results for Post vertex")
# pprint(conn.getVertexType("word"))

print("-----------------")
print(f"Results for liked edge")
pprint(conn.getEdgeType("is_a"))


-----------------
Results for liked edge
{
  "IsDirected": true,
  "ToVertexTypeName": "*",
  "Config": {
    "REVERSE_EDGE": "reverse_is_a"
  },
  "Attributes": [
    {
      "AttributeType": {
        "Name": "STRING"
      },
      "IsPartOfCompositeKey": false,
      "PrimaryIdAsAttribute": false,
      "AttributeName": "type",
      "HasIndex": false,
      "internalAttribute": false,
      "IsPrimaryKey": false
    }
  ],
  "FromVertexTypeName": "*",
  "EdgePairs": [
    {
      "From": "synset",
      "To": "word"
    },
    {
      "From": "word",
      "To": "synset"
    }
  ],
  "Name": "is_a"
}


### Counting Data

In [60]:
print("Vertex Counts")
for vertex in vertices:
  print(f"There are {conn.getVertexCount(vertex)} {vertex} vertices in the graph")

print("--------------")
print("Edge Counts")
for edge in edges:
  print(f"There are {conn.getEdgeCount(edge)} {edge} edges in the graph")

Vertex Counts
There are 1647798 word vertices in the graph
There are 1494279 synset vertices in the graph
--------------
Edge Counts
There are 5020534 is_a edges in the graph


### Extracting Data

#### Vertex/Edge Set Format

##### Getting a Vertex

In [36]:
results = conn.getVerticesById("synset", "judicially.r.01")
pprint(results)

[
  {
    "v_id": "judicially.r.01",
    "v_type": "synset",
    "attributes": {
      "id": "judicially.r.01",
      "definition": "as ordered by a court"
    }
  }
]


##### Or Multiple Vertices

In [47]:
tdf1 = conn.getVerticesById("word", ["conflict.v","clash.v.02"])
pprint(tdf1)

[
  {
    "v_id": "conflict.v",
    "v_type": "word",
    "attributes": {
      "id": "conflict.v",
      "name": "conflict",
      "pos": "v"
    }
  },
  {
    "v_id": "clash.v.02",
    "v_type": "word",
    "attributes": {
      "id": "",
      "name": "",
      "pos": ""
    }
  }
]


##### Count Edges Connected to a Vertex

In [48]:
results = conn.getEdgeCountFrom("word", "conflict.v")
pprint(results)

{
  "is_a": 9,
  "reverse_is_a": 7
}


##### Show all Edges Connected to a Vertex

In [62]:
results = conn.getEdges("word", "conflict.v")
pprint(results)

[
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "contrast.v",
    "to_type": "synset",
    "attributes": {
      "type": "31"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "disagree",
    "to_type": "synset",
    "attributes": {
      "type": "38"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "overlap",
    "to_type": "synset",
    "attributes": {
      "type": "38"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "at_odds",
    "to_type": "synset",
    "attributes": {
      "type": "38"
    }
  },
  {
    "e_type": "is_a",
    "directed": true,
    "from_id": "conflict.v",
    "from_type": "word",
    "to_id": "conflict.v.02",
    "to_type": "synset",
    "attributes": {
      "type":

#### As Pandas Dataframe
Supports all of the above in native Pandas Dataframe format.

##### All Vertices of one Type

In [50]:
df1 = conn.getVertexDataframe("word")
print(df1)

                                 v_id                            id  \
0                    friend_to_friend              friend_to_friend   
1        making_objects_appear_closer  making_objects_appear_closer   
2                   conveying_message             conveying_message   
3                   enhance_orchestra             enhance_orchestra   
4                 playing_flute_music           playing_flute_music   
...                               ...                           ...   
1647793        intradepartmental.a.01                                 
1647794                cybernate.v.01                                 
1647795              hystricidae.n.01                                 
1647796                 sedition.n.01                                 
1647797                     dull.s.05                                 

                                 name pos  
0                    friend_to_friend      
1        making_objects_appear_closer      
2              

##### One or More Vertex

In [51]:
df2 = conn.getVertexDataframeById("word", "conflict.v")
print(df2)

         v_id          id      name pos
0  conflict.v  conflict.v  conflict   v


##### Convert Vertex/Edge Set to Dataframe
We'll use the results from the 'Or Multiple Vertices' cell. 

In [52]:
df3 = conn.vertexSetToDataFrame(tdf1)
print(df3)

         v_id          id      name pos
0  conflict.v  conflict.v  conflict   v
1  clash.v.02                          


##### Get Edges

In [63]:
df4 = conn.getEdgesDataframe("word", "conflict.v",limit=3)
print(df4)

  from_type     from_id to_type       to_id type
0      word  conflict.v  synset  contrast.v   31
1      word  conflict.v  synset    disagree   38
2      word  conflict.v  synset     overlap   38


### Path Finding
Find paths between vertices.

Supported are:
- shortestPath - one shortest path between vertices
- allPaths - all paths within the specified edge limit

In [54]:
results = conn.shortestPath([("word", "in_due_time.r")], [("word", "in_due_season.r")])
pprint(results)

[{'type': 'word', 'id': 'in_due_time.r'}]
[{'type': 'word', 'id': 'in_due_season.r'}]
[
  {
    "vertices": [
      {
        "v_id": "when_time_comes.r",
        "v_type": "synset",
        "attributes": {
          "id": "",
          "definition": ""
        }
      },
      {
        "v_id": "in_due_time.r",
        "v_type": "word",
        "attributes": {
          "id": "in_due_time.r",
          "name": "in_due_time",
          "pos": "r"
        }
      },
      {
        "v_id": "in_due_season.r",
        "v_type": "word",
        "attributes": {
          "id": "in_due_season.r",
          "name": "in_due_season",
          "pos": "r"
        }
      }
    ],
    "edges": [
      {
        "e_type": "reverse_is_a",
        "from_id": "when_time_comes.r",
        "from_type": "synset",
        "to_id": "in_due_season.r",
        "to_type": "word",
        "directed": true,
        "attributes": {
          "type": "Synonym"
        }
      },
      {
        "e_type": "is_a",

In [22]:
q = conn.gsql('select * from word where name=="housing" limit 4')
pprint(q)

"[\n{\n\"v_id\": \"housing.n\",\n\"attributes\": {\n\"pos\": \"n\",\n\"name\": \"housing\",\n\"id\": \"housing.n\"\n},\n\"v_type\": \"word\"\n},\n{\n\"v_id\": \"housing\",\n\"attributes\": {\n\"pos\": \"\",\n\"name\": \"housing\",\n\"id\": \"housing\"\n},\n\"v_type\": \"word\"\n},\n{\n\"v_id\": \"housing.v\",\n\"attributes\": {\n\"pos\": \"v\",\n\"name\": \"housing\",\n\"id\": \"housing.v\"\n},\n\"v_type\": \"word\"\n}\n]"


In [41]:
q = conn.gsql('select * from word where name=="homelessness" limit 4')
q

'[\n{\n"v_id": "homelessness",\n"attributes": {\n"pos": "",\n"name": "homelessness",\n"id": "homelessness"\n},\n"v_type": "word"\n},\n{\n"v_id": "homelessness.n",\n"attributes": {\n"pos": "n",\n"name": "homelessness",\n"id": "homelessness.n"\n},\n"v_type": "word"\n}\n]'

In [42]:
results = conn.shortestPath([("word", "housing.n")], [("word", "homelessness.n")])
pprint(results)

[{'type': 'word', 'id': 'housing.n'}]
[{'type': 'word', 'id': 'homelessness.n'}]
[
  {
    "vertices": [
      {
        "v_id": "architecture",
        "v_type": "synset",
        "attributes": {
          "id": "",
          "definition": ""
        }
      },
      {
        "v_id": "housing.n",
        "v_type": "word",
        "attributes": {
          "id": "housing.n",
          "name": "housing",
          "pos": "n"
        }
      },
      {
        "v_id": "condition.n",
        "v_type": "synset",
        "attributes": {
          "id": "",
          "definition": ""
        }
      },
      {
        "v_id": "homelessness.n",
        "v_type": "word",
        "attributes": {
          "id": "homelessness.n",
          "name": "homelessness",
          "pos": "n"
        }
      },
      {
        "v_id": "niche.n",
        "v_type": "word",
        "attributes": {
          "id": "niche.n",
          "name": "niche",
          "pos": "n"
        }
      }
    ],
    "edges

In [45]:
q = conn.gsql('select * from word where name=="affordable" limit 4')
pprint(q)


"[\n{\n\"v_id\": \"affordable.s\",\n\"attributes\": {\n\"pos\": \"s\",\n\"name\": \"affordable\",\n\"id\": \"affordable.s\"\n},\n\"v_type\": \"word\"\n},\n{\n\"v_id\": \"affordable\",\n\"attributes\": {\n\"pos\": \"\",\n\"name\": \"affordable\",\n\"id\": \"affordable\"\n},\n\"v_type\": \"word\"\n},\n{\n\"v_id\": \"affordable.a\",\n\"attributes\": {\n\"pos\": \"a\",\n\"name\": \"affordable\",\n\"id\": \"affordable.a\"\n},\n\"v_type\": \"word\"\n}\n]"


# Find Some Commonality
Centraliy Attempt 1

In [23]:
results = conn.shortestPath([("word", "affordable.s")], [("word", "homelessness.n")])
pprint(results)

[{'type': 'word', 'id': 'affordable.s'}]
[{'type': 'word', 'id': 'homelessness.n'}]
[
  {
    "vertices": [
      {
        "v_id": "condition.n",
        "v_type": "synset",
        "attributes": {
          "id": "",
          "definition": ""
        }
      },
      {
        "v_id": "low-cost.s.01",
        "v_type": "synset",
        "attributes": {
          "id": "low-cost.s.01",
          "definition": "that you have the financial means for"
        }
      },
      {
        "v_id": "cheap.a.01",
        "v_type": "word",
        "attributes": {
          "id": "",
          "name": "",
          "pos": ""
        }
      },
      {
        "v_id": "cheap.a",
        "v_type": "synset",
        "attributes": {
          "id": "",
          "definition": ""
        }
      },
      {
        "v_id": "affordable.s",
        "v_type": "word",
        "attributes": {
          "id": "affordable.s",
          "name": "affordable",
          "pos": "s"
        }
      },
      {
  

In [1]:
import pandas as pd

person = pd.read_csv('../csv_imports/topics.csv')
person

Unnamed: 0,tid,title
0,pov.1,How to fight the affordable housing and climat...
1,pov.2,Firms who refuse to fund cladding repairs coul...
2,pov.3,How the homelessness crisis hit one of Califor...
3,pov.4,Business Sense | Lessons learned from past hou...
4,pov.5,Inflation Remains Relentless
5,pov.6,Community Land Trusts Fight for Neighborhood A...
6,pov.7,Climate change is spurring a movement to build...
7,pov.8,Canada’s Tax Free First Home Savings Account (...
8,pov.9,People can own property without breaking the bank
9,pov.10,Finding housing with a criminal record is hard...


In [18]:
print(person)
for row in person.itertuples():
    title = {}
    title = row[2].split(' ')
    centralness = 1
    for word in title:
        centralness = centralness * len(word)
    print(centralness)

      tid                                              title
0   pov.1  How to fight the affordable housing and climat...
1   pov.2  Firms who refuse to fund cladding repairs coul...
2   pov.3  How the homelessness crisis hit one of Califor...
3   pov.4  Business Sense | Lessons learned from past hou...
4   pov.5                       Inflation Remains Relentless
5   pov.6  Community Land Trusts Fight for Neighborhood A...
6   pov.7  Climate change is spurring a movement to build...
7   pov.8  Canada’s Tax Free First Home Savings Account (...
8   pov.9  People can own property without breaking the bank
9  pov.10  Finding housing with a criminal record is hard...
6350400
16934400
33592320
1756160
630
505440
2688000
829785600
290304
67737600


In [24]:
conn.getVertices("word", where='name=="affordable"',timeout=0)

[{'v_id': 'affordable.s',
  'v_type': 'word',
  'attributes': {'id': 'affordable.s', 'name': 'affordable', 'pos': 's'}},
 {'v_id': 'affordable',
  'v_type': 'word',
  'attributes': {'id': 'affordable', 'name': 'affordable', 'pos': ''}},
 {'v_id': 'affordable.a',
  'v_type': 'word',
  'attributes': {'id': 'affordable.a', 'name': 'affordable', 'pos': 'a'}}]

In [38]:
results = conn.shortestPath([("word", "affordable")], [("word", "homelessness")])
pprint( len(results[0]['edges']) )

[{'type': 'word', 'id': 'affordable'}]
[{'type': 'word', 'id': 'homelessness'}]
4


In [58]:
test_word = conn.getVertices("word", where='name=="trusts"',timeout=0)
print(test_word)
results2 = conn.shortestPath([("word", "housing")], [("word", "trusts")])
print(results2)

[{'v_id': 'trusts.v', 'v_type': 'word', 'attributes': {'id': 'trusts.v', 'name': 'trusts', 'pos': 'v'}}, {'v_id': 'trusts.n', 'v_type': 'word', 'attributes': {'id': 'trusts.n', 'name': 'trusts', 'pos': 'n'}}]
[{'type': 'word', 'id': 'housing'}]
[{'type': 'word', 'id': 'trusts'}]


TigerGraphException: ('word vertex "trusts" does not exist.', None)

In [72]:
answer = []
for row in person.itertuples():
    title_list = []
    title = row[2].split(' ')
    for word in title:
        if len(word) > 3:
          test_word = conn.getVertices("word", where='name=="' + word.lower() + '"',timeout=0)
          #print(test_word)
          if len(test_word) > 0:
            title_list.append(word.lower())
    print(title_list)
    centralness = 0
    for item in title_list:
      try:  
        results1 = conn.shortestPath([("word", "affordable")], [("word", item)])
        results2 = conn.shortestPath([("word", "housing")], [("word", item)])
        #print(results[0]['edges'])
        result1_v = 1
        result2_v = 1
        if (len(results1) >  0):
            result1_v = len(results1[0]['edges'])
        if (len(results2) >  0):
            result2_v = len(results2[0]['edges'])   
        centralness = centralness + result1_v * result2_v
      except:
        centralness = centralness + 99
    #print(centralness / (len(title_list) + 1))
    answer.append(list([row[1],row[2], (centralness / (len(title_list) + 1))]))



['fight', 'affordable', 'housing', 'climate', 'crises', 'once']
[{'type': 'word', 'id': 'affordable'}]
[{'type': 'word', 'id': 'fight'}]
[{'type': 'word', 'id': 'housing'}]
[{'type': 'word', 'id': 'fight'}]
[{'type': 'word', 'id': 'affordable'}]
[{'type': 'word', 'id': 'affordable'}]
[{'type': 'word', 'id': 'housing'}]
[{'type': 'word', 'id': 'affordable'}]
[{'type': 'word', 'id': 'affordable'}]
[{'type': 'word', 'id': 'housing'}]
[{'type': 'word', 'id': 'housing'}]
[{'type': 'word', 'id': 'housing'}]
[{'type': 'word', 'id': 'affordable'}]
[{'type': 'word', 'id': 'climate'}]
[{'type': 'word', 'id': 'housing'}]
[{'type': 'word', 'id': 'climate'}]
[{'type': 'word', 'id': 'affordable'}]
[{'type': 'word', 'id': 'crises'}]
[{'type': 'word', 'id': 'housing'}]
[{'type': 'word', 'id': 'crises'}]
[{'type': 'word', 'id': 'affordable'}]
[{'type': 'word', 'id': 'once'}]
[{'type': 'word', 'id': 'housing'}]
[{'type': 'word', 'id': 'once'}]
['firms', 'refuse', 'fund', 'cladding', 'repairs', 'could', 

[['pov.1',
  'How to fight the affordable housing and climate crises at once',
  9.142857142857142],
 ['pov.2',
  'Firms who refuse to fund cladding repairs could face trading ban',
  12.444444444444445],
 ['pov.3',
  'How the homelessness crisis hit one of California’s most affordable cities',
  9.333333333333334],
 ['pov.4',
  'Business Sense | Lessons learned from past housing crunches',
  11.555555555555555],
 ['pov.5', 'Inflation Remains Relentless', 12.0],
 ['pov.6',
  'Community Land Trusts Fight for Neighborhood Affordability',
  22.142857142857142],
 ['pov.7',
  'Climate change is spurring a movement to build stormproof homes',
  19.375],
 ['pov.8',
  'Canada’s Tax Free First Home Savings Account (FHSA): Saver Account for Downpayments',
  17.666666666666668],
 ['pov.9',
  'People can own property without breaking the bank',
  9.333333333333334],
 ['pov.10',
  'Finding housing with a criminal record is hard. HUD wants to change that',
  10.666666666666666]]

In [79]:
#Sort Pandas
def mySort(e):
    return e[2]

answer.sort(key=mySort, reverse=True)

In [80]:
answer

[['pov.6',
  'Community Land Trusts Fight for Neighborhood Affordability',
  22.142857142857142],
 ['pov.7',
  'Climate change is spurring a movement to build stormproof homes',
  19.375],
 ['pov.8',
  'Canada’s Tax Free First Home Savings Account (FHSA): Saver Account for Downpayments',
  17.666666666666668],
 ['pov.2',
  'Firms who refuse to fund cladding repairs could face trading ban',
  12.444444444444445],
 ['pov.5', 'Inflation Remains Relentless', 12.0],
 ['pov.4',
  'Business Sense | Lessons learned from past housing crunches',
  11.555555555555555],
 ['pov.10',
  'Finding housing with a criminal record is hard. HUD wants to change that',
  10.666666666666666],
 ['pov.3',
  'How the homelessness crisis hit one of California’s most affordable cities',
  9.333333333333334],
 ['pov.9',
  'People can own property without breaking the bank',
  9.333333333333334],
 ['pov.1',
  'How to fight the affordable housing and climate crises at once',
  9.142857142857142]]