In [5]:
import json

import requests
import boto3
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver.aiohttp.transport import AiohttpTransport 
from gremlin_python.process.graph_traversal import __
from gremlin_python.process.traversal import P
from gremlin_python.process.traversal import T
from gremlin_python.statics import long 

Дождитесь загрузки графа. В консоли будет сообщение "CORA Dataset uploading is over". Граф содержит 2708 вершин и 10556 рёбер.

In [5]:
# Open a remote traversal source `g` against the Gremlin server.
# call_from_event_loop=True is handed to the aiohttp transport; presumably required
# because the notebook kernel already runs an asyncio event loop - confirm.
g = traversal().with_remote(DriverRemoteConnection(
    'ws://janusgraph:8182/gremlin', 'g',
    transport_factory=lambda: AiohttpTransport(call_from_event_loop=True))
)

# Sanity check that the CORA upload has finished: the expected result is
# (2708, 10556). `has_label` is the snake_case step name, matching the
# `with_remote` / `with_` spelling used elsewhere in this notebook; the
# camelCase `hasLabel` alias is deprecated in gremlin_python.
paper_count = g.V().has_label("paper").count().next()
citation_count = g.E().has_label("citation").count().next()
paper_count, citation_count

(2708, 10556)

In [8]:
# Endpoints addressed by service host name (janusgraph, s3, export_service, ...).
# NOTE(review): these host names presumably only resolve from inside the same
# container network as the services - confirm.

# Gremlin websocket endpoint and S3 endpoint; both are embedded in request payloads below.
JANUS_GRAPH_ENDPOINT_URL = "ws://janusgraph:8182/gremlin"
S3_ENDPOINT_URL = "http://s3:9000"
# HTTP endpoints of the pipeline services, one port each (8081-8084).
EXPORT_SERVICE_URL = "http://export_service:8081/export"
PROCESSING_SERVICE_URL = "http://processing_service:8082/processing"
TRAIN_SERVICE_URL = "http://train_service:8083/modeltraining"
ENDPOINT_SERVICE_URL = "http://endpoint_service:8084/call"

In [None]:
# Export request for the CORA graph. The single job `cora_job` targets the
# `category` property of `paper` nodes as a classification label and exports
# the `word_vector` float-array feature plus the paper-citation-paper edges.
cora_export = {
    "command": "export-pg",
    "output_s3_path": S3_ENDPOINT_URL,
    "bucket": "test-bucket",
    "params": {
        "endpoint": JANUS_GRAPH_ENDPOINT_URL
    },
    "additional_params": {
        "jobs": [
            {
                "name": "cora_job",
                "target": {
                    "node": "paper",
                    "property": "category",
                    "type": "classification",
                    # presumably train/validation/test fractions - confirm the order
                    "split_rate": [0.1, 0.2, 0.7],
                },
                "features": [
                    {
                        "node": "paper",
                        "property": "word_vector",
                        "type": "float_array"
                    },
                    {
                        "edge": ["paper", "citation", "paper"]
                    }
                ]
            }
        ]
    }
}

# Keep the response and raise on a 4xx/5xx status, so a failed export is
# reported in this cell instead of only being shown as a non-200 Response.
# The request body is unchanged (JSON text passed through `data=`).
cora_export_response = requests.post(
    EXPORT_SERVICE_URL, data=json.dumps(cora_export)
)
cora_export_response.raise_for_status()
cora_export_response

In [None]:
# Processing request for the CORA job: reads the job stored under
# `janusgraph_ml/cora_job` in `test-bucket` and names
# `janusgraph_ml/cora_processed` as the location for the processed data.
cora_proc = {
    "s3_endpoint_url": S3_ENDPOINT_URL,
    "bucket": "test-bucket",
    "job_path": "janusgraph_ml/cora_job",
    "processed_data_s3_location": "janusgraph_ml/cora_processed",
    "config_file_name": "train_config.json"
}

# Keep the response and raise on a 4xx/5xx status, so a failed processing
# call is reported in this cell instead of only being shown as a non-200 Response.
cora_proc_response = requests.post(
    PROCESSING_SERVICE_URL, data=json.dumps(cora_proc)
)
cora_proc_response.raise_for_status()
cora_proc_response

In [None]:
# Training request for the CORA job. The two S3 keys point at the
# `train_config.json` under the job path and the `processing_config.json`
# under the processed-data location used in the processing request.
cora_train = {
    "s3_params": {
        "bucket": "test-bucket",
        "s3_endpoint_url": S3_ENDPOINT_URL
    },
    "train_config_s3_key": "janusgraph_ml/cora_job/train_config.json",
    "processing_config_s3_key": "janusgraph_ml/cora_processed/processing_config.json"
}

# Keep the response and raise on a 4xx/5xx status, so a failed training
# call is reported in this cell instead of only being shown as a non-200 Response.
cora_train_response = requests.post(TRAIN_SERVICE_URL, data=json.dumps(cora_train))
cora_train_response.raise_for_status()
cora_train_response

In [27]:
# Rebind `g` to a traversal source that talks to the Gremlin server on localhost.
# The transport factory builds an aiohttp transport with call_from_event_loop=True.
remote_connection = DriverRemoteConnection(
    "ws://localhost:8182/gremlin",
    "g",
    transport_factory=lambda: AiohttpTransport(call_from_event_loop=True),
)
g = traversal().with_remote(remote_connection)

In [None]:
# Fetch up to 10 `paper` vertices as {'id': ..., 'category': ...} maps so their
# ids can be used in the prediction cells.
# `has_label` is the snake_case step name, matching the `with_remote` / `with_`
# spelling used elsewhere in this notebook; the camelCase alias is deprecated.
nodes = (
    g.V()
    .has_label('paper')
    .project('id', 'category')
    .by(T.id)
    .by('category')
    .next(10)
)

nodes

In [26]:
# Call the `predict` service through Gremlin's call() step with the
# "inductive" interface, adding the options one at a time.
# NOTE(review): the recorded run of this cell returned
# 'Bad response with code 500'.
predict_call = g.call("predict")
for option_name, option_value in (
    ("endpoint_id", "cora_job"),
    ("predict_entity_idx", "788578368"),  # presumably a paper vertex id - confirm
    ("interface", "inductive"),
):
    predict_call = predict_call.with_(option_name, option_value)

prediction = predict_call.next()
prediction

'Bad response with code 500'

In [None]:
# Call the `predict` service through Gremlin's call() step with the
# "transductive" interface, adding the options one at a time.
predict_call = g.call("predict")
for option_name, option_value in (
    ("endpoint_id", "cora_job"),
    ("predict_entity_idx", "788607040"),  # presumably a paper vertex id - confirm
    ("interface", "transductive"),
):
    predict_call = predict_call.with_(option_name, option_value)

prediction = predict_call.next()
prediction

In [6]:
from dgl.data import TexasDataset

In [11]:
# Instantiate DGL's TexasDataset and take its item at index 0 as the graph.
# NOTE(review): this rebinds `g`, which earlier cells use for the Gremlin
# traversal source; the CSV-export cells below read `g.ndata` / `g.edges()`
# from this object.
dataset = TexasDataset()
g = dataset[0]

Done loading data from cached files.


In [22]:
import csv
import os

In [31]:
# Dump every node of the graph to util/data/texas_nodes.csv.
# One row per node: its position in g.ndata as `node_id`, the feature row
# converted to a Python list (written by csv as that list's string form),
# and the label as a plain int.
with open('util/data/texas_nodes.csv', 'w', newline='') as nodes_file:
    node_writer = csv.DictWriter(
        nodes_file, fieldnames=['node_id', 'word_vector', 'category']
    )
    node_writer.writeheader()
    node_writer.writerows(
        {
            'node_id': node_id,
            'word_vector': features.tolist(),
            'category': int(label),
        }
        for node_id, (features, label) in enumerate(
            zip(g.ndata["feat"], g.ndata["label"])
        )
    )

In [35]:
# Dump every edge of the graph to util/data/texas_edges.csv as
# (src_id, dst_id) integer pairs, pairing element 0 and element 1 of
# g.edges() position by position.
with open('util/data/texas_edges.csv', 'w', newline='') as edges_file:
    edge_writer = csv.DictWriter(edges_file, fieldnames=['src_id', 'dst_id'])
    edge_writer.writeheader()

    edge_endpoints = g.edges()
    edge_writer.writerows(
        {'src_id': int(source), 'dst_id': int(destination)}
        for source, destination in zip(edge_endpoints[0], edge_endpoints[1])
    )

In [1]:
from util.gremlin import upload_texas

In [3]:
# Call the project helper `upload_texas` with the local Gremlin websocket URL.
# NOTE(review): the helper lives in util.gremlin and is not shown here;
# presumably it loads the util/data/texas_*.csv files written above into the
# graph as `page` vertices and `hyperlink` edges - confirm.
upload_texas("ws://localhost:8182/gremlin")

In [6]:
# Bind `g` to a traversal source that talks to the Gremlin server on localhost.
# The transport factory builds an aiohttp transport with call_from_event_loop=True.
remote_connection = DriverRemoteConnection(
    "ws://localhost:8182/gremlin",
    "g",
    transport_factory=lambda: AiohttpTransport(call_from_event_loop=True),
)
g = traversal().with_remote(remote_connection)

In [7]:
# Count `page` vertices and `hyperlink` edges; the recorded run shows (183, 325).
# `has_label` is the snake_case step name, matching the `with_remote` spelling
# used elsewhere in this notebook; the camelCase alias is deprecated.
page_count = g.V().has_label("page").count().next()
hyperlink_count = g.E().has_label("hyperlink").count().next()
page_count, hyperlink_count

(183, 325)

In [10]:
# Rebinds the endpoint constants assigned earlier in the notebook.
# The graph and S3 URLs keep the `janusgraph` / `s3` host names (they are
# embedded in the request payloads below), while the four pipeline services
# are addressed on localhost (they are called directly from this notebook).
# NOTE(review): presumably the notebook runs on the host here while the
# services resolve `janusgraph` / `s3` inside their own network - confirm.
JANUS_GRAPH_ENDPOINT_URL = "ws://janusgraph:8182/gremlin"
S3_ENDPOINT_URL = "http://s3:9000"
EXPORT_SERVICE_URL = "http://localhost:8081/export"
PROCESSING_SERVICE_URL = "http://localhost:8082/processing"
TRAIN_SERVICE_URL = "http://localhost:8083/modeltraining"
ENDPOINT_SERVICE_URL = "http://localhost:8084/call"

In [12]:
# Export request for the Texas graph. The single job `texas_job` targets the
# `category` property of `page` nodes as a classification label and exports
# the `word_vector` float-array feature plus the page-hyperlink-page edges.
texas_export = {
    "command": "export-pg",
    "output_s3_path": S3_ENDPOINT_URL,
    "bucket": "test-bucket",
    "params": {
        "endpoint": JANUS_GRAPH_ENDPOINT_URL
    },
    "additional_params": {
        "jobs": [
            {
                "name": "texas_job",
                "target": {
                    "node": "page",
                    "property": "category",
                    "type": "classification",
                    # presumably train/validation/test fractions - confirm the order
                    "split_rate": [0.4, 0.3, 0.3],
                },
                "features": [
                    {
                        "node": "page",
                        "property": "word_vector",
                        "type": "float_array"
                    },
                    {
                        "edge": ["page", "hyperlink", "page"]
                    }
                ]
            }
        ]
    }
}

# Keep the response and raise on a 4xx/5xx status, so a failed export is
# reported in this cell instead of only being shown as a non-200 Response.
# The request body is unchanged (JSON text passed through `data=`).
texas_export_response = requests.post(
    EXPORT_SERVICE_URL, data=json.dumps(texas_export)
)
texas_export_response.raise_for_status()
texas_export_response

<Response [200]>

In [18]:
# Processing request for the Texas job: reads the job stored under
# `janusgraph_ml/texas_job` in `test-bucket` and names
# `janusgraph_ml/texas_processed` as the location for the processed data.
texas_proc = {
    "s3_endpoint_url": S3_ENDPOINT_URL,
    "bucket": "test-bucket",
    "job_path": "janusgraph_ml/texas_job",
    "processed_data_s3_location": "janusgraph_ml/texas_processed",
    "config_file_name": "train_config.json"
}

# Keep the response and raise on a 4xx/5xx status, so a failed processing
# call is reported in this cell instead of only being shown as a non-200 Response.
texas_proc_response = requests.post(
    PROCESSING_SERVICE_URL, data=json.dumps(texas_proc)
)
texas_proc_response.raise_for_status()
texas_proc_response

<Response [200]>

In [19]:
# Training request for the Texas job. The two S3 keys point at the
# `train_config.json` under the job path and the `processing_config.json`
# under the processed-data location used in the processing request.
texas_train = {
    "s3_params": {
        "bucket": "test-bucket",
        "s3_endpoint_url": S3_ENDPOINT_URL
    },
    "train_config_s3_key": "janusgraph_ml/texas_job/train_config.json",
    "processing_config_s3_key": "janusgraph_ml/texas_processed/processing_config.json"
}

# Keep the response and raise on a 4xx/5xx status, so a failed training
# call is reported in this cell instead of only being shown as a non-200 Response.
texas_train_response = requests.post(TRAIN_SERVICE_URL, data=json.dumps(texas_train))
texas_train_response.raise_for_status()
texas_train_response

<Response [200]>

In [23]:
# Fetch up to 10 `page` vertices as {'id': ..., 'category': ...} maps.
# `has_label` is the snake_case step name, matching the `with_remote` spelling
# used elsewhere in this notebook; the camelCase alias is deprecated.
# NOTE(review): the recorded run of this cell failed with a ClientConnectorError
# for localhost:8182, i.e. the Gremlin server was not reachable at that time.
nodes = (
    g.V()
    .has_label('page')
    .project('id', 'category')
    .by(T.id)
    .by('category')
    .next(10)
)

nodes

ClientConnectorError: Cannot connect to host localhost:8182 ssl:default [Connect call failed ('127.0.0.1', 8182)]