Imports

In [1]:
#!pip install nest_asyncio
import nest_asyncio
nest_asyncio.apply()

In [2]:
from gremlin_python.process.anonymous_traversal import traversal
from gremlin_python.driver.driver_remote_connection import DriverRemoteConnection
from gremlin_python.driver import client, serializer

Sample Graph Creation

In [3]:
g = traversal().withRemote(DriverRemoteConnection('ws://localhost:8182/gremlin','g'))

In [4]:
# Prepare the query to delete all vertices (nodes)
query = g.V().drop()

# Execute the query to delete all vertices
query.iterate()


[['V'], ['drop'], ['none'], ['values', '_ipython_canary_method_should_not_exist_'], ['values', '_ipython_canary_method_should_not_exist_']]

In [5]:
# # Insert a new vertex
# new_vertex = g.addV('person').property('name', 'John').property('age', 30).next()

# # Insert another vertex
# another_vertex = g.addV('person').property('name', 'Alice').property('age', 25).next()

# # Insert an edge between the vertices
# g.V(new_vertex).addE('knows').to(g.V(another_vertex)).next()

Printing all nodes of the graph

In [6]:
# g.V().valueMap(True).toList()

In [7]:
all_vertices = g.V().valueMap(True).toList()

# Print the nodes and their properties
for vertex_properties in all_vertices:
    print(vertex_properties)

Function to create a graph

In [8]:
def create_graph_node(label, properties):
    try:
        # Prepare the query to add a node
        query = g.addV(label)

        # Add properties to the node
        for key, value in properties.items():
            query = query.property(key, value)

        # Execute the query
        result_set = query.toList()
    finally:
        return result_set

In [9]:
# node_label = "person"
# node_properties = {
#     "name": "John",
#     "age": 30,
#     "city": "New York",
# }
# created_node = create_graph_node(node_label, node_properties)
# print("Created Node:", created_node)

In [10]:
def get_node_properties_by_id(node_id):
    result = "Pending"
    try:
        # Prepare the query to get the node by its ID
        query = g.V(node_id)

        # Execute the query and get the properties of the node
        result_set = query.valueMap(True).toList()

        if result_set:
            # The query returns a list of dictionaries with properties as keys and lists of values.
            # Since we queried for a single node, we extract the first element from the list.
            node_properties = result_set[0]
            result =  node_properties
        else:
            result = None

    finally:
        return result

In [11]:
# node_id = 16568

# node_properties = get_node_properties_by_id(node_id)
# if node_properties:
#     print("Properties of Node with ID", node_id, ":", node_properties)
# else:
#     print("Node with ID", node_id, "not found.")

Create graph from CSV

In [12]:
import csv

file_path = "/home/smeet/Downloads/Bajaj Mall Data.csv"

# Open the CSV file in read mode
with open(file_path, mode='r', newline='') as csvfile:
    # Create a CSV reader object
    csv_reader = csv.reader(csvfile)
    
    # Read and process each row in the CSV file
    for row in csv_reader:
        # 'row' will be a list representing each row in the CSV file
        # You can access individual elements in the row using their indexes
        id = row[0]
        name = row[1]
        category = row[2]
        price = row[3]
        rating = row[4]
        
        # Now you can do whatever you want with the data in the row
        #print(f"ID: {id}, Name: {name}, Category: {category}, Price: {price}, Rating: {rating}")
        
        node_label = "product"
        node_properties = {
            "id": id,
            "name": name,
            "category": category,
            "price":price,
            "rating":rating
        }
        created_node = create_graph_node(node_label, node_properties)
        print("Created Node:", created_node)

Created Node: [v[90176]]
Created Node: [v[36976]]
Created Node: [v[94272]]
Created Node: [v[98400]]
Created Node: [v[41072]]
Created Node: [v[45168]]
Created Node: [v[16552]]
Created Node: [v[53352]]
Created Node: [v[102496]]
Created Node: [v[57448]]
Created Node: [v[106680]]
Created Node: [v[106592]]
Created Node: [v[98368]]
Created Node: [v[36952]]
Created Node: [v[110688]]
Created Node: [v[49264]]
Created Node: [v[114784]]
Created Node: [v[102464]]
Created Node: [v[110776]]
Created Node: [v[118880]]
Created Node: [v[106560]]
Created Node: [v[114872]]
Created Node: [v[110656]]
Created Node: [v[20648]]
Created Node: [v[122976]]
Created Node: [v[127072]]
Created Node: [v[53360]]
Created Node: [v[118968]]
Created Node: [v[131168]]
Created Node: [v[135264]]
Created Node: [v[123064]]


In [13]:
all_vertices = g.V().valueMap(True).toList()

# Print the nodes and their properties
for vertex_properties in all_vertices:
    print(vertex_properties)

{<T.id: 1>: 90176, <T.label: 4>: 'product', 'name': ['NAME'], 'price': ['PRICE'], 'rating': ['AVGRATING'], 'id': ['ID'], 'category': ['CATEGORY']}
{<T.id: 1>: 94272, <T.label: 4>: 'product', 'name': ['Apka Interior Metallic Sofa cum Bed with Cushions (Finish Color - RED Rectangular Slide Shape)'], 'price': ['21920'], 'rating': ['3.3'], 'id': ['1'], 'category': ['Beds']}
{<T.id: 1>: 98368, <T.label: 4>: 'product', 'name': ['Whirlpool 240 L Frost Free Triple Door Refrigerator Alpha Steel (FP 263D PROTTON ROY)'], 'price': ['26690'], 'rating': ['3.9'], 'id': ['11'], 'category': ['Fridge']}
{<T.id: 1>: 102464, <T.label: 4>: 'product', 'name': ['OnePlus Nord CE 2 Lite 5G 128 GB Storage Blue Tide (6 GB RAM)'], 'price': ['17999'], 'rating': ['3.3'], 'id': ['16'], 'category': ['Smartphone']}
{<T.id: 1>: 106560, <T.label: 4>: 'product', 'name': ['Samsung Galaxy A03 32 GB Storage Black (3 GB RAM)'], 'price': ['10299'], 'rating': ['2.9'], 'id': ['19'], 'category': ['Smartphone']}
{<T.id: 1>: 11065

Create edges from TSV

In [14]:
def add_edges_from_edgelist(edge_list):

    try:
        # Iterate through the edge list and add edges between existing nodes
        for source_id, edge_label, target_id in edge_list:
            query = graph.traversal().V(source_id).addE(edge_label).to(traversal_source).V(target_id)
            query.iterate()

    finally:
        # Close the connection to Gremlin Server
        graph.close()

In [15]:
# # Example edge list: (source_id, edge_label, target_id)
# edge_list = [
#     (1, 'knows', 2),
#     (2, 'likes', 3),
#     (1, 'works_with', 4),
#     # Add more edges as needed
# ]

# add_edges_from_edgelist(edge_list)

In [16]:
def create_edges_with_id(id1, id2, edge_label):
    try:
        # Get nodes with the specified property and their IDs
        n1 = g.V().has("id", id1).id().toList()[0]
        n2 = g.V().has("id", id2).id().toList()[0]
    
    
        #query = graph.traversal().V(n1).addE(edge_label).to(n1).V(n1)
        query = g.V(n1).addE(edge_label).to(g.V(n2))
    
        query.iterate()
    finally:
        return

In [17]:
create_edges_with_id(0, 7, "coPurchased_with")

In [18]:
tsv_path = "/home/smeet/Downloads/edgelist.txt"

# Open the TSV file in read mode, set the delimiter to '\t' for TSV
with open(tsv_path, mode='r', newline='') as tsvfile:
    # Create a CSV reader object with the specified delimiter
    tsv_reader = csv.reader(tsvfile, delimiter='\t')
    
    # Read and process each row in the TSV file
    for row in tsv_reader:
        if(len(row)<2): continue
        # 'row' will be a list representing each row in the TSV file
        # You can access individual elements in the row using their indexes
        id1 = int(row[0])
        id2 = int(row[1])
        
        # Now you can do whatever you want with the data in the row
        create_edges_with_id(id1, id2, "coPurchased_with")


In [25]:
g.V(110656).both().toList()

[v[131168]]

In [20]:
import psutil
import time

# Function to simulate some operation
def perform_operation():
    for _ in range(1000000):
        # Simulate a computation-intensive task
        
        result = 2 * 2
# Get memory usage in bytes before starting the operation
memory_before = psutil.virtual_memory().used

# Get CPU usage as a percentage before starting the operation
cpu_usage_before = psutil.cpu_percent(interval=None, percpu=False)

# Start the operation
start_time = time.time()
perform_operation()
end_time = time.time()

memory_after = psutil.virtual_memory().used
# Calculate the memory usage during the operation
memory_usage_during_operation = memory_after - memory_before

# Get CPU usage as a percentage after finishing the operation
cpu_usage_after = psutil.cpu_percent(interval=None, percpu=False)

# Calculate the CPU usage during the operation
cpu_usage_during_operation = cpu_usage_after - cpu_usage_before

# Calculate the time taken for the operation
operation_duration = end_time - start_time

print(f"Memory Usage Before: {memory_before/1000000} mb")
print(f"CPU Usage Before: {cpu_usage_before}%")
print(f"Memory Usage After: {memory_after/1000000} mb")
print(f"CPU Usage After: {cpu_usage_after}%")
print(f"Memory Usage During Operation: {memory_usage_during_operation/1000} kb")
print(f"CPU Usage During Operation: {cpu_usage_during_operation}%")
print(f"Operation Duration: {operation_duration:.4f} seconds")


Memory Usage Before: 6254.747648 mb
CPU Usage Before: 9.5%
Memory Usage After: 6254.747648 mb
CPU Usage After: 36.8%
Memory Usage During Operation: 0.0 kb
CPU Usage During Operation: 27.299999999999997%
Operation Duration: 0.0523 seconds


In [34]:
def generate_graph_statistics_report():
    #try:
    # Query for the total number of vertices (nodes) in the graph
    total_vertices_query = g.V().count().toList()
    total_vertices = total_vertices_query[0]

    # Query for the total number of edges in the graph
    total_edges_query = g.E().count().toList()
    total_edges = total_edges_query[0]

    # Query for the average degree of the vertices in the graph
    sum_degree_query = g.V().bothE().count().toList()
    sum_degree = sum_degree_query[0]

    # # Query for the number of vertices per label (node type) in the graph
    # vertices_per_label_query = g.V().groupCount().by('coPurchased_with').toList()
    # vertices_per_label = {label: count for label, count in vertices_per_label_query[0].items()}

    # # Query for the number of edges per label in the graph
    # edges_per_label_query = g.E().groupCount().by('coPurchased_with').toList()
    # edges_per_label = {label: count for label, count in edges_per_label_query[0].items()}

    # Query for other statistics as needed...

    # Generate the report
    report = {
        "Total Vertices": total_vertices,
        "Total Edges": total_edges,
        "Average Degree": sum_degree/total_vertices,
        #"Vertices per Label": vertices_per_label,
        #"Edges per Label": edges_per_label,
        # Add other statistics to the report as needed...
    }

    return report

    # finally:
    #     # Close the connection to Gremlin Server
    #     graph.close()



In [35]:
graph_statistics_report = generate_graph_statistics_report()
print("Graph Statistics Report:")
for key, value in graph_statistics_report.items():
    print(f"{key}: {value}")

Graph Statistics Report:
Total Vertices: 31
Total Edges: 45
Average Degree: 2.903225806451613
