In [3]:
# Import torch and report which CUDA devices this kernel can see
import torch

cuda_available = torch.cuda.is_available()
print(cuda_available)
print(torch.cuda.device_count())
# torch.cuda.current_device() initializes CUDA and raises when no usable GPU
# is present, so only query it once availability has been confirmed.
if cuda_available:
    print(torch.cuda.current_device())
else:
    print("No CUDA device available")

True
1
0


In [4]:
# Get CUDA device name
# Querying device 0 raises on a machine without a usable GPU, so check first.
if torch.cuda.is_available():
    print(torch.cuda.device(0))
    print(torch.cuda.get_device_name(0))
else:
    print("No CUDA device available")

<torch.cuda.device object at 0x7f38a3b88650>
NVIDIA A30


### Graph500_Scale23_EdgeFactor32

#### Read binary file first

In [3]:
import struct

# One edge record = two native unsigned ints (v1, v2).
EDGE_RECORD = struct.Struct("II")
# Fetch many records per read() call instead of issuing one call per edge.
RECORDS_PER_READ = 1 << 20

# Flat list of vertex ids: [v1, v2, v1, v2, ...]
edges = []
# `with` guarantees the file is closed even if reading or unpacking raises.
with open("/dfs6/pub/seminl1/graph500/graph500_scale23_ef32.edges", "rb") as file:
    while True:
        chunk = file.read(EDGE_RECORD.size * RECORDS_PER_READ)
        # Drop an incomplete trailing record: only complete (v1, v2) pairs
        # are kept.
        complete = len(chunk) - len(chunk) % EDGE_RECORD.size
        for pair in EDGE_RECORD.iter_unpack(chunk[:complete]):
            edges.extend(pair)
        if len(chunk) < EDGE_RECORD.size * RECORDS_PER_READ:
            # A short read means the end of the file was reached.
            break

In [4]:
# Sanity check: number of vertex ids read, then the largest and smallest id
for summarize in (len, max, min):
    print(summarize(edges))

805306368
8388607
0


#### Edge index

In [5]:
import numpy as np

# Convert the flat sequence of vertex ids into a 1-D numpy array
edges = np.array(edges)
print(np.shape(edges))
# Regroup the ids so that every row holds one (v1, v2) edge
edges = np.reshape(edges, (-1, 2))
print(np.shape(edges))

(805306368,)
(402653184, 2)


In [6]:
# Keep a single copy of every distinct (v1, v2) row; repeated edges carry no
# extra information. np.unique also returns the rows in sorted order.
edges = np.unique(edges, axis=0)
print(np.shape(edges))

(136320066, 2)


The number of edges reported in 'Barad-dur' cannot be reproduced with this graph: only 136,320,066 unique edges remain after removing duplicates.

### Graph500_Scale23_EdgeFactor64

#### Read binary file first

In [3]:
import struct

# One edge record = two native unsigned ints (v1, v2).
EDGE_RECORD = struct.Struct("II")
# Fetch many records per read() call instead of issuing one call per edge.
RECORDS_PER_READ = 1 << 20

# Flat list of vertex ids: [v1, v2, v1, v2, ...]
edges = []
# `with` guarantees the file is closed even if reading or unpacking raises.
with open("/dfs6/pub/seminl1/graph500/graph500_scale23_ef64.edges", "rb") as file:
    while True:
        chunk = file.read(EDGE_RECORD.size * RECORDS_PER_READ)
        # Drop an incomplete trailing record: only complete (v1, v2) pairs
        # are kept.
        complete = len(chunk) - len(chunk) % EDGE_RECORD.size
        for pair in EDGE_RECORD.iter_unpack(chunk[:complete]):
            edges.extend(pair)
        if len(chunk) < EDGE_RECORD.size * RECORDS_PER_READ:
            # A short read means the end of the file was reached.
            break

In [4]:
# Sanity check: number of vertex ids read, then the largest and smallest id
for summarize in (len, max, min):
    print(summarize(edges))

1610612736
8388607
0


#### Edge index

In [5]:
import numpy as np

# Convert the flat sequence of vertex ids into a 1-D numpy array
edges = np.array(edges)
print(np.shape(edges))
# Regroup the ids so that every row holds one (v1, v2) edge
edges = np.reshape(edges, (-1, 2))
print(np.shape(edges))

(1610612736,)
(805306368, 2)


In [6]:
# Keep a single copy of every distinct (v1, v2) row; repeated edges carry no
# extra information. np.unique also returns the rows in sorted order.
edges = np.unique(edges, axis=0)
print(np.shape(edges))

(261760844, 2)


In [7]:
# Make edge_index as tensor for using it on PyTorch Geometric
# dtype should be torch.int64
import torch_geometric

# torch.tensor() always copies its input. torch.as_tensor() reuses the numpy
# buffer when the dtype already matches and converts only when it does not,
# which avoids duplicating the multi-gigabyte edge array in memory.
edges = torch.as_tensor(edges, dtype=torch.int64)
print(edges.shape)

torch.Size([261760844, 2])


#### Node feature matrix

In [8]:
# Make node feature matrix by our own
# 8388608(#nodes) x 64(#features)
import random  # kept: other cells in this notebook may rely on `random` being imported here

import numpy as np

num_nodes = 8388608
num_features = 64
# Rows generated per step; bounds the temporary memory used by the
# duplicate check below.
rows_per_chunk = 1 << 18

# Fixed seed so the synthetic features can be regenerated identically.
feature_rng = np.random.default_rng(0)
x = np.empty((num_nodes, num_features), dtype=np.float64)
for start in range(0, num_nodes, rows_per_chunk):
    rows = min(rows_per_chunk, num_nodes - start)
    block = feature_rng.uniform(-2.5, 2.5, size=(rows, num_features))
    # Keep the guarantee that no value repeats within one node's row:
    # after sorting a row, equal neighbours reveal a repeated value, and
    # every such row is drawn again.
    while True:
        repeated = (np.diff(np.sort(block, axis=1), axis=1) == 0).any(axis=1)
        if not repeated.any():
            break
        block[repeated] = feature_rng.uniform(
            -2.5, 2.5, size=(int(repeated.sum()), num_features)
        )
    x[start:start + rows] = block

# Flatten to one value per entry (num_nodes * num_features in total).
x = x.reshape(-1)
print(len(x))

536870912


In [9]:
# Convert the flat feature values into a 1-D numpy array
x = np.array(x)
print(np.shape(x))
# Regroup the values into rows of 64 features
x = np.reshape(x, (-1, 64))
print(np.shape(x))

(536870912,)
(8388608, 64)


In [10]:
# Convert the node feature matrix to a tensor for PyTorch Geometric;
# features are stored as torch.float32
feature_dtype = torch.float32
x = torch.tensor(x, dtype=feature_dtype)
print(x.size())

torch.Size([8388608, 64])


#### Ground-truth labels

In [11]:
# Make ground-truth labels by our own
import numpy as np

num_nodes = 8388608
num_classes = 64
# A single vectorized draw replaces millions of Python-level randrange()
# calls. Labels are integers in [0, num_classes); the fixed seed makes them
# reproducible.
y = np.random.default_rng(1).integers(0, num_classes, size=num_nodes)

print(len(y))

8388608


In [12]:
# Convert the ground-truth labels to a tensor for PyTorch Geometric;
# labels are stored as torch.int64
label_dtype = torch.int64
y = torch.tensor(y, dtype=label_dtype)
print(y.size())

torch.Size([8388608])


#### Make all components as PyTorch dataset

In [13]:
# Bundle the node feature matrix, edge index and ground-truth labels into a
# single PyTorch Geometric Data object
from torch_geometric.data import Data

data = Data(
    x=x,
    # edges holds one edge per row; transpose it to [2, num_edges]
    edge_index=edges.transpose(0, 1).contiguous(),
    y=y,
)
print(data)

Data(x=[8388608, 64], edge_index=[2, 261760844], y=[8388608])


In [14]:
# Check whether the graph is undirected as stored
is_undirected = data.is_undirected()
print('The current dataset is undirected:', is_undirected)

The current dataset is undirected: False


In [15]:
import torch_geometric.transforms as T

# Apply the ToUndirected transform, then confirm the result
to_undirected = T.ToUndirected()
data = to_undirected(data)
print('The current data is undirected:', data.is_undirected())

The current data is undirected: True


#### Save the PyTorch dataset

In [16]:
# Write the assembled Data object to disk
dataset_path = "/dfs6/pub/seminl1/graph500/graph500_scale23_ef64.pt"
torch.save(data, dataset_path)

In [18]:
# Reload the saved dataset; mmap=True memory-maps the tensor storages from the
# file rather than reading them all into memory at once.
# The file holds a pickled PyG Data object, not a plain tensor/state dict, so
# full unpickling is requested explicitly: newer PyTorch releases default to
# weights_only=True, which may reject such objects.
# Only load files you created yourself this way -- unpickling can run arbitrary code.
data = torch.load(
    "/dfs6/pub/seminl1/graph500/graph500_scale23_ef64.pt",
    mmap=True,
    weights_only=False,
)

In [19]:
# Show the summary of the reloaded Data object (x, edge_index and y shapes)
print(data)

Data(x=[8388608, 64], edge_index=[2, 517557419], y=[8388608])


### Graph500_Scale24_EdgeFactor32

#### Read binary file first

In [20]:
import struct

# One edge record = two native unsigned ints (v1, v2).
EDGE_RECORD = struct.Struct("II")
# Fetch many records per read() call instead of issuing one call per edge.
RECORDS_PER_READ = 1 << 20

# Flat list of vertex ids: [v1, v2, v1, v2, ...]
edges = []
# `with` guarantees the file is closed even if reading or unpacking raises.
with open("/dfs6/pub/seminl1/graph500/graph500_scale24_ef32.edges", "rb") as file:
    while True:
        chunk = file.read(EDGE_RECORD.size * RECORDS_PER_READ)
        # Drop an incomplete trailing record: only complete (v1, v2) pairs
        # are kept.
        complete = len(chunk) - len(chunk) % EDGE_RECORD.size
        for pair in EDGE_RECORD.iter_unpack(chunk[:complete]):
            edges.extend(pair)
        if len(chunk) < EDGE_RECORD.size * RECORDS_PER_READ:
            # A short read means the end of the file was reached.
            break

In [21]:
# Sanity check: number of vertex ids read, then the largest and smallest id
for summarize in (len, max, min):
    print(summarize(edges))

1610612736
16777215
0


#### Edge index

In [22]:
import numpy as np

# Convert the flat sequence of vertex ids into a 1-D numpy array
edges = np.array(edges)
print(np.shape(edges))
# Regroup the ids so that every row holds one (v1, v2) edge
edges = np.reshape(edges, (-1, 2))
print(np.shape(edges))

(1610612736,)
(805306368, 2)


In [23]:
# Keep a single copy of every distinct (v1, v2) row; repeated edges carry no
# extra information. np.unique also returns the rows in sorted order.
edges = np.unique(edges, axis=0)
print(np.shape(edges))

(273771147, 2)


The number of edges in Scale24_EdgeFactor32 (273,771,147 after removing duplicates) is not enough to fit our target dataset cluster composition.

### Graph500_Scale24_EdgeFactor64

#### Read binary file first

In [1]:
import struct

# One edge record = two native unsigned ints (v1, v2).
EDGE_RECORD = struct.Struct("II")
# Fetch many records per read() call instead of issuing one call per edge.
RECORDS_PER_READ = 1 << 20

# Flat list of vertex ids: [v1, v2, v1, v2, ...]
edges = []
# `with` guarantees the file is closed even if reading or unpacking raises.
with open("/dfs6/pub/seminl1/graph500/graph500_scale24_ef64.edges", "rb") as file:
    while True:
        chunk = file.read(EDGE_RECORD.size * RECORDS_PER_READ)
        # Drop an incomplete trailing record: only complete (v1, v2) pairs
        # are kept.
        complete = len(chunk) - len(chunk) % EDGE_RECORD.size
        for pair in EDGE_RECORD.iter_unpack(chunk[:complete]):
            edges.extend(pair)
        if len(chunk) < EDGE_RECORD.size * RECORDS_PER_READ:
            # A short read means the end of the file was reached.
            break

In [2]:
# Sanity check: number of vertex ids read, then the largest and smallest id
for summarize in (len, max, min):
    print(summarize(edges))

3221225472
16777215
0


#### Edge index

In [5]:
import numpy as np

# Convert the flat sequence of vertex ids into a 1-D numpy array
edges = np.array(edges)
print(np.shape(edges))
# Regroup the ids so that every row holds one (v1, v2) edge
edges = np.reshape(edges, (-1, 2))
print(np.shape(edges))

(3221225472,)
(1610612736, 2)


In [6]:
# Keep a single copy of every distinct (v1, v2) row; repeated edges carry no
# extra information. np.unique also returns the rows in sorted order.
edges = np.unique(edges, axis=0)
print(np.shape(edges))

(527928564, 2)


In [7]:
# Make edge_index as tensor for using it on PyTorch Geometric
# dtype should be torch.int64
import torch_geometric

# torch.tensor() always copies its input. torch.as_tensor() reuses the numpy
# buffer when the dtype already matches and converts only when it does not,
# which avoids duplicating the multi-gigabyte edge array in memory.
edges = torch.as_tensor(edges, dtype=torch.int64)
print(edges.shape)

torch.Size([527928564, 2])


#### Node feature matrix

In [8]:
# Make node feature matrix by our own
# 16777216(#nodes) x 64(#features)
import random  # kept: other cells in this notebook may rely on `random` being imported here

import numpy as np

num_nodes = 16777216
num_features = 64
# Rows generated per step; bounds the temporary memory used by the
# duplicate check below.
rows_per_chunk = 1 << 18

# Fixed seed so the synthetic features can be regenerated identically.
feature_rng = np.random.default_rng(0)
x = np.empty((num_nodes, num_features), dtype=np.float64)
for start in range(0, num_nodes, rows_per_chunk):
    rows = min(rows_per_chunk, num_nodes - start)
    block = feature_rng.uniform(-2.5, 2.5, size=(rows, num_features))
    # Keep the guarantee that no value repeats within one node's row:
    # after sorting a row, equal neighbours reveal a repeated value, and
    # every such row is drawn again.
    while True:
        repeated = (np.diff(np.sort(block, axis=1), axis=1) == 0).any(axis=1)
        if not repeated.any():
            break
        block[repeated] = feature_rng.uniform(
            -2.5, 2.5, size=(int(repeated.sum()), num_features)
        )
    x[start:start + rows] = block

# Flatten to one value per entry (num_nodes * num_features in total).
x = x.reshape(-1)
print(len(x))

1073741824


In [9]:
# Convert the flat feature values into a 1-D numpy array
x = np.array(x)
print(np.shape(x))
# Regroup the values into rows of 64 features
x = np.reshape(x, (-1, 64))
print(np.shape(x))

(1073741824,)
(16777216, 64)


In [10]:
# Convert the node feature matrix to a tensor for PyTorch Geometric;
# features are stored as torch.float32
feature_dtype = torch.float32
x = torch.tensor(x, dtype=feature_dtype)
print(x.size())

torch.Size([16777216, 64])


#### Ground-truth labels

In [11]:
# Make ground-truth labels by our own
import numpy as np

num_nodes = 16777216
num_classes = 64
# A single vectorized draw replaces millions of Python-level randrange()
# calls. Labels are integers in [0, num_classes); the fixed seed makes them
# reproducible.
y = np.random.default_rng(1).integers(0, num_classes, size=num_nodes)

print(len(y))

16777216


In [12]:
# Convert the ground-truth labels to a tensor for PyTorch Geometric;
# labels are stored as torch.int64
label_dtype = torch.int64
y = torch.tensor(y, dtype=label_dtype)
print(y.size())

torch.Size([16777216])


#### Make all components as PyTorch dataset

In [13]:
# Bundle the node feature matrix, edge index and ground-truth labels into a
# single PyTorch Geometric Data object
from torch_geometric.data import Data

data = Data(
    x=x,
    # edges holds one edge per row; transpose it to [2, num_edges]
    edge_index=edges.transpose(0, 1).contiguous(),
    y=y,
)
print(data)

Data(x=[16777216, 64], edge_index=[2, 527928564], y=[16777216])


In [14]:
# Check whether the graph is undirected as stored
is_undirected = data.is_undirected()
print('The current dataset is undirected:', is_undirected)

The current dataset is undirected: False


In [15]:
import torch_geometric.transforms as T

# Apply the ToUndirected transform, then confirm the result
to_undirected = T.ToUndirected()
data = to_undirected(data)
print('The current data is undirected:', data.is_undirected())

The current data is undirected: True


#### Save the PyTorch dataset

In [16]:
# Write the assembled Data object to disk
dataset_path = "/dfs6/pub/seminl1/graph500/graph500_scale24_ef64.pt"
torch.save(data, dataset_path)

In [17]:
# Reload the saved dataset; mmap=True memory-maps the tensor storages from the
# file rather than reading them all into memory at once.
# The file holds a pickled PyG Data object, not a plain tensor/state dict, so
# full unpickling is requested explicitly: newer PyTorch releases default to
# weights_only=True, which may reject such objects.
# Only load files you created yourself this way -- unpickling can run arbitrary code.
data = torch.load(
    "/dfs6/pub/seminl1/graph500/graph500_scale24_ef64.pt",
    mmap=True,
    weights_only=False,
)

In [18]:
# Show the summary of the reloaded Data object (x, edge_index and y shapes)
print(data)

Data(x=[16777216, 64], edge_index=[2, 1044447191], y=[16777216])
