In [1]:
import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import numpy as np
import torch

g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]), num_nodes=6)
# Equivalently, PyTorch LongTensors also work.
g = dgl.graph(
    (torch.LongTensor([0, 0, 0, 0, 0]), torch.LongTensor([1, 2, 3, 4, 5])),
    num_nodes=6,
)

# You can omit the number of nodes argument if you can tell the number of nodes from the edge list alone.
g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]))



In [None]:
# Assign a 3-dimensional node feature vector for each node.
g.ndata["x"] = torch.randn(6, 3)
# Assign a 4-dimensional edge feature vector for each edge.
g.edata["a"] = torch.randn(5, 4)
# Assign a 5x4 node feature matrix for each node.  Node and edge features in DGL can be multi-dimensional.
g.ndata["y"] = torch.randn(6, 5, 4)

print(g.edata["a"])

tensor([[-1.0610, -2.6291, -1.5116,  1.2287],
        [ 0.4833, -1.6545, -1.1173,  0.0300],
        [ 0.0089,  0.4669,  0.4350,  1.2601],
        [ 0.7695,  2.0895, -1.7074, -0.5123],
        [ 0.0142, -0.0962, -1.2562, -0.8136]])


## Quantifying Graph Structure

In [None]:
print(g.num_nodes())
print(g.num_edges())
# Out degrees of the center node
print(g.out_degrees(0))
# In degrees of the center node - note that the graph is directed so the in degree should be 0.
print(g.in_degrees(0))

6
5
5
0


In [None]:
# Induce a subgraph from node 0, node 1 and node 3 from the original graph.
sg1 = g.subgraph([0, 1, 3])
# Induce a subgraph from edge 0, edge 1 and edge 3 from the original graph.
sg2 = g.edge_subgraph([0, 1, 3])

In [None]:
# The original node feature of each node in sg1
print(sg1.ndata["x"].shape)
# The original edge feature of each node in sg1
print(sg1.edata["a"].shape)
# The original node feature of each node in sg2
print(sg2.ndata["x"])
# The original edge feature of each node in sg2
print(sg2.edata["a"])

torch.Size([3, 3])
torch.Size([2, 4])
tensor([[-0.3360, -0.9976, -2.4161],
        [ 2.4269, -0.9421,  0.6361],
        [ 1.2461,  0.8691,  1.0383],
        [ 0.8714, -0.4506, -0.8767]])
tensor([[-1.0610, -2.6291, -1.5116,  1.2287],
        [ 0.4833, -1.6545, -1.1173,  0.0300],
        [ 0.7695,  2.0895, -1.7074, -0.5123]])


## Save and Load Graphs

In [None]:
dgl.save_graphs("graph.dgl", g)

In [None]:
a,b = dgl.load_graphs("graph.dgl")
print(a)
print("-----")
print(b)

[Graph(num_nodes=6, num_edges=5,
      ndata_schemes={'y': Scheme(shape=(5, 4), dtype=torch.float32), 'x': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={'a': Scheme(shape=(4,), dtype=torch.float32)})]
-----
{}


## Custom GNN for Graph Classification Example

In [None]:
import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import dgl.data
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Generate a synthetic dataset with 10000 graphs, ranging from 10 to 500 nodes.
dataset = dgl.data.GINDataset("PROTEINS", self_loop=True)

Downloading /home/featurize/.dgl/GINDataset.zip from https://raw.githubusercontent.com/weihua916/powerful-gnns/master/dataset.zip...
Extracting file to /home/featurize/.dgl/GINDataset


In [None]:
print("Node feature dimensionality:", dataset.dim_nfeats)
print("Number of graph categories:", dataset.gclasses)


from dgl.dataloading import GraphDataLoader

Node feature dimensionality: 3
Number of graph categories: 2


In [None]:
print(dataset.graph_label)

AttributeError: 'GINDataset' object has no attribute 'graph_label'

### Define a Graph Dataloader

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler

num_examples = len(dataset)
num_train = int(num_examples * 0.8)

train_sampler = SubsetRandomSampler(torch.arange(num_train))
test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))

train_dataloader = GraphDataLoader(
    dataset, sampler=train_sampler, batch_size=5, drop_last=False
)
test_dataloader = GraphDataLoader(
    dataset, sampler=test_sampler, batch_size=5, drop_last=False
)

In [None]:
it = iter(train_dataloader)
batch = next(it)
print(batch)

[Graph(num_nodes=202, num_edges=892,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), tensor([0, 0, 1, 0, 1])]


In [None]:
batched_graph, labels = batch
print(
    "Number of nodes for each graph element in the batch:",
    batched_graph.batch_num_nodes(),
)
print(
    "Number of edges for each graph element in the batch:",
    batched_graph.batch_num_edges(),
)

# Recover the original graph elements from the minibatch
graphs = dgl.unbatch(batched_graph)
print("The original graphs in the minibatch:")
print(graphs)

Number of nodes for each graph element in the batch: tensor([30, 52, 13, 84, 23])
Number of edges for each graph element in the batch: tensor([142, 250,  65, 320, 115])
The original graphs in the minibatch:
[Graph(num_nodes=30, num_edges=142,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), Graph(num_nodes=52, num_edges=250,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), Graph(num_nodes=13, num_edges=65,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), Graph(num_nodes=84, num_edges=320,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), Graph(num_nodes=23, num_edges=115,
      ndata_schemes={'label': Scheme(shape=(), dtype=t

### Define Simple GCN

In [None]:
from dgl.nn import GraphConv


class GCN(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        g.ndata["h"] = h
        return dgl.mean_nodes(g, "h")

In [None]:
!ls ../../

data  hact-net	work


In [None]:
import os

directory = "../../data/Images"  # Replace with the actual directory path

# List all items (files and subdirectories) in the directory
items = os.listdir(directory)

# Filter out only the files from the list of items
files = [item for item in items if os.path.isfile(os.path.join(directory, item))]

num_files = len(files)
print("Number of files in the directory:", num_files)


Number of files in the directory: 4253


In [None]:
!ls ../

 Inference.ipynb	    main			    real-time.ipynb
 augment.ipynb		    main.py			   'resnet (1).py'
 dev_18.90_PHOENIX14-T.pt   main_copy.py		    resnet.py
 hands-on-session-2	    out.output-hypothesis-dev.ctm   session.zip
 histocartography	    output_video-1.mp4
 histocartography.zip	    output_video.mp4


In [None]:
# import sys
# import sys
# sys.path.append('../histocartography/histocartography')  # Add the parent_folder to the Python path
# from preprocessing import (
#     VahadaneStainNormalizer,         # stain normalizer
#     NucleiExtractor,                 # nuclei detector 
#     DeepFeatureExtractor,            # feature extractor 
#     KNNGraphBuilder,                 # kNN graph builder
#     ColorMergedSuperpixelExtractor,  # tissue detector
#     DeepFeatureExtractor,            # feature extractor
#     RAGGraphBuilder,                 # build graph
#     AssignmnentMatrixBuilder         # assignment matrix 
# )

: 

In [None]:
import os
store_path = "graph"
split = "test"
image_name = "1_00061_sub0_007"
cg_out = os.path.join(store_path, 'cell_graphs', split, image_name.replace('.png', '.bin'))
tg_out = os.path.join(store_path, 'tissue_graphs', split, image_name.replace('.png', '.bin'))
assign_out = os.path.join(store_path, 'assignment_matrices', split, image_name.replace('.png', '.h5'))

: 

In [None]:
print(cg_ou)

## Read an exmaple graph to have a look

In [20]:
!ls graph/assignment_mat/test

1_00061_sub0_002.h5 1_00061_sub0_004.h5


In [93]:
cell_path = "graph/cell_graphs/test/1_00061_sub0_002.bin"
tissue_path = "graph/tissue_graphs/test/1_00061_sub0_002.bin"
assignment_path = "graph/assignment_mat/test/1_00061_sub0_002.h5"
ass_two = "graph/assignment_mat/test/1_00061_sub0_004.h5"
assignment_paths  = ["graph/assignment_mat/test/1_00061_sub0_002.h5","graph/assignment_mat/test/1_00061_sub0_004.h5"]
loaded_graphs = dgl.load_graphs(cell_path)
print(loaded_graphs[0][0].ndata.keys())
cg_feat = loaded_graphs[0][0].ndata['feat']
cg_centroid = loaded_graphs[0][0].ndata['centroid']


dict_keys(['feat', 'centroid'])


In [94]:
loaded_graphs = dgl.load_graphs(tissue_path)
print(loaded_graphs[0][0].ndata.keys())
tg_feat = loaded_graphs[0][0].ndata['feat']
tg_centroid = loaded_graphs[0][0].ndata['centroid']

dict_keys(['feat', 'centroid'])


In [95]:
import h5py
    # Assuming the assignment matrix is stored in a dataset named "assignment"
with h5py.File(assignment_path, "r") as f:
    print(f.keys())
    print(f["assignment_matrix"].shape)
    ass_mat = f["assignment_matrix"]
    print(ass_mat)

    # Find the column number with value 1 in each row
    column_numbers = np.argmax(ass_mat, axis=1)

    # Print the column numbers
    # print(column_numbers)
    print(column_numbers.shape)

    value_counts = np.bincount(column_numbers, minlength=5)

    # Print the count for each value
    for value, count in enumerate(value_counts):
        print(f"Value {value}: Count {count}")


<KeysViewHDF5 ['assignment_matrix']>
(387, 9)
<HDF5 dataset "assignment_matrix": shape (387, 9), type "<f8">
(387,)
Value 0: Count 1
Value 1: Count 60
Value 2: Count 32
Value 3: Count 1
Value 4: Count 43
Value 5: Count 53
Value 6: Count 189
Value 7: Count 6
Value 8: Count 2


In [96]:
# List of HDF5 file paths
# assignment_paths = [
#     "graph/assignment_mat/test/1_00061_sub0_004.h5",
#     "path_to_another_file.h5",  # Add more file paths as needed
# ]
assignment_paths  = [assignment_path,"graph/assignment_mat/test/1_00061_sub0_004.h5"]
print(len(assignment_paths))
assignment_matrices = []
for assignment_path in assignment_paths:
    # Open the HDF5 file for reading
    print(assignment_path)
    with h5py.File(assignment_path, "r") as file:
        print("FDSFS")
        # Assuming the assignment matrix is stored in a dataset named "assignment_matrix"
        assignment_matrix = file["assignment_matrix"][:]
        
    print(f"Individual shape {assignment_matrix.shape}")
    assignment_matrices.append(assignment_matrix)
assignment_tensor = np.stack(assignment_matrices, axis=0)
print(assignment_tensor.shape)
# Find the column number with value 2 in each row
column_numbers = np.argmax(assignment_tensor, axis=2)

# Print the column_numbers shape
print(f"Shape of column_numbers {column_numbers.shape}")
# for i in range(column_numbers.shape[0]):
#     print(i.shape)
# print(column_numbers[1])

2
graph/assignment_mat/test/1_00061_sub0_002.h5
FDSFS
Individual shape (387, 9)
graph/assignment_mat/test/1_00061_sub0_004.h5
FDSFS
Individual shape (290, 5)


ValueError: all input arrays must have the same shape

In [102]:
print("Summary:")
print(f"Cell graph have feature size {cg_feat.size()} and centroid size {cg_centroid.size()}")
print(f"Tissue graph have feature size {tg_feat.size()} and centroid size {tg_centroid.size()}")
print(f"Assignment matrix size {ass_mat.shape}")
print(f"Column vector shape { column_numbers.shape}")
print(type(column_numbers))

Summary:
Cell graph have feature size torch.Size([387, 514]) and centroid size torch.Size([387, 2])
Tissue graph have feature size torch.Size([9, 514]) and centroid size torch.Size([9, 2])
Assignment matrix size (387, 9)
Column vector shape (387,)
<class 'numpy.ndarray'>


In [104]:
values = torch.from_numpy(column_numbers)
print(values.shape)

torch.Size([387])


In [110]:
summed_features = torch.zeros(tg_feat.shape)  # Assuming there are 10 classes
print(summed_features.shape)
# Use torch.scatter_add to accumulate features based on class labels
summed_features = torch.scatter_add(summed_features, dim=0, index=torch.from_numpy(column_numbers).unsqueeze(1), src=cg_feat)


torch.Size([9, 514])


In [116]:
# !pip3 install pyyaml

In [120]:
path = "config/config.yaml"
import yaml
with open(path, 'r') as file:
    configs = yaml.safe_load(file)
print(configs["gnn_param"])

{'cell_layers': 2, 'tissue_layers': 2, 'cell_conv_method': 'GIN'}


In [125]:
from glob import glob
base_data_path = "../../Report-nmi-wsi"
split = "test"
img_path = os.path.join(base_data_path,"Images", split)
print(img_path)
list_name = glob(img_path+"/*.png")
print(len(list_name))
image_names = [os.path.splitext(os.path.split(i)[-1])[0]  for i in list_name]

../../Report-nmi-wsi/Images/test
0


In [124]:
!ls ../../Report-nmi-wsi/Images/test

[31m115831_000.png[m[m       [31m116814_008.png[m[m       [31m118398_000.png[m[m
[31m115831_001.png[m[m       [31m116814_011.png[m[m       [31m118398_001.png[m[m
[31m115831_006.png[m[m       [31m116814_013.png[m[m       [31m118398_002.png[m[m
[31m115831_007.png[m[m       [31m116814_014.png[m[m       [31m118398_005.png[m[m
[31m115831_012.png[m[m       [31m116840_000.png[m[m       [31m118398_006.png[m[m
[31m115831_013.png[m[m       [31m116840_001.png[m[m       [31m118398_008.png[m[m
[31m115832_000.png[m[m       [31m116840_002.png[m[m       [31m118398_009.png[m[m
[31m115832_003.png[m[m       [31m116840_004.png[m[m       [31m118398_010.png[m[m
[31m115832_005.png[m[m       [31m116840_005.png[m[m       [31m118398_012.png[m[m
[31m115832_006.png[m[m       [31m116840_007.png[m[m       [31m118398_013.png[m[m
[31m115832_008.png[m[m       [31m116840_009.png[m[m       [31m118398_014.png[m[m
[31m11583