In [1]:
import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import numpy as np
import torch

g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]), num_nodes=6)
# Equivalently, PyTorch LongTensors also work.
g = dgl.graph(
    (torch.LongTensor([0, 0, 0, 0, 0]), torch.LongTensor([1, 2, 3, 4, 5])),
    num_nodes=6,
)

# You can omit the number of nodes argument if you can tell the number of nodes from the edge list alone.
g = dgl.graph(([0, 0, 0, 0, 0], [1, 2, 3, 4, 5]))



In [None]:
# Assign a 3-dimensional node feature vector for each node.
g.ndata["x"] = torch.randn(6, 3)
# Assign a 4-dimensional edge feature vector for each edge.
g.edata["a"] = torch.randn(5, 4)
# Assign a 5x4 node feature matrix for each node.  Node and edge features in DGL can be multi-dimensional.
g.ndata["y"] = torch.randn(6, 5, 4)

print(g.edata["a"])

tensor([[-1.0610, -2.6291, -1.5116,  1.2287],
        [ 0.4833, -1.6545, -1.1173,  0.0300],
        [ 0.0089,  0.4669,  0.4350,  1.2601],
        [ 0.7695,  2.0895, -1.7074, -0.5123],
        [ 0.0142, -0.0962, -1.2562, -0.8136]])


## Quantifying Graph Structure

In [None]:
print(g.num_nodes())
print(g.num_edges())
# Out degrees of the center node
print(g.out_degrees(0))
# In degrees of the center node - note that the graph is directed so the in degree should be 0.
print(g.in_degrees(0))

6
5
5
0


In [None]:
# Induce a subgraph from node 0, node 1 and node 3 from the original graph.
sg1 = g.subgraph([0, 1, 3])
# Induce a subgraph from edge 0, edge 1 and edge 3 from the original graph.
sg2 = g.edge_subgraph([0, 1, 3])

In [None]:
# The original node feature of each node in sg1
print(sg1.ndata["x"].shape)
# The original edge feature of each node in sg1
print(sg1.edata["a"].shape)
# The original node feature of each node in sg2
print(sg2.ndata["x"])
# The original edge feature of each node in sg2
print(sg2.edata["a"])

torch.Size([3, 3])
torch.Size([2, 4])
tensor([[-0.3360, -0.9976, -2.4161],
        [ 2.4269, -0.9421,  0.6361],
        [ 1.2461,  0.8691,  1.0383],
        [ 0.8714, -0.4506, -0.8767]])
tensor([[-1.0610, -2.6291, -1.5116,  1.2287],
        [ 0.4833, -1.6545, -1.1173,  0.0300],
        [ 0.7695,  2.0895, -1.7074, -0.5123]])


## Save and Load Graphs

In [None]:
dgl.save_graphs("graph.dgl", g)

In [None]:
a,b = dgl.load_graphs("graph.dgl")
print(a)
print("-----")
print(b)

[Graph(num_nodes=6, num_edges=5,
      ndata_schemes={'y': Scheme(shape=(5, 4), dtype=torch.float32), 'x': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={'a': Scheme(shape=(4,), dtype=torch.float32)})]
-----
{}


## Custom GNN for Graph Classification Example

In [None]:
import os

os.environ["DGLBACKEND"] = "pytorch"
import dgl
import dgl.data
import torch
import torch.nn as nn
import torch.nn.functional as F

In [None]:
# Generate a synthetic dataset with 10000 graphs, ranging from 10 to 500 nodes.
dataset = dgl.data.GINDataset("PROTEINS", self_loop=True)

Downloading /home/featurize/.dgl/GINDataset.zip from https://raw.githubusercontent.com/weihua916/powerful-gnns/master/dataset.zip...
Extracting file to /home/featurize/.dgl/GINDataset


In [None]:
print("Node feature dimensionality:", dataset.dim_nfeats)
print("Number of graph categories:", dataset.gclasses)


from dgl.dataloading import GraphDataLoader

Node feature dimensionality: 3
Number of graph categories: 2


In [None]:
print(dataset.graph_label)

AttributeError: 'GINDataset' object has no attribute 'graph_label'

### Define a Graph Dataloader

In [None]:
from torch.utils.data.sampler import SubsetRandomSampler

num_examples = len(dataset)
num_train = int(num_examples * 0.8)

train_sampler = SubsetRandomSampler(torch.arange(num_train))
test_sampler = SubsetRandomSampler(torch.arange(num_train, num_examples))

train_dataloader = GraphDataLoader(
    dataset, sampler=train_sampler, batch_size=5, drop_last=False
)
test_dataloader = GraphDataLoader(
    dataset, sampler=test_sampler, batch_size=5, drop_last=False
)

In [None]:
it = iter(train_dataloader)
batch = next(it)
print(batch)

[Graph(num_nodes=202, num_edges=892,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), tensor([0, 0, 1, 0, 1])]


In [None]:
batched_graph, labels = batch
print(
    "Number of nodes for each graph element in the batch:",
    batched_graph.batch_num_nodes(),
)
print(
    "Number of edges for each graph element in the batch:",
    batched_graph.batch_num_edges(),
)

# Recover the original graph elements from the minibatch
graphs = dgl.unbatch(batched_graph)
print("The original graphs in the minibatch:")
print(graphs)

Number of nodes for each graph element in the batch: tensor([30, 52, 13, 84, 23])
Number of edges for each graph element in the batch: tensor([142, 250,  65, 320, 115])
The original graphs in the minibatch:
[Graph(num_nodes=30, num_edges=142,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), Graph(num_nodes=52, num_edges=250,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), Graph(num_nodes=13, num_edges=65,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), Graph(num_nodes=84, num_edges=320,
      ndata_schemes={'label': Scheme(shape=(), dtype=torch.int64), 'attr': Scheme(shape=(3,), dtype=torch.float32)}
      edata_schemes={}), Graph(num_nodes=23, num_edges=115,
      ndata_schemes={'label': Scheme(shape=(), dtype=t

### Define Simple GCN

In [None]:
from dgl.nn import GraphConv


class GCN(nn.Module):
    def __init__(self, in_feats, h_feats, num_classes):
        super(GCN, self).__init__()
        self.conv1 = GraphConv(in_feats, h_feats)
        self.conv2 = GraphConv(h_feats, num_classes)

    def forward(self, g, in_feat):
        h = self.conv1(g, in_feat)
        h = F.relu(h)
        h = self.conv2(g, h)
        g.ndata["h"] = h
        return dgl.mean_nodes(g, "h")

In [None]:
!ls ../../

data  hact-net	work


In [None]:
import os

directory = "../../data/Images"  # Replace with the actual directory path

# List all items (files and subdirectories) in the directory
items = os.listdir(directory)

# Filter out only the files from the list of items
files = [item for item in items if os.path.isfile(os.path.join(directory, item))]

num_files = len(files)
print("Number of files in the directory:", num_files)


Number of files in the directory: 4253


In [None]:
!ls ../

 Inference.ipynb	    main			    real-time.ipynb
 augment.ipynb		    main.py			   'resnet (1).py'
 dev_18.90_PHOENIX14-T.pt   main_copy.py		    resnet.py
 hands-on-session-2	    out.output-hypothesis-dev.ctm   session.zip
 histocartography	    output_video-1.mp4
 histocartography.zip	    output_video.mp4


In [None]:
# import sys
# import sys
# sys.path.append('../histocartography/histocartography')  # Add the parent_folder to the Python path
# from preprocessing import (
#     VahadaneStainNormalizer,         # stain normalizer
#     NucleiExtractor,                 # nuclei detector 
#     DeepFeatureExtractor,            # feature extractor 
#     KNNGraphBuilder,                 # kNN graph builder
#     ColorMergedSuperpixelExtractor,  # tissue detector
#     DeepFeatureExtractor,            # feature extractor
#     RAGGraphBuilder,                 # build graph
#     AssignmnentMatrixBuilder         # assignment matrix 
# )

: 

In [None]:
import os
store_path = "graph"
split = "test"
image_name = "1_00061_sub0_007"
cg_out = os.path.join(store_path, 'cell_graphs', split, image_name.replace('.png', '.bin'))
tg_out = os.path.join(store_path, 'tissue_graphs', split, image_name.replace('.png', '.bin'))
assign_out = os.path.join(store_path, 'assignment_matrices', split, image_name.replace('.png', '.h5'))

: 

## Read an exmaple graph to have a look

In [20]:
!ls graph/assignment_mat/test

1_00061_sub0_002.h5 1_00061_sub0_004.h5


In [277]:
cell_path = "graph/cell_graphs/test/1_00061_sub0_002.bin"
tissue_path = "graph/tissue_graphs/test/1_00061_sub0_002.bin"
assignment_path = "graph/assignment_mat/test/1_00061_sub0_002.h5"
ass_two = "graph/assignment_mat/test/1_00061_sub0_004.h5"
assignment_paths  = ["graph/assignment_mat/test/1_00061_sub0_002.h5","graph/assignment_mat/test/1_00061_sub0_004.h5"]
loaded_graphs = dgl.load_graphs(cell_path)
print(loaded_graphs)
cg_feat = loaded_graphs[0][0].ndata['feat']
cg_centroid = loaded_graphs[0][0].ndata['centroid']
print(loaded_graphs[0][0].edges()[0].size())
print(loaded_graphs[0][0].edges()[1].size())
a = torch.stack(loaded_graphs[0][0].edges())
print(a.shape)

([Graph(num_nodes=387, num_edges=1908,
      ndata_schemes={'feat': Scheme(shape=(514,), dtype=torch.float32), 'centroid': Scheme(shape=(2,), dtype=torch.float32)}
      edata_schemes={})], {})
torch.Size([1908])
torch.Size([1908])
torch.Size([2, 1908])


In [94]:
loaded_graphs = dgl.load_graphs(tissue_path)
print(loaded_graphs[0][0].ndata.keys())
tg_feat = loaded_graphs[0][0].ndata['feat']
tg_centroid = loaded_graphs[0][0].ndata['centroid']

dict_keys(['feat', 'centroid'])


In [271]:
import h5py
    # Assuming the assignment matrix is stored in a dataset named "assignment"
with h5py.File(assignment_path, "r") as f:
    print(f.keys())
    print(f["assignment_matrix"].shape)
    ass_mat = f["assignment_matrix"]
    print(type(ass_mat))
    print(ass_mat)

print(f)
    # # Find the column number with value 1 in each row
    # column_numbers = np.argmax(ass_mat, axis=1)

    # # Print the column numbers
    # # print(column_numbers)
    # print(column_numbers.shape)

    # value_counts = np.bincount(column_numbers, minlength=5)

    # # Print the count for each value
    # for value, count in enumerate(value_counts):
    #     print(f"Value {value}: Count {count}")


<KeysViewHDF5 ['assignment_matrix']>
(387, 9)
<class 'h5py._hl.dataset.Dataset'>
<HDF5 dataset "assignment_matrix": shape (387, 9), type "<f8">
<Closed HDF5 file>


In [96]:
# List of HDF5 file paths
# assignment_paths = [
#     "graph/assignment_mat/test/1_00061_sub0_004.h5",
#     "path_to_another_file.h5",  # Add more file paths as needed
# ]
assignment_paths  = [assignment_path,"graph/assignment_mat/test/1_00061_sub0_004.h5"]
print(len(assignment_paths))
assignment_matrices = []
for assignment_path in assignment_paths:
    # Open the HDF5 file for reading
    print(assignment_path)
    with h5py.File(assignment_path, "r") as file:
        print("FDSFS")
        # Assuming the assignment matrix is stored in a dataset named "assignment_matrix"
        assignment_matrix = file["assignment_matrix"][:]
        
    print(f"Individual shape {assignment_matrix.shape}")
    assignment_matrices.append(assignment_matrix)
assignment_tensor = np.stack(assignment_matrices, axis=0)
print(assignment_tensor.shape)
# Find the column number with value 2 in each row
column_numbers = np.argmax(assignment_tensor, axis=2)

# Print the column_numbers shape
print(f"Shape of column_numbers {column_numbers.shape}")
# for i in range(column_numbers.shape[0]):
#     print(i.shape)
# print(column_numbers[1])

2
graph/assignment_mat/test/1_00061_sub0_002.h5
FDSFS
Individual shape (387, 9)
graph/assignment_mat/test/1_00061_sub0_004.h5
FDSFS
Individual shape (290, 5)


ValueError: all input arrays must have the same shape

In [102]:
print("Summary:")
print(f"Cell graph have feature size {cg_feat.size()} and centroid size {cg_centroid.size()}")
print(f"Tissue graph have feature size {tg_feat.size()} and centroid size {tg_centroid.size()}")
print(f"Assignment matrix size {ass_mat.shape}")
print(f"Column vector shape { column_numbers.shape}")
print(type(column_numbers))

Summary:
Cell graph have feature size torch.Size([387, 514]) and centroid size torch.Size([387, 2])
Tissue graph have feature size torch.Size([9, 514]) and centroid size torch.Size([9, 2])
Assignment matrix size (387, 9)
Column vector shape (387,)
<class 'numpy.ndarray'>


In [104]:
values = torch.from_numpy(column_numbers)
print(values.shape)

torch.Size([387])


In [110]:
summed_features = torch.zeros(tg_feat.shape)  # Assuming there are 10 classes
print(summed_features.shape)
# Use torch.scatter_add to accumulate features based on class labels
summed_features = torch.scatter_add(summed_features, dim=0, index=torch.from_numpy(column_numbers).unsqueeze(1), src=cg_feat)


torch.Size([9, 514])


In [120]:
path = "config/config.yaml"
import yaml
with open(path, 'r') as file:
    configs = yaml.safe_load(file)
print(configs["gnn_param"])

{'cell_layers': 2, 'tissue_layers': 2, 'cell_conv_method': 'GIN'}


In [125]:
from glob import glob
base_data_path = "../../Report-nmi-wsi"
split = "test"
img_path = os.path.join(base_data_path,"Images", split)
print(img_path)
list_name = glob(img_path+"/*.png")
print(len(list_name))
image_names = [os.path.splitext(os.path.split(i)[-1])[0]  for i in list_name]

../../Report-nmi-wsi/Images/test
0


In [135]:
class Vocabulary(object):
    """Simple vocabulary wrapper."""
    def __init__(self):
        self.word2idx = {}
        self.idx2word = {}
        self.idx = 0

    def add_word(self, word):
        if not word in self.word2idx:
            self.word2idx[word] = self.idx
            self.idx2word[self.idx] = word
            self.idx += 1

    def __call__(self, word):
        if not word in self.word2idx:
            return self.word2idx['<unk>']
        return self.word2idx[word]

    def __len__(self):
        return len(self.word2idx)

In [167]:
# !ls ../../Report-nmi-wsi/
import pickle

# Load the pickle file
with open('../../Report-nmi-wsi/vocab_bladderreport.pkl', 'rb') as f:
    vocab = pickle.load(f)

# print(data)
vocab_size = len(data.word2idx)
END_TOKEN = vocab.word2idx['<end>']
PAD_TOKEN = vocab.word2idx['<pad>'] # PAD_TOKEN is used for not supervison
print(len(vocab))

# Access the word-to-index mapping
for word, idx in vocab.word2idx.items():
    print(f"Word: {word}, Index: {idx}")



119
Word: <pad>, Index: 0
Word: <end>, Index: 1
Word: mild, Index: 2
Word: pleomorphism, Index: 3
Word: is, Index: 4
Word: present, Index: 5
Word: ., Index: 6
Word: there, Index: 7
Word: are, Index: 8
Word: no, Index: 9
Word: signs, Index: 10
Word: of, Index: 11
Word: crowding, Index: 12
Word: in, Index: 13
Word: the, Index: 14
Word: nuclei, Index: 15
Word: polarity, Index: 16
Word: along, Index: 17
Word: basement, Index: 18
Word: membrane, Index: 19
Word: negligibly, Index: 20
Word: lost, Index: 21
Word: mitosis, Index: 22
Word: rare, Index: 23
Word: have, Index: 24
Word: inconspicuous, Index: 25
Word: nucleoli, Index: 26
Word: normal, Index: 27
Word: nuclear, Index: 28
Word: features, Index: 29
Word: show, Index: 30
Word: pictured, Index: 31
Word: exhibit, Index: 32
Word: not, Index: 33
Word: exceedingly, Index: 34
Word: and, Index: 35
Word: limited, Index: 36
Word: only, Index: 37
Word: to, Index: 38
Word: basal, Index: 39
Word: layer, Index: 40
Word: urothelium, Index: 41
Word: ret

In [170]:
!pip3 install nltk

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Defaulting to user installation because normal site-packages is not writeable
Collecting nltk
  Downloading nltk-3.8.1-py3-none-any.whl (1.5 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m6.4 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hCollecting click (from nltk)
  Obtaining dependency information for click from https://files.pythonhosted.org/packages/00/2e/d53fa4befbf2cfa713304affc7ca780ce4fc1fd8710527771b58311a3229/click-8.1.7-py3-none-any.whl.metadata
  Downloading click-8.1.7-py3-none-any.whl.metadata (3.0 kB)
Collecting joblib (from nltk)
  Obtaining dependency information for joblib from https://files.pythonhosted.org/packages/10/40/d551139c85db202f1f384ba8bcf96aca2f329440a844f924c8a0040b6d02/joblib-1.3.2-py3-none-any.whl.metadata
  Downloading jo

In [194]:
!ls ../../Report-nmi-wsi

[34mImages[m[m                  eval_annotation.json    [31mtest_annotation.json[m[m
[31mImages.zip[m[m              nmi-wsi.zip             [31mtrain_annotation.json[m[m
[31mclass.json[m[m              [34mtest[m[m                    vocab_bladderreport.pkl


In [230]:
img_path = "../../Report-nmi-wsi/Images"
split = "test"
report_path = "../../Report-nmi-wsi/test_annotation.json"
list_name = glob(img_path+"/test"+"/*.png")
image_names = [os.path.splitext(os.path.split(i)[-1])[0]  for i in list_name]
image_file_paths = [os.path.join(img_path,split,i) for i in image_names]
with open(report_path, 'r') as json_file:
    report_data = json.load(json_file)
image_names.sort()
print(image_names)
sorted_report = {key: report_data[key] for key in sorted(report_data)}
print(sorted_report.keys())
captions = [item['caption'] for key, item in sorted_report.items() if key in image_names]
labels = [sorted_report[key]['label'] for key in image_names if key in sorted_report.keys()]
print(captions)
print(len(captions[0]))
print(labels)
sentences = [cap.rstrip('.').replace(',','').split('. ') for cap in captions]
print(sentences)

['115831_000', '115831_001', '115831_006', '115831_007', '115831_012', '115831_013', '115832_000', '115832_003', '115832_005', '115832_006', '115832_008', '115832_009', '115832_011', '115832_013', '115832_014', '115834_000', '115834_002', '115834_004', '115834_006', '115834_009', '115834_010', '115834_012', '115834_013', '115835_002', '115835_003', '115835_004', '115835_009', '115835_010', '115835_011', '115835_012', '115835_013', '115836_001', '115836_003', '115836_005', '115836_006', '115836_007', '115836_008', '115836_009', '115836_010', '115836_012', '115836_013', '115836_014', '115838_001', '115838_002', '115838_004', '115838_006', '115838_007', '115838_008', '115838_009', '115838_010', '115838_011', '115838_012', '115838_013', '115838_014', '115840_000', '115840_001', '115840_003', '115840_004', '115840_005', '115840_006', '115840_007', '115840_008', '115840_011', '115840_012', '115840_013', '115840_014', '115846_001', '115846_002', '115846_003', '115846_004', '115846_005', '1158

AttributeError: 'list' object has no attribute 'rstrip'

In [227]:
print(7 % 5)
print(int(9 / 5))

2
1


In [261]:
import json
import nltk
anno = json.load(open('../../Report-nmi-wsi/test_annotation.json', 'r'))
exp = anno['1_00161_sub0_041']['caption'][0]
print(exp)
#sentences = exp.rstrip('.').replace(',','').split('. ')
#sentences = exp.rstrip('.').replace(',','').split('. ')
print(sentences)
# print(len(anno))
# num_anno_per_img = 5
# num_data = len(anno)
# order = [a for a in range(0, num_data)]
# print(order[0:20])
# for idx in order[0:20]:
#     id = order[idx]
#     capid = int(id / len(anno))
#     imid = int(id / num_anno_per_img)
#     print(f"id {id} capid {capid} imid {imid} ")

sent_tokens = [] # convert to tokens for all num_feature sentences
paragraphs = []
num_feature = 6
max_subseq_len = 16
sentences = exp.rstrip('.').replace(',','').split('. ')
#assert(len(sentences) == self.num_feature, 'the number of sentence is not correct in [{}]'.format(caption))
print(sentences)
sent_tokens = [] # convert to tokens for all num_feature sentences
paragraph = []
for s, sentence in enumerate(sentences):
    # if feature (except conclusion) is insufficient information, do not output it
    # but the conclusion (last one) is insufficient information, we still output it
    if 'insufficient' in sentence and s < (len(sentences)-1): 
        continue
    tokens = nltk.tokenize.word_tokenize(str(sentence).lower())
    paragraph.append(str(sentence))
    #tokens.append('<end>') # add stop indictor
    tmp = [vocab(token) for token in tokens]
    sent_tokens.extend(tmp)
sent_tokens.append(vocab('<end>'))
# +1 to add feature indictor at the head
print(paragraphs)
print(f"sent_tokens is {sent_tokens}")
caption = np.ones((num_feature, max_subseq_len), np.int32) * PAD_TOKEN # 1 for end token
stop_indictor = np.ones(num_feature, np.int32) * PAD_TOKEN
#print(stop_indictor.size)
# assert(self.PAD_TOKEN == 0)
# print(paragraph)
#print(len(sent_tokens))
#for sent_token in sent_tokens:
stop_label = 2
sent_token = sent_tokens[0]
# for j, tmp in enumerate(sent_tokens):
#         caption[j,:len(tmp)]= tmp
#         stop_indictor[j] = 1 # continue indictor
# stop_indictor[j] = stop_label # stop indictor
# paragraph = ' '.join(paragraph).lower()
# print(len(paragraph))
# print(len(sent_tokens))
# a=torch.Tensor(sent_tokens[0]).long()                    # line 6
# print(a.shape)


Mild pleomorphism and cytologic atypia is present. There are no signs of crowding in the nuclei. Polarity is not lost. Mitosis appears to be rare. The nucleoli of nuclei are prominent. Low grade.
['Mild pleomorphism and cytologic atypia is present', 'There are no signs of crowding in the nuclei', 'Polarity is not lost', 'Mitosis appears to be rare', 'The nucleoli of nuclei are prominent', 'Low grade']
['Mild pleomorphism and cytologic atypia is present', 'There are no signs of crowding in the nuclei', 'Polarity is not lost', 'Mitosis appears to be rare', 'The nucleoli of nuclei are prominent', 'Low grade']
[]
sent_tokens is [2, 3, 35, 44, 45, 4, 5, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 4, 33, 21, 22, 46, 38, 47, 23, 14, 26, 11, 15, 8, 64, 92, 81, 1]


In [280]:
!pip3 install -r requirements.txt

Please see https://github.com/pypa/pip/issues/5599 for advice on fixing the underlying issue.
To avoid this problem you can invoke Python with '-m pip' instead of running pip directly.
Defaulting to user installation because normal site-packages is not writeable
Collecting opencv-python==4.8.0.76 (from -r requirements.txt (line 2))
  Obtaining dependency information for opencv-python==4.8.0.76 from https://files.pythonhosted.org/packages/8a/6f/8aa049b66bcba8b5a4dc872ecfdbcd8603a96704b070fde22222e479c3d7/opencv_python-4.8.0.76-cp37-abi3-macosx_10_16_x86_64.whl.metadata
  Downloading opencv_python-4.8.0.76-cp37-abi3-macosx_10_16_x86_64.whl.metadata (19 kB)
Collecting pandas==2.0.3 (from -r requirements.txt (line 3))
  Obtaining dependency information for pandas==2.0.3 from https://files.pythonhosted.org/packages/78/a8/07dd10f90ca915ed914853cd57f79bfc22e1ef4384ab56cb4336d2fc1f2a/pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.whl.metadata
  Downloading pandas-2.0.3-cp38-cp38-macosx_10_9_x86_64.

In [18]:
from dataloader import make_dataloader
from Vocabulary import Vocabulary
from models.Graph_Model import GNNEncoder
from models.LSTM import LSTMDecoder
import torch
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(DEVICE)
loader = make_dataloader(
    batch_size = 4,
    split = "test",
    base_data_path = "../../Report-nmi-wsi",
    graph_path = "graph",
    vocab_path = "../../Report-nmi-wsi/vocab_bladderreport.pkl",
    shuffle=True,
    num_workers=0,
    load_in_ram = True
)
vocab_size = len(loader.dataset.vocab)
for batched_idx, batch_data in enumerate(loader):
    cg, tg, assign_mat, caption_tokens, label = batch_data  

    # print(assign_mat[0].shape)
    encoder = GNNEncoder(cell_conv_method = "GCN", tissue_conv_method = "GCN", pool_method = None, num_layers = 3, aggregate_method = "sum", input_feat = 514,output_size = 256)
    out = encoder(cg[0],tg[0],assign_mat[0])
    print(f"GNN out shape is {out.shape}")
    decoder = LSTMDecoder(vocab_size = vocab_size, embed_size = out.shape[0], hidden_size = 256,  batch_size=4, device = DEVICE)
    lstm_out = decoder(out)
    print(f"LSTM out shape {lstm_out.shape}")

        #lstm_model = LSTMDecoder(vocab_size = dataloader.vocab.size, embed_size = embed_size, hidden_size = hidden_size,  batch_size=batch_size, device = DEVICE)

    

cpu
ModuleList(
  (0-2): 3 x GCNConv(514, 514)
)
<class 'torch.Tensor'>
<class 'torch.Tensor'>
WHY NOT
GCNConv(514, 514)
GCNConv(514, 514)
GCNConv(514, 514)
---------Column Number--------
Summed feature shape: torch.Size([5, 514])
Column number shape: torch.Size([5])
After unsqueeze shape: torch.Size([5, 1])
---------Column Number--------
Exception: index 93 is out of bounds for dimension 0 with size 5
---------Column Number--------
Summed feature shape: torch.Size([5, 514])
Column number shape: torch.Size([5])
After unsqueeze shape: torch.Size([5, 1])
---------Column Number--------
GNN out shape is torch.Size([514])


RuntimeError: Expected tensor for argument #1 'indices' to have one of the following scalar types: Long, Int; but got torch.FloatTensor instead (while checking arguments for embedding)

In [38]:
import dgl
import torch
import torch.nn as nn

class GNNModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim):
        super(GNNModel, self).__init__()
        
        self.conv1 = dgl.nn.GraphConv(input_dim, hidden_dim)
        self.conv2 = dgl.nn.GraphConv(hidden_dim, output_dim)
        
    def forward(self, g):
        # First Graph Convolution Layer
        h = self.conv1(g, g.ndata['feat'])
        h = torch.relu(h)
        print(f"first conv {h.shape}")
        # Second Graph Convolution Layer
        h = self.conv2(g, h)
        h = torch.relu(h)
        print(f"second conv {h.shape}")
        g.ndata['h'] = h
        # Aggregating node embeddings to graph embeddings
        graph_embeddings = dgl.sum_nodes(g, 'h')
        print("--------Below , sum with feat----------")
        print(dgl.sum_nodes(g, 'feat').shape)
        # print("--------Below , sum with h----------")
        # print(graph_embeddings)
        return graph_embeddings

# Example usage
input_dim = 64  # Dimension of node features
hidden_dim = 128  # Hidden dimension for GNN layers
output_dim = 256  # Desired embedding size

# Create synthetic batched graph data (replace with your own data loading logic)
graph_list = [dgl.graph(([0, 1, 2, 3, 4, 5, 6, 7, 8, 9], [1, 2, 3, 4, 5, 6, 7, 8, 9, 0])),
              dgl.graph(([0, 1, 2, 3, 4, 5, 6, 7], [1, 2, 3, 4, 5, 6, 7, 0])),
              dgl.graph(([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0])),
              dgl.graph(([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14], [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0])),
              ]

# Add node features to each graph
for graph in graph_list:
    num_nodes = graph.number_of_nodes()
    graph.ndata['feat'] = torch.randn(num_nodes, input_dim)

# Instantiate the GNN model
model = GNNModel(input_dim, hidden_dim, output_dim)

# Convert the graph list into a batched graph
batched_graph = dgl.batch(graph_list)
print(batched_graph)
print(f"Single Grpah {graph_list[0]}")
# Process the batched graph and get graph embeddings
embeddings = model(batched_graph)
#print(embeddings)
print(embeddings.shape)  # Should be (batch_size, output_dim)



Graph(num_nodes=45, num_edges=45,
      ndata_schemes={'feat': Scheme(shape=(64,), dtype=torch.float32)}
      edata_schemes={})
Single Grpah Graph(num_nodes=10, num_edges=10,
      ndata_schemes={'feat': Scheme(shape=(64,), dtype=torch.float32)}
      edata_schemes={})
first conv torch.Size([45, 128])
second conv torch.Size([45, 256])
--------Below , sum with feat----------
torch.Size([4, 64])
tensor([[0.5126, 0.3277, 0.5775,  ..., 4.8440, 0.6812, 5.0525],
        [1.2623, 1.2304, 0.5477,  ..., 3.7388, 0.0000, 1.5861],
        [2.0070, 2.5013, 0.6266,  ..., 4.5245, 1.2443, 4.7723],
        [2.1535, 1.0610, 0.9756,  ..., 8.9756, 1.2385, 4.0818]],
       grad_fn=<SegmentReduceBackward>)
torch.Size([4, 256])
