In [4]:
import argparse
import sys

import mlflow
import numpy as np
import scanpy as sc
import squidpy as sq
import torch_geometric
from torch_geometric.data import Data
from torch_geometric.transforms import RandomLinkSplit, RandomNodeSplit

from autotalker.data import load_spatial_adata_from_csv
from autotalker.data import SpatialAnnTorchDataset
from autotalker.models import Autotalker

# --- Run configuration ------------------------------------------------------
dataset = "squidpy_seqfish"
n_epochs = 10
lr = 0.01
batch_size = 128
n_hidden = 32
n_latent = 16
dropout_rate = 0.

print(f"Using dataset {dataset}.")

# --- Load the AnnData object for the selected dataset ----------------------
if dataset == "deeplinc_seqfish":
    # NOTE(review): the statistics below read
    # ``adata.obsp['spatial_connectivities']`` -- presumably the loader
    # stores the adjacency under that key; confirm.
    adata = load_spatial_adata_from_csv("datasets/seqFISH/counts.csv",
                                        "datasets/seqFISH/adj.csv")
    cell_type_key = None
elif dataset == "squidpy_seqfish":
    adata = sq.datasets.seqfish()
    sq.gr.spatial_neighbors(adata, radius=0.04, coord_type="generic")
    cell_type_key = "celltype_mapped_refined"
elif dataset == "squidpy_slideseqv2":
    adata = sq.datasets.slideseqv2()
    sq.gr.spatial_neighbors(adata, radius=30.0, coord_type="generic")
    # The slideseqv2 annotation column is "cluster"; the seqFISH key
    # "celltype_mapped_refined" used here before was a copy-paste leftover.
    cell_type_key = "cluster"
else:
    # Fail here with a clear message instead of a NameError on ``adata``.
    raise ValueError(
        f"Unknown dataset '{dataset}'. Expected one of 'deeplinc_seqfish', "
        "'squidpy_seqfish' or 'squidpy_slideseqv2'.")

# Keep an untouched copy of the expression matrix.
adata.layers["counts"] = adata.X.copy()

# --- Graph statistics -------------------------------------------------------
# Work on the sparse adjacency directly: densifying an n_nodes x n_nodes
# matrix with ``.toarray()`` needs several GB for ~20k nodes.
spatial_adj = adata.obsp["spatial_connectivities"]

print(f"Number of nodes: {adata.X.shape[0]}")
print(f"Number of node features: {adata.X.shape[1]}")
avg_edges_per_node = round(spatial_adj.sum(axis=0).mean(), 2)
print(f"Average number of edges per node: {avg_edges_per_node}")
# Sum of the upper triangle including the diagonal (row <= col), i.e. the
# same quantity ``np.triu(dense).sum()`` produced before.
spatial_adj_coo = spatial_adj.tocoo()
n_edges = int(
    spatial_adj_coo.data[spatial_adj_coo.row <= spatial_adj_coo.col].sum())
print(f"Number of edges: {n_edges}")

# --- Wrap as a PyTorch Geometric ``Data`` object ----------------------------
# NOTE: ``dataset`` is rebound here from the dataset name (str) to the torch
# dataset object; later cells read ``dataset.x`` etc.
dataset = SpatialAnnTorchDataset(adata, adj_key="spatial_connectivities")
data = Data(x=dataset.x,
            edge_index=dataset.edge_index,
            size_factors=dataset.size_factors)

Using dataset squidpy_seqfish.
Number of nodes: 19416
Number of node features: 351
Average number of edges per node: 4.4
Number of edges: 42694


In [1]:
data

NameError: name 'data' is not defined

In [2]:
# Node-level split: 10% validation nodes, no test nodes.
random_node_split = RandomNodeSplit(num_val=0.1, num_test=0, key="x")

# Edge-level split: 10% validation and 10% test edges on an undirected
# graph, with one negative edge per positive edge.
random_link_split = RandomLinkSplit(
    num_val=0.1,
    num_test=0.1,
    is_undirected=True,
    neg_sampling_ratio=1.,
)

# Apply the node split first, then split the edges of the resulting object.
data = random_node_split(data)
train_data, val_data, test_data = random_link_split(data)

print(train_data, val_data, sep="\n")

NameError: name 'RandomLinkSplit' is not defined

In [7]:
data

Data(x=[19416, 351], edge_index=[2, 85388], size_factors=[19416], train_mask=[19416], val_mask=[19416], test_mask=[19416])

In [8]:
# Count of entries set in the training node mask; `.sum()` on the mask
# yields a tensor, not a Python int.
n_nodes = train_data.train_mask.sum()
node_loader_batch_size = 4
# Number of node batches per pass at this batch size. The recorded result is
# non-integer (4368.5), i.e. the training nodes do not divide evenly into
# batches of 4.
n_nodes / node_loader_batch_size

tensor(4368.5000)

In [9]:
# ``RandomLinkSplit(neg_sampling_ratio=1.)`` stores positive AND negative
# edges in ``train_data.edge_label_index`` (labels in ``train_data.edge_label``).
# When no ``edge_label`` is passed, LinkNeighborLoader treats every supplied
# edge as positive before adding its own negatives, so the split's negatives
# would be mislabelled. Hand the loader only the true positive edges and let
# it draw the negatives itself.
train_pos_edge_mask = train_data.edge_label == 1

edge_train_loader = torch_geometric.loader.LinkNeighborLoader(
    train_data,
    num_neighbors=[-1]*1, # iterations of neighbor sampling
    batch_size=16,
    edge_label_index=train_data.edge_label_index[:, train_pos_edge_mask],
    directed=False,
    shuffle=False,
    neg_sampling_ratio=1.0)

In [10]:
# Node-level mini-batch loader over the training graph. Seed nodes are the
# ones selected by ``train_mask``; all neighbours (-1) are sampled for a
# single hop.
node_train_loader = torch_geometric.loader.NeighborLoader(
    train_data,
    input_nodes=train_data.train_mask,
    num_neighbors=[-1],
    batch_size=4,
    shuffle=False,
    directed=True,
)

In [11]:
# ``val_data.edge_label_index`` from RandomLinkSplit already mixes positive
# and negative edges (labels in ``val_data.edge_label``). When no
# ``edge_label`` is passed, LinkNeighborLoader labels every supplied edge as
# positive before sampling its own negatives, so restrict the input to the
# true positive edges.
val_pos_edge_mask = val_data.edge_label == 1

edge_val_loader = torch_geometric.loader.LinkNeighborLoader(
    val_data,
    num_neighbors=[-1]*1, # iterations of neighbor sampling
    batch_size=4,
    edge_label_index=val_data.edge_label_index[:, val_pos_edge_mask],
    directed=False,
    shuffle=True,
    neg_sampling_ratio=1.0)

In [15]:
# Peek at the first node mini-batch of two consecutive passes over the loader.
for epoch in range(2):
    for batch in node_train_loader:
        # Batch summary followed by the number of nodes it contains.
        print(batch, batch.x.size(0), sep="\n")
        break

Data(x=[19, 351], edge_index=[2, 15], size_factors=[19], train_mask=[19], val_mask=[19], test_mask=[19], edge_label=[15], edge_label_index=[2, 15], batch_size=4)
19
Data(x=[19, 351], edge_index=[2, 15], size_factors=[19], train_mask=[19], val_mask=[19], test_mask=[19], edge_label=[15], edge_label_index=[2, 15], batch_size=4)
19


In [13]:
# Node-level loader for validation. The seed nodes must come from the
# validation mask; seeding with ``train_data.train_mask`` (as before) would
# evaluate on the training nodes instead.
node_val_loader = torch_geometric.loader.NeighborLoader(
    val_data,
    num_neighbors=[-1]*1, # iterations of neighbor sampling
    batch_size=8,
    input_nodes=val_data.val_mask)

In [14]:
n_edges = train_data.edge_label_index.size(1)

In [None]:
n_edges

In [None]:
edge_batch_size = 256

In [None]:
edge_train_loader_iters = int(np.ceil(n_edges / edge_batch_size))

In [None]:
edge_train_loader_iters

In [None]:
n_nodes = train_data.train_mask.sum()

In [None]:
n_nodes.item()

In [None]:
node_batch_size = int(np.floor(n_nodes / edge_train_loader_iters))

In [None]:
node_batch_size

In [None]:
len(node_train_loader)

In [None]:
len(edge_train_loader)

In [None]:
len(node_train_loader)

In [None]:
# Print the first node mini-batch of four consecutive passes.
for epoch in range(4):
    for batch in node_train_loader:
        print(batch)
        break

# Walk both loaders in lockstep and show only the pairs past index 2180.
for i, zipped in enumerate(zip(edge_train_loader, node_train_loader)):
    if i <= 2180:
        continue
    print(zipped)

In [None]:
len(edge_train_loader)

In [None]:
len(node_train_loader)

In [None]:
sampled_data = next(iter(edge_val_loader))
print(sampled_data)

In [None]:
sampled_data.edge_label_index

In [None]:
# NOTE(review): `node_split` is not defined in any cell visible in this
# export; the only node-split transform defined above is `random_node_split`
# -- presumably that is what was meant; confirm.
sampled_data = node_split(sampled_data)

In [None]:
sampled_data.train_mask

In [None]:
sampled_data_train = sampled_data[sampled_data.train_mask]

In [None]:
len(sampled_data.x[sampled_data.val_mask])

In [None]:
sampled_data["val_mask"]

In [None]:
len(sampled_data.x)

In [None]:
from torch_geometric.utils import coalesce
coalesce(sampled_data.edge_label_index)

In [None]:
sampled_data.edge_label_index[0].sort(dim=-1)

In [None]:
sampled_data.edge_label

In [None]:
# Order the label edges (and their labels) by the node index stored in the
# first row of ``edge_label_index``.
first_row = sampled_data.edge_label_index[0]
sort_index = first_row.sort(dim=-1)[1]  # element 1 of the sort result = indices
edge_labels_sorted = sampled_data.edge_label[sort_index]
edge_label_index_sorted = sampled_data.edge_label_index[:, sort_index]

In [None]:
edge_label_index_sorted

In [None]:
edge_labels_sorted

In [None]:
edge_labels = sampled_data.edge_label

In [None]:
edge_labels

In [None]:
edge_label_index_sorted

In [None]:
# NOTE(review): `edge_label_index_sorted` is assigned earlier by
# column-indexing `sampled_data.edge_label_index`, not from a `.sort()`
# result, so `.indices` here is presumably not the sort-indices field that
# was intended -- `sort_index` looks like the right name; confirm.
edge_label_index_sorted.indices[0]

In [None]:
# NOTE(review): `edge_label_index_sorted` comes from column-indexing a
# tensor rather than from `.sort()`, so `.indices[0]` is presumably a
# leftover -- `sampled_data.edge_label[sort_index]` looks like the intended
# expression; confirm.
sampled_data.edge_label[edge_label_index_sorted.indices[0]]

In [None]:
# NOTE(review): no earlier cell visible in this export imports `torch` or
# assigns `x`, so this presumably relies on leftover kernel state; confirm
# or move the import/definition above this cell.
torch.arange(x.size(0)).unsqueeze(1)

In [None]:
# In-place scatter of the value 1 into `onehot` along dim 1 at positions `idx`.
# NOTE(review): neither `onehot` nor `idx` is assigned in any cell visible
# in this export -- presumably leftover kernel state; confirm.
onehot.scatter_(1, idx.long(), 1)

In [None]:
# NOTE(review): `sort_edge_index` is not imported in any cell visible in
# this export, and it is called on `edge_label` rather than on an edge
# index -- presumably `sampled_data.edge_label_index` was intended; confirm.
sort_edge_index(sampled_data.edge_label)

In [None]:
# Pull the first mini-batch from the loader and print its summary.
# NOTE(review): `train_loader` is not defined in any cell visible in this
# export (the loaders above are named `edge_train_loader` and
# `node_train_loader`) -- presumably a name from an earlier revision; confirm.
sampled_data = next(iter(train_loader))
print(sampled_data)

In [None]:
len(valid_loader)

In [None]:
len(train_loader)

In [None]:
1423*3

In [None]:
len(train_loader)

In [None]:
# NOTE(review): `node_split` is not defined in any cell visible in this
# export; `random_node_split` is the only node-split transform defined
# above -- presumably that is what was meant; confirm.
sampled_data = node_split(sampled_data)

In [None]:
sampled_data

In [None]:
import torch

# Boolean mask over the sampled label edges: True where the edge label is
# greater than 0. (A bare `sampled_data.edge_label` expression that sat
# mid-cell was removed: it was neither displayed nor used.)
pos_edge_label_mask = sampled_data.edge_label > 0

In [None]:
adj_recon_logits = torch.randn(10, 10)

In [None]:
adj_recon_logits

In [None]:
(torch.tensor(adj_recon_logits.shape[0]) - torch.tensor(mask.shape[0])).item()

In [None]:
torch.tensor(mask.shape)

In [None]:
n_nodes=adj_recon_logits.shape[0]
n_nodes

In [None]:
pad_dim = (torch.tensor(adj_recon_logits.shape[0]) - torch.tensor(mask.shape[0])).item()

In [None]:
padded_mask = F.pad(mask, (0, pad_dim, 0, pad_dim), "constant", False)

In [None]:
mask.size()

In [None]:
padded_mask.size()

In [None]:
sampled_data.edge_label_index

In [None]:
adj_recon_logits[7,2]

In [None]:
import torch.nn.functional as F

# Dense boolean adjacency built from the sampled label edges. It has to be
# created BEFORE `pad_dim`, which depends on its shape; the previous order
# read `mask` before assigning it (NameError on a fresh kernel, stale value
# on re-run). `squeeze(0)` drops only the leading batch dimension so a
# 1-node graph still yields a 2-D mask.
mask = torch_geometric.utils.to_dense_adj(
    sampled_data.edge_label_index).squeeze(0) > 0

# Number of rows/columns missing for `mask` to match `adj_recon_logits`.
pad_dim = adj_recon_logits.shape[0] - mask.shape[0]

# Pad with False at the end of both dimensions, then pick the logits at the
# positions marked True.
padded_mask = F.pad(mask, (0, pad_dim, 0, pad_dim), "constant", False)
torch.masked_select(adj_recon_logits, padded_mask)

In [None]:
padded_mask

In [None]:
sampled_data.edge_label_index[:, pos_edge_label_mask]

In [None]:
from torch_geometric.utils import add_self_loops

edge_index_self_loops = add_self_loops(sampled_data.edge_label_index)[0]

In [None]:
edge_label_index = sampled_data.edge_label_index
edge_labels = sampled_data.edge_label

In [None]:
edge_label_index.shape[0]

In [None]:
edge_index_self_loops

In [None]:
n_self_loops = edge_index_self_loops.shape[1] - edge_label_index.shape[1]

In [None]:
n_self_loops

In [None]:
sampled_data.x.shape[0] ** 2

In [None]:
(sampled_data.edge_label == 0).sum()

In [None]:
torch.cat((edge_labels, torch.ones(n_self_loops)))

In [None]:
edge_index_self_loops.size()

In [None]:
sampled_data.val_mask

In [None]:
sampled_data.x[sampled_data.val_mask]

In [None]:
# Toy 2 x 6 tensor in which the column (2, 2) appears twice.
t1 = torch.tensor([
    [1, 2, 3, 4, 2, 5],
    [1, 2, 7, 4, 2, 5],
])
# Column-wise de-duplication; because `return_inverse=True` the result is a
# (unique_columns, inverse_indices) pair rather than a single tensor.
t1_no_dups = torch.unique(t1, return_inverse=True, dim=1)

In [None]:
t1

In [None]:
t1_no_dups

In [None]:
sampled_data.edge_label

In [None]:
torch.tensor([1])

In [None]:
sort_index = edge_label_index[0].sort(dim=-1).indices
# edge_labels_sorted = edge_label[sort_index]

In [None]:
edge_label_index = torch.cat((sampled_data.edge_label_index, torch.tensor([[2, 1], [3, 6]])), dim=1)
edge_label = torch.cat((sampled_data.edge_label, torch.tensor([0, 1.])), dim=0)

In [None]:
edge_label_index

In [None]:
edge_label

In [None]:
def unique(x, dim=-1):
    """Return the unique slices of ``x`` along ``dim`` and where each first occurs.

    Args:
        x: Tensor to de-duplicate.
        dim: Dimension along which slices are compared (forwarded to
            ``torch.unique``).

    Returns:
        A tuple ``(unique_slices, first_index)``. ``unique_slices`` is the
        result of ``torch.unique(x, dim=dim)``. ``first_index`` is a 1-D
        integer tensor where ``first_index[k]`` is the smallest position
        along ``dim`` at which the k-th unique slice appears in ``x``.
    """
    unique_slices, inverse = torch.unique(x, return_inverse=True, dim=dim)
    # When ``dim`` is given, ``inverse`` is 1-D (one entry per slice of ``x``),
    # so everything below works along its only axis regardless of ``dim``.
    # A stable sort groups equal inverse ids while keeping original order,
    # hence the first element of every group is the first occurrence. This
    # avoids relying on the write order of ``scatter_`` with duplicate indices.
    sorted_inverse, order = torch.sort(inverse, stable=True)
    is_first = torch.ones_like(sorted_inverse, dtype=torch.bool)
    is_first[1:] = sorted_inverse[1:] != sorted_inverse[:-1]
    return unique_slices, order[is_first]

In [None]:
edge_label_index, sort_index = unique(edge_label_index)

In [None]:
edge_label_index

In [None]:
sort_index

In [None]:
edge_label[sort_index]

In [None]:
print(sort_index)

In [None]:
sort_index = edge_label_index[0].sort(dim=-1).indices
# edge_labels_sorted = edge_label[sort_index]

In [None]:
print(sort_index)

In [None]:
edge_label_index

In [None]:
sampled_data.edge_label_index

In [None]:
from torch_geometric.utils import index_to_mask, mask_to_index, to_dense_adj

In [None]:
to_dense_adj(sampled_data.edge_label_index)

In [None]:
A = mask_to_index(index_to_mask(sampled_data.edge_label_index, 16))

In [1]:
import numpy as np
import squidpy as sq

from autotalker.data import load_spatial_adata_from_csv
from autotalker.data import prepare_data


dataset = "squidpy_seqfish"

print(f"Using dataset {dataset}.")

# --- Load the AnnData object for the selected dataset ----------------------
if dataset == "deeplinc_seqfish":
    # NOTE(review): the statistics below read
    # ``adata.obsp['spatial_connectivities']`` -- presumably the loader
    # stores the adjacency under that key; confirm.
    adata = load_spatial_adata_from_csv("datasets/seqFISH/counts.csv",
                                        "datasets/seqFISH/adj.csv")
    cell_type_key = None
elif dataset == "squidpy_seqfish":
    adata = sq.datasets.seqfish()
    sq.gr.spatial_neighbors(adata, radius=0.04, coord_type="generic")
elif dataset == "squidpy_slideseqv2":
    adata = sq.datasets.slideseqv2()
    sq.gr.spatial_neighbors(adata, radius=30.0, coord_type="generic")
else:
    # Fail here with a clear message instead of a NameError on ``adata``.
    raise ValueError(
        f"Unknown dataset '{dataset}'. Expected one of 'deeplinc_seqfish', "
        "'squidpy_seqfish' or 'squidpy_slideseqv2'.")

# --- Graph statistics -------------------------------------------------------
# Work on the sparse adjacency directly: densifying an n_nodes x n_nodes
# matrix with ``.toarray()`` needs several GB for ~20k nodes.
spatial_adj = adata.obsp["spatial_connectivities"]

print(f"Number of nodes: {adata.X.shape[0]}")
print(f"Number of node features: {adata.X.shape[1]}")
avg_edges_per_node = round(spatial_adj.sum(axis=0).mean(), 2)
print(f"Average number of edges per node: {avg_edges_per_node}")
# Sum of the upper triangle including the diagonal (row <= col), i.e. the
# same quantity ``np.triu(dense).sum()`` produced before.
spatial_adj_coo = spatial_adj.tocoo()
n_edges = int(
    spatial_adj_coo.data[spatial_adj_coo.row <= spatial_adj_coo.col].sum())
print(f"Number of edges: {n_edges}")

# --- Edge- and node-level splits -------------------------------------------
# 10% of edges and 10% of nodes go to validation; no test split is requested.
data_dict = prepare_data(adata=adata,
                         adj_key="spatial_connectivities",
                         edge_val_ratio=0.1,
                         edge_test_ratio=0.0,
                         node_val_ratio=0.1,
                         node_test_ratio=0.0)

print(f"Edge train data: {data_dict['edge_train_data']}")
print(f"Edge val data: {data_dict['edge_val_data']}")
print(f"Edge test data: {data_dict['edge_test_data']}")
print(f"Edge test data sum: {data_dict['edge_test_data'].edge_label.sum()}")
print(f"Node masked data: {data_dict['node_masked_data']}")
print(f"Node test mask sum: {data_dict['node_masked_data'].test_mask.sum()}")

Using dataset squidpy_seqfish.
Number of nodes: 19416
Number of node features: 351
Average number of edges per node: 4.4
Number of edges: 42694
Edge train data: Data(x=[19416, 351], edge_index=[2, 76850], y=[19416, 702], edge_label=[38425], edge_label_index=[2, 38425])
Edge val data: Data(x=[19416, 351], edge_index=[2, 76850], y=[19416, 702], edge_label=[4269], edge_label_index=[2, 4269])
Edge test data: Data(x=[19416, 351], edge_index=[2, 85388], y=[19416, 702], edge_label=[0], edge_label_index=[2, 0])
Edge test data sum: 0.0
Node masked data: Data(x=[19416, 351], edge_index=[2, 85388], y=[19416, 702], train_mask=[19416], val_mask=[19416], test_mask=[19416])
Node test mask sum: 0


In [None]:
dataset.x

In [None]:
dataset.adj

In [None]:
dataset.edge_index

In [None]:
dataset.x.shape

In [None]:
dataset.adj.shape

In [None]:
# Matrix product of the dataset's adjacency with its feature matrix --
# presumably each row is the sum of that node's neighbours' features
# (assumes a 0/1 adjacency; confirm).
# NOTE(review): the closest preceding executed cell in this export binds
# `dataset` to a string and does not import `torch`, so this presumably
# relies on state from an earlier session; confirm.
x_neighbors_summed = torch.matmul(dataset.adj, dataset.x)

In [None]:
x_neighbors_summed.shape

In [None]:
y = torch.cat((dataset.x, x_neighbors_summed), dim=-1)

In [None]:
y.shape

In [None]:
# Toy 3-node graph: node 0 is connected to nodes 1 and 2.
adj = torch.tensor([
    [0, 1, 1],
    [1, 0, 0],
    [1, 0, 0],
])
# One feature row per node.
x = torch.tensor([
    [12, 14, 6],
    [8, 4, 2],
    [7, 3, 1],
])

In [None]:
adj

In [None]:
torch.matmul(adj, x)