In [1]:
import csv
import pandas as pd
import numpy as np
import anndata as ad
from tqdm import tqdm
import PyWGCNA as pwc
from torch_geometric.data import Data
from torch_geometric.utils import to_edge_index
import torch

In [2]:
# Load the raw table as strings, stripping stray whitespace from every cell
# (header row included). The file is opened with newline='' as the csv module
# requires, so quoted fields containing line breaks are parsed correctly.
with open('data/data.csv', newline='') as f:
    reader = csv.reader(f)
    # csv.reader yields an empty list for a blank line; skip those so they do
    # not turn into all-missing samples further down.
    data = [[x.strip() for x in row] for row in reader if row]
data_pd = pd.DataFrame(data[1:], columns=data[0])

# 1-based running sample identifier, appended as the last column
data_pd['Sample_ID'] = range(1, len(data_pd) + 1)

# Reproducible 80/20 split: 80% of the rows are sampled for training
# (fixed random_state), the remaining rows form the test set.
train_data = data_pd.sample(frac=0.8, random_state=0)
test_data = data_pd.drop(train_data.index)

# Same rows without the identifier and label columns
train_data_X = train_data.drop(columns=['Sample_ID', 'Label'])
test_data_X = test_data.drop(columns=['Sample_ID', 'Label'])

In [None]:
# Persist the train and test feature tables as CSV files (row index omitted)
##### NO NEED TO RUN THIS AGAIN #####
for feature_frame, csv_path in (
    (train_data_X, 'data/train_data_X.csv'),
    (test_data_X, 'data/test_data_X.csv'),
):
    feature_frame.to_csv(csv_path, index=False)

In [3]:
# Read the exported feature tables back in through anndata's CSV reader
train_data_X_ad, test_data_X_ad = (
    ad.io.read_csv(csv_path)
    for csv_path in ('data/train_data_X.csv', 'data/test_data_X.csv')
)

In [4]:
# Compute a PyWGCNA adjacency matrix for each split from its DataFrame view.
# NOTE(review): the test adjacency is derived from the test samples themselves;
# confirm this is intended rather than reusing the training adjacency.
train_expr_df = train_data_X_ad.to_df()
train_data_adj = pwc.WGCNA.adjacency(train_expr_df)

test_expr_df = test_data_X_ad.to_df()
test_data_adj = pwc.WGCNA.adjacency(test_expr_df)

[96mcalculating adjacency matrix ...[0m
	Done..

[96mcalculating adjacency matrix ...[0m
	Done..



In [5]:
# Sparsify in place: every adjacency entry below 0.3 becomes 0,
# entries at or above 0.3 keep their value.
for adj_matrix in (train_data_adj, test_data_adj):
    adj_matrix[adj_matrix < 0.3] = 0

In [6]:
# Inspect the thresholded training adjacency matrix (NumPy abbreviates the large array)
train_data_adj

array([[1., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 0., 0., 0.],
       [0., 0., 1., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 1., 0., 0.],
       [0., 0., 0., ..., 0., 1., 0.],
       [0., 0., 0., ..., 0., 0., 1.]], shape=(38454, 38454))

In [7]:
# Re-express both thresholded adjacency matrices in SciPy's COO sparse format
import scipy.sparse as sp

train_data_adj_coo, test_data_adj_coo = map(
    sp.coo_matrix, (train_data_adj, test_data_adj)
)

In [8]:
def _coo_to_torch(coo):
    """Build a torch sparse COO tensor from a SciPy COO matrix.

    The stored ``row``/``col`` coordinates are used directly so that indices
    and ``data`` are guaranteed to line up one-to-one. ``coo.nonzero()`` drops
    explicitly stored zeros while ``coo.data`` keeps them, so pairing those two
    can misalign. Stacking the coordinates into one int64 array also avoids
    handing torch a tuple of separate NumPy arrays.

    Parameters
    ----------
    coo : scipy.sparse.coo_matrix
        Matrix whose stored entries become the tensor's entries.

    Returns
    -------
    torch.Tensor
        Sparse COO tensor with the same shape as ``coo``; the values keep the
        dtype of ``coo.data``.
    """
    indices = torch.from_numpy(np.vstack((coo.row, coo.col)).astype(np.int64))
    return torch.sparse_coo_tensor(indices, coo.data, coo.shape)


train_data_adj_torch = _coo_to_torch(train_data_adj_coo)
test_data_adj_torch = _coo_to_torch(test_data_adj_coo)


In [10]:
def create_graph(adj, data, label_col=0, feature_cols=slice(2, -1), negative_label='N'):
    """Build one graph per sample; every graph shares the same edges.

    Parameters
    ----------
    adj : sparse adjacency tensor
        Converted once with ``to_edge_index``; the resulting ``edge_index``
        and ``edge_attr`` tensors are reused by every returned graph.
    data : pandas.DataFrame
        One row per sample. Columns are addressed by position, not by name.
    label_col : int, default 0
        Position of the class-label column.
    feature_cols : slice, default ``slice(2, -1)``
        Positions of the feature columns. The default skips the first two
        columns and the last one.
    negative_label : default ``'N'``
        Label value encoded as class 0; any other value is encoded as class 1.

    Returns
    -------
    list
        One ``Data`` object per row of ``data``, in row order.

    Notes
    -----
    NOTE(review): ``x`` is a 1-D tensor (one value per feature column) and
    ``edge_attr`` keeps the dtype of ``adj``'s values while ``x`` is float32 -
    confirm that downstream models expect exactly this.
    """
    # Edge structure is identical for all samples, so extract it only once.
    edge_index, edge_attr = to_edge_index(adj)

    graphs = []
    for sample in tqdm(data.values):
        # Feature cells may be stored as strings; convert them to floats first.
        x = torch.tensor(sample[feature_cols].astype(float), dtype=torch.float)
        # Binary target: 0 for the negative label, 1 for everything else.
        y = torch.tensor(
            [0 if sample[label_col] == negative_label else 1], dtype=torch.long
        )
        graphs.append(Data(x=x, y=y, edge_index=edge_index, edge_attr=edge_attr))
    return graphs

In [11]:
# Build the per-sample graph lists, pairing each split with its own adjacency tensor
graph_data_train = create_graph(adj=train_data_adj_torch, data=train_data)
graph_data_test = create_graph(adj=test_data_adj_torch, data=test_data)

100%|██████████| 130/130 [00:03<00:00, 40.44it/s]
100%|██████████| 32/32 [00:00<00:00, 40.36it/s]


In [12]:
# Serialize both graph lists to disk with torch.save
for graph_list, out_path in (
    (graph_data_train, 'data/graph_data_train.pt'),
    (graph_data_test, 'data/graph_data_test.pt'),
):
    torch.save(graph_list, out_path)

In [19]:
import torch 
from torch_geometric.utils import to_edge_index
# Scratch 5x5 matrix for trying out to_edge_index. It is drawn from a locally
# seeded generator so the cells below give the same result on every run,
# without touching the global RNG state.
A = torch.randn(5, 5, generator=torch.Generator().manual_seed(0))


In [24]:
# Boolean mask: True where the entry of A is below 0.3
A_1 = A < 0.3

In [26]:
# Convert the boolean mask to a sparse tensor and display it
A_coo_t = A_1.to_sparse()
A_coo_t

tensor(indices=tensor([[0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4],
                       [2, 3, 4, 2, 4, 0, 1, 2, 3, 4, 0, 1, 2, 4, 2, 3]]),
       values=tensor([True, True, True, True, True, True, True, True, True,
                      True, True, True, True, True, True, True]),
       size=(5, 5), nnz=16, layout=torch.sparse_coo)

In [27]:
# Split the sparse mask into edge indices (ed) and per-edge values (et)
ed, et = to_edge_index(A_coo_t)

In [28]:
# Display the per-edge values first, then the edge-index tensor
et, ed

(tensor([True, True, True, True, True, True, True, True, True, True, True, True,
         True, True, True, True]),
 tensor([[0, 0, 0, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4],
         [2, 3, 4, 2, 4, 0, 1, 2, 3, 4, 0, 1, 2, 4, 2, 3]]))

In [16]:
# Check whether et is a boolean tensor, i.e. whether the edge values kept the
# dtype of the boolean sparse mask they were extracted from
et.dtype

torch.bool

In [17]:
# Cast the boolean edge values to floating point (True -> 1.0)
et1 = et.float()

In [18]:
# Display the float edge values.
# NOTE(review): the recorded output has 13 entries while the `et` displayed by
# In [28] has 16. This cell ran as In [18], before the cells that define `A`
# and `et` were re-executed (In [19]-[28]), so its output is stale. Restart the
# kernel and run the cells top to bottom.
et1

tensor([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])