In [1]:
! pip install torch
! pip install torch_geometric



In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.data import HeteroData
from torch_geometric.nn import RGCNConv

class RGCN(nn.Module):
    def __init__(self, in_feat, hidden_feat, out_feat, num_rels):
        super(RGCN, self).__init__()
        self.rgcn1 = RGCNConv(in_feat, hidden_feat, num_rels)
        self.rgcn2 = RGCNConv(hidden_feat, out_feat, num_rels)

    def forward(self, data):
        x, edge_index, edge_type = data.x, data.edge_index, data.edge_attr

        x = self.rgcn1(x, edge_index, edge_type)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)

        x = self.rgcn2(x, edge_index, edge_type)

        return x

In [3]:
# Create a small heterogeneous graph
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
edge_type = torch.tensor([0, 1, 0, 1], dtype=torch.long)
x = torch.randn(3, 10)  # 3 nodes with 10 features each

# Create a HeteroData object
data = HeteroData()

# Add node and edge data for each type
data['node_type_1'].x = x
data['edge_type_1'].edge_index = edge_index
data['edge_type_1'].edge_type = edge_type

# Initialize the R-GCN model
model = RGCN(in_feat=10, hidden_feat=16, out_feat=8, num_rels=2)

# Forward pass
output = model(data)

# Print the output node features
print(output)

AttributeError: 'HeteroData' has no attribute 'x'

In [4]:


# Example data
# Replace these with your actual graph data
nodes = {
    'Biological_sample': [0, 1, 2],
    'Protein': [5, 4, 3],
    'Gene': [6, 7, 8],
    'Disease': [9, 10, 11]
}

edges = {
    'Biological_sample-Protein': [(0, 5), (1, 4)],
    'Gene-Protein': [(8, 5), (7, 3)],
    'Disease-Protein': [(9, 4), (10, 5)],
    'Disease-Protein-Biomarker': [(9, 4), (10, 5)],
}

# Step 1: Extract nodes and relationships
node_types = list(nodes.keys())
edge_types = list(edges.keys())

# Step 2: Create node and edge feature tensors
node_features = {node_type: torch.randn(len(nodes[node_type]), 1) for node_type in node_types}
edge_indices = {edge_type: torch.tensor(edges[edge_type], dtype=torch.long).t() for edge_type in edge_types}

# Step 3: Create HeteroData object
data = HeteroData()

# Add node features
for node_type in node_types:
    data[node_type].x = node_features[node_type]

# Add edge indices
for edge_type in edge_types:
    src, dst = edge_indices[edge_type]
    data[edge_type] = torch.stack([src, dst])

# Display the HeteroData object
print(data)

model = RGCN(in_feat=1, hidden_feat=16, out_feat=8, num_rels=len(edge_types))

HeteroData(
  Biological_sample-Protein=[2, 2],
  Gene-Protein=[2, 2],
  Disease-Protein=[2, 2],
  Disease-Protein-Biomarker=[2, 2],
  Biological_sample={ x=[3, 1] },
  Protein={ x=[3, 1] },
  Gene={ x=[3, 1] },
  Disease={ x=[3, 1] }
)


In [5]:
# Initialize the labels tensor with zeros
labels = torch.zeros(len(nodes['Disease']), len(nodes['Biological_sample']))

# Set the label to 1 for each pair that is connected
# Replace this with your actual connections
connections = [(9, 0), (10, 1)]  # Example connections
for disease, sample in connections:
    disease_index = nodes['Disease'].index(disease)
    sample_index = nodes['Biological_sample'].index(sample)
    labels[disease_index, sample_index] = 1

# Display the labels tensor
print(labels)


# Modify the model
class LinkPredictionModel(nn.Module):
    def __init__(self, in_feat, hidden_feat, out_feat, num_rels):
        super().__init__()
        self.conv1 = RGCNConv(in_feat, hidden_feat, num_rels, num_bases=None)
        self.conv2 = RGCNConv(hidden_feat, out_feat, num_rels, num_bases=None)

    def forward(self, data):
        x = data['Disease'].x  # Replace 'Disease' with the correct node type
        edge_index = data['Disease_Biological_sample'].edge_index  # Replace 'Disease_Biological_sample' with the correct edge type
        edge_type = data['Disease_Biological_sample'].edge_type  # Replace 'Disease_Biological_sample' with the correct edge type

        x = self.conv1(x, edge_index, edge_type)
        x = F.relu(x)
        x = self.conv2(x, edge_index, edge_type)
        return x

model = LinkPredictionModel(in_feat=1, hidden_feat=16, out_feat=8, num_rels=len(edge_types))

# Define a loss function and optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.CrossEntropyLoss()
# Train the model
for epoch in range(100):  # Number of epochs
    optimizer.zero_grad()
    scores = model(data)
    loss = loss_fn(scores, labels)
    loss.backward()
    optimizer.step()

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 0.]])


AttributeError: 'NodeStorage' object has no attribute 'edge_index'

In [15]:
# TODO: define real target labels
target = torch.tensor([0, 1, 0], dtype=torch.long) 

In [18]:
from torch_geometric.loader import DataLoader

# Step 1: Define the RGCN model
class RGCN(nn.Module):
    def __init__(self, in_channels, out_channels, num_relations):
        super(RGCN, self).__init__()
        self.conv1 = RGCNConv(in_channels, out_channels, num_relations)
        self.conv2 = RGCNConv(out_channels, out_channels, num_relations)

    def forward(self, data):
        x, edge_index, edge_type = data.x, data.edge_index, data.edge_type
        x = self.conv1(x, edge_index, edge_type)
        x = torch.relu(x)
        x = self.conv2(x, edge_index, edge_type)
        return x

# Step 2: Define a data loader
batch_size = 32
loader = DataLoader(data, batch_size=batch_size, shuffle=True)

# Step 3: Train the RGCN model
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

model = RGCN(in_channels=16, out_channels=32, num_relations=len(edges))
model.to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

model.train()
for epoch in range(100):
    total_loss = 0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        output = model(batch)
        loss = criterion(output, labels)  # Add your label data here
        loss.backward()
        optimizer.step()
        total_loss += loss.item() * batch.num_graphs
    print(f"Epoch {epoch + 1}, Loss: {total_loss / len(data)}")

TypeError: object of type 'int' has no len()

In [None]:
for epoch in range(100):
    model.train()
    optimizer.zero_grad()
    output = model(data)
    loss = loss_fn(output, target)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print(f'Epoch {epoch}, Loss: {loss.item()}')

In [None]:
# TODO: Define the target labels
target = torch.tensor([0, 1, 0], dtype=torch.long)
output = model(data)

loss = loss_fn(output, target)
print(loss)
# train the model with your data
#model.train()