In [5]:
!pip install torch_geometric

Collecting torch_geometric
  Downloading torch_geometric-2.6.0-py3-none-any.whl.metadata (63 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m63.1/63.1 kB[0m [31m640.1 kB/s[0m eta [36m0:00:00[0m
Downloading torch_geometric-2.6.0-py3-none-any.whl (1.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: torch_geometric
Successfully installed torch_geometric-2.6.0


In [12]:
import random
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from torch import nn
from torch_geometric.nn import GCNConv, DeepGraphInfomax
import torch
import torch_geometric
from torch_geometric.datasets import Planetoid
from torch_geometric.data import Data
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
import os

In [7]:
class GCNNet(torch.nn.Module):
    def __init__(self, inp_dim, out_dim):
        super(GCNNet, self).__init__()
        self.conv1 = GCNConv(inp_dim, 32)
        self.conv2 = GCNConv(32, 64)
        self.conv3 = GCNConv(64, 128)
        self.conv4 = GCNConv(128, 128)

        self.dropout = nn.Dropout(0.5)
        self.fc = nn.Linear(128, out_dim)

    def forward(self, x, edge_index, edge_attr):
        x = F.relu(self.conv1(x, edge_index, edge_attr))
        x = F.relu(self.conv2(x, edge_index, edge_attr))
        x = F.relu(self.conv3(x, edge_index, edge_attr))
        x = F.relu(self.conv4(x, edge_index, edge_attr))

        x = self.dropout(x)
        x = self.fc(x)
        return x


class GraphClassifier:
    def __init__(self, inp_dim, out_dim, device):
        self.gcn = GCNNet(inp_dim, out_dim)
        self.gcn = self.gcn.to(device)
        self.optimizer = torch.optim.Adam(self.gcn.parameters())

    def evaluate_loss(self, data, mode):
        # use masking for loss evaluation
        x, edge_index, edge_attr = data.x, data.edge_index, data.edge_attr
        if mode == 'train':
            loss = F.cross_entropy(self.gcn(x, edge_index, edge_attr)[data.train_mask], data.y[data.train_mask])
        else:
            loss = F.cross_entropy(self.gcn(x, edge_index, edge_attr)[data.test_mask], data.y[data.test_mask])
        return loss

    def embed(self, data):
        return self.gcn(data.x, data.edge_index, data.edge_attr)

    def train(self, data):
        # training
        self.gcn.train()
        self.optimizer.zero_grad()
        loss = self.evaluate_loss(data, mode='train')
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def test(self, data):
        # testing
        self.gcn.eval()
        logits, accs = self.gcn(data.x, data.edge_index, data.edge_attr), []
        loss = self.evaluate_loss(data, mode='test').item()

        for _, mask in data('train_mask', 'test_mask'):
            pred = logits[mask].max(1)[1]
            acc = pred.eq(data.y[mask]).sum().item() / mask.sum().item()
            accs.append(acc)
        return [loss] + accs


class DGIEncoderNet(torch.nn.Module):
    def __init__(self, inp_dim, out_dim):
        super(DGIEncoderNet, self).__init__()
        self.conv1 = GCNConv(inp_dim, 32)
        self.conv2 = GCNConv(32, 64)
        #self.conv3 = GCNConv(64, 128)
        #self.conv4 = GCNConv(128, 256)
        self.conv3 = GCNConv(64, out_dim)

    def forward(self, x, edge_index, edge_attr, msk=None):
        x = F.elu(self.conv1(x, edge_index, edge_attr))
        x = F.elu(self.conv2(x, edge_index, edge_attr))
        #x = F.elu(self.conv3(x, edge_index, edge_attr))
        #x = F.elu(self.conv4(x, edge_index, edge_attr))
        x = self.conv3(x, edge_index, edge_attr)
        return x


class DGILearner:
    def __init__(self, inp_dim, out_dim, device):
        self.encoder = DGIEncoderNet(inp_dim, out_dim)
        self.dgi = DeepGraphInfomax(out_dim, encoder=self.encoder, summary=self.readout, corruption=self.corrupt)
        self.dgi = self.dgi.to(device)

        self.optimizer = torch.optim.Adam(self.dgi.parameters())

    def embed(self, data):
        pos_z, _, _ = self.dgi(data.x, data.edge_index, data.edge_attr, msk=None)
        return pos_z

    def readout(self, z, x, edge_index, edge_attr, msk=None):
        if msk is None:
            return torch.sigmoid(torch.mean(z, 0))
        else:
            return torch.sigmoid(torch.sum(z[msk], 0) / torch.sum(msk))

    def corrupt(self, x, edge_index, edge_attr, msk=None):
        shuffled_rows = torch.randperm(len(x))
        shuffled_x = x[shuffled_rows, :]
        return shuffled_x, edge_index, edge_attr

    def evaluate_loss(self, data, mode):
        # use masking for loss evaluation
        pos_z_train, neg_z_train, summ_train = self.dgi(data.x, data.edge_index, data.edge_attr, msk=data.train_mask)
        pos_z_test, neg_z_test, summ_test = self.dgi(data.x, data.edge_index, data.edge_attr, msk=data.test_mask)

        if mode == 'train':
            return self.dgi.loss(pos_z_train, neg_z_train, summ_train)
        else:
            return self.dgi.loss(pos_z_test, neg_z_test, summ_test)

    def train(self, data):
        # training
        self.dgi.train()
        self.optimizer.zero_grad()
        loss = self.evaluate_loss(data, mode='train')
        loss.backward()
        self.optimizer.step()
        return loss.item()

    def test(self, data):
        # testing
        self.dgi.eval()
        return self.evaluate_loss(data, mode='test').item()

In [8]:
def train_model(dataset, train_mode, num_classes, device):
    if train_mode == 'supervised':
        model = GraphClassifier(dataset.num_node_features, num_classes, device)
    elif train_mode == 'unsupervised':
        model = DGILearner(dataset.num_node_features, 512, device)
    else:
        raise ValueError('Unsupported train mode {}'.format(train_mode))
    train_history=[]
    test_history=[]
    train_epochs = 81 if train_mode == 'supervised' else 1001
    for epoch in range(0, train_epochs):
        train_loss = model.train(dataset)
        if epoch % 5 == 0:
            if train_mode == 'unsupervised':
                log = 'Epoch: {:03d}, train_loss: {:.3f}, test_loss:{:.3f}'
                test_loss = model.test(dataset)
                print(log.format(epoch, train_loss, test_loss))
            else:
                log = 'Epoch: {:03d}, train_loss: {:.3f}, test_loss:{:.3f}, train_acc: {:.2f}, test_acc: {:.2f}'
                print(log.format(epoch, train_loss, *model.test(dataset)))

    return model.embed(dataset).detach().cpu().numpy()

In [9]:
name='Gavin'
newpath = r"./"+name

In [10]:
newpath = r"./"+name+"/Identity_Matrix/"
newpath

'./Gavin/Identity_Matrix/'

In [13]:
name=input("Enter the name of the dataset: ")
symbol=name[0]
timesteps=6
newpath = r"./"+name+"/No_Feature/"
if not os.path.exists(newpath):
    os.makedirs(newpath)
for l in range(0,timesteps):
  print("Currently creating embedding for",name,"at timestep",l+1)
  s="./"+name+"/Identity_Matrix/"+symbol+"I"+(str(l+1))+".csv"
  x=pd.read_csv(s)
  x.rename(columns={'Unnamed: 0': 'Proteins'}, inplace=True)
  x=x.set_axis(x.iloc[:,0], axis=0)
  del x[x.columns[0]]
  proteins=list(x.index)
  s="./"+name+"/Networks/"+symbol+"N"+(str(l+1))+".csv"
  edge_index=pd.read_csv(s)
  edge_attr=list(edge_index.iloc[:,2])
  del edge_index[edge_index.columns[2]]
  for i in range(len(edge_index)):
    edge_index.iloc[i,0]=proteins.index(edge_index.iloc[i,0])
    edge_index.iloc[i,1]=proteins.index(edge_index.iloc[i,1])
  edge_index_c=edge_index.copy()
  edge_index_c.iloc[:,0]=edge_index.iloc[:,1]
  edge_index_c.iloc[:,1]=edge_index.iloc[:,0]
  edges=pd.concat([edge_index, edge_index_c], axis=0, ignore_index=True)
  edge_index=np.array(edges.transpose(),dtype=int)
  edge_attr.extend(edge_attr.copy())
  x = torch.tensor(np.array(x), dtype=torch.float)
  edge_index = torch.tensor(edge_index, dtype=torch.long)
  edge_attr = torch.tensor(edge_attr, dtype=torch.float)
  train_mask=[True for i in range(round(len(proteins)*0.8))]
  train_mask.extend([False for i in range(round(len(proteins)*0.8),len(proteins))])
  train_mask=torch.tensor(train_mask,dtype=torch.bool)
  test_mask=[False for i in range(round(len(proteins)*0.8))]
  test_mask.extend([True for i in range(round(len(proteins)*0.8),len(proteins))])
  test_mask=torch.tensor(test_mask,dtype=torch.bool)
  data = Data(x=x, edge_index=edge_index, edge_attr=edge_attr,train_mask=train_mask,test_mask=test_mask)
  data = data.to(device)
  embed = train_model(data, "unsupervised", 512, device)
  embedding=pd.DataFrame(embed)
  embedding=embedding.set_axis(proteins, axis=0)
  s="./"+name+"/No_Feature/"+symbol+"E"+str(l+1)+".csv"
  embedding.to_csv(s)

Enter the name of the dataset: Gavin
Currently creating embedding for Gavin at timestep 1


FileNotFoundError: [Errno 2] No such file or directory: './Gavin/Identity_Matrix/GI1.csv'