In [None]:
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)

!pip install -q torch-scatter -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q torch-sparse -f https://data.pyg.org/whl/torch-${TORCH}.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git

1.12.1+cu113


In [None]:
# import libraries 
import torch
import torch.nn.functional as F

import torch_geometric.transforms as T
from torch_geometric.nn import GCNConv, SAGEConv, GATConv

In [None]:
!pip install rdkit-pypi

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting rdkit-pypi
  Downloading rdkit_pypi-2022.3.5-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (36.8 MB)
[K     |████████████████████████████████| 36.8 MB 40 kB/s 
Installing collected packages: rdkit-pypi
Successfully installed rdkit-pypi-2022.3.5


In [None]:
!pip install ogb

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.data import DataLoader

dataset = PygGraphPropPredDataset(name = 'ogbg-molhiv')
# dataset.data.to(device)
split_idx = dataset.get_idx_split() 
train_loader = DataLoader(dataset[split_idx["train"]], batch_size=100 ,shuffle=True)
valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=100, shuffle=False)
test_loader = DataLoader(dataset[split_idx["test"]], batch_size=100, shuffle=False)



In [None]:
dataset

PygGraphPropPredDataset(41127)

In [None]:
# define the device
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device = torch.device(device)
device

device(type='cuda')

In [None]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.nn import global_mean_pool

In [None]:
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
atom_encoder = AtomEncoder(emb_dim = 18)
bond_encoder = BondEncoder(emb_dim = 18)
atom_encoder = atom_encoder.to(device)
# bond_encoder = bond_encoder.to(device)

In [None]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv,SAGEConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels,emb_dim):
        super().__init__()
        self.conv1 = SAGEConv(emb_dim, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels*2)
        self.conv3 = GATConv(hidden_channels*2, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self,x,edge_index,batch):
        # 1. Obtain node embeddings 
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.2, training=self.training)
        x = self.conv3(x, edge_index)
        x = F.dropout(x, p=0.5, training=self.training)

        x = global_mean_pool(x,batch)  

        # x = F.dropout(x, p=0.2, training=self.training)
        x = self.lin(x)
        x = F.log_softmax(x, dim = 1)
        
        return x

model1 = GCN(hidden_channels=64,emb_dim = 18)
print(model1)

GCN(
  (conv1): SAGEConv(18, 64, aggr=mean)
  (conv2): SAGEConv(64, 128, aggr=mean)
  (conv3): GATConv(128, 64, heads=1)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [None]:
optimizer = torch.optim.Adam(model1.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
model1 = model1.to(device)
dataset.data = dataset.data.to(device)
# optimizer = torch.optim.Adam(list(model.parameters())+list(atom_encoder.parameters())+list(bond_encoder.parameters()), 
#                           lr=0.01)
evaluator = Evaluator(name='ogbg-molhiv')

def train():
  model1.train()
  for data in train_loader:
    data = data.to(device)
    data.x = atom_encoder(data.x)
    # data.edge_attr = bond_encoder(data.edge_attr)
  # Iterate in batches over the training dataset.
    out = model1(data.x, data.edge_index, data.batch) 
    loss = criterion(out, data.y.squeeze(1))  
    loss.backward()  
    optimizer.step()  
    optimizer.zero_grad()
    return float(loss)

In [None]:

evaluator = Evaluator(name='ogbg-molhiv')

def test(test_loader):
    model1.eval()
    y_label = []
    y_predi = []
    for data in test_loader:
      data = data.to(device)
      data.x = atom_encoder(data.x)
      # data.edge_attr = bond_encoder(data.edge_attr)
    # Iterate in batches over the training dataset.
      out = model1(data.x,data.edge_index,data.batch)  
      y_predi_loader = out[:,1]
      y_label.append(data.y.view(y_predi_loader.shape).detach().cpu())
      y_predi.append(y_predi_loader.detach().cpu())
    
    y_label = torch.cat(y_label, dim = 0).numpy()
    y_predi = torch.cat(y_predi, dim = 0).numpy()
    # # y_label = np.array(y_label)
    # # y_predi = np.array(y_label)
    # evaluator = Evaluator(name='ogbg-molhiv')
    # y_label = torch.tensor(y_label)
    # y_predi = torch.tensor(y_predi)

    precision = evaluator.eval({'y_true': y_label.reshape(len(y_label),1),
                                'y_pred': y_predi.reshape(len(y_label),1),
                               })
    
    return precision['rocauc']

In [None]:
losses1 = []
train_acc_list1 = []
test_acc_list1 = []

for epoch in range(1, 200):
    loss = train()
    losses1.append(loss)
    train_acc = test(train_loader)
    train_acc_list1.append(train_acc)
    test_acc = test(test_loader)
    test_acc_list1.append(test_acc)
    # if epoch % 10==0:
    print(f'Epoch: {epoch:03d}, Loss :{loss:.4f}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Loss :0.9166, Train Acc: 0.5029, Test Acc: 0.6113
Epoch: 002, Loss :0.0774, Train Acc: 0.5091, Test Acc: 0.6211
Epoch: 003, Loss :0.1713, Train Acc: 0.5009, Test Acc: 0.6408
Epoch: 004, Loss :0.5129, Train Acc: 0.4968, Test Acc: 0.6233
Epoch: 005, Loss :0.3080, Train Acc: 0.4926, Test Acc: 0.5958
Epoch: 006, Loss :0.1490, Train Acc: 0.4964, Test Acc: 0.5904
Epoch: 007, Loss :0.0764, Train Acc: 0.4975, Test Acc: 0.5908
Epoch: 008, Loss :0.2120, Train Acc: 0.4979, Test Acc: 0.5890
Epoch: 009, Loss :0.1911, Train Acc: 0.4988, Test Acc: 0.5893
Epoch: 010, Loss :0.1861, Train Acc: 0.5009, Test Acc: 0.5888
Epoch: 011, Loss :0.2457, Train Acc: 0.5013, Test Acc: 0.5881
Epoch: 012, Loss :0.2839, Train Acc: 0.5006, Test Acc: 0.5886
Epoch: 013, Loss :0.2125, Train Acc: 0.5003, Test Acc: 0.5888
Epoch: 014, Loss :0.2446, Train Acc: 0.5012, Test Acc: 0.5871
Epoch: 015, Loss :0.1134, Train Acc: 0.5025, Test Acc: 0.5862
Epoch: 016, Loss :0.1095, Train Acc: 0.5044, Test Acc: 0.5859
Epoch: 0

In [None]:
import pandas as pd
import plotly.express as px

In [None]:
df1 = pd.DataFrame(list(zip(train_acc_list1,test_acc_list1, losses1)), columns =['train_acc1', 'test_acc1','loss1'])
df1.head()

Unnamed: 0,train_acc1,test_acc1,loss1
0,0.502927,0.611337,0.916597
1,0.509117,0.621124,0.077439
2,0.500871,0.640795,0.171282
3,0.496803,0.623273,0.512851
4,0.492562,0.595774,0.308031


In [None]:
losses_float = [loss for loss in losses1] 
loss_indices = [i for i,l in enumerate(losses_float)] 
fig = px.line(df1, x=loss_indices, y=["train_acc1", "test_acc1", "loss1"], title="Mesure de performance pour le modele 1: 2 couches SAGE et une GAT",
            labels={"train_acc1": "train_acc1", "loss": "loss1","test_acc1":"test_acc1"})
fig.show()

**model 2**

set batch_size  = 32

In [None]:
from ogb.graphproppred import PygGraphPropPredDataset, Evaluator
from torch_geometric.data import DataLoader

dataset = PygGraphPropPredDataset(name = 'ogbg-molhiv')
# dataset.data.to(device) 
split_idx = dataset.get_idx_split() 
train_loader = DataLoader(dataset[split_idx["train"]], batch_size=64 ,shuffle=True)
valid_loader = DataLoader(dataset[split_idx["valid"]], batch_size=64, shuffle=False)
test_loader = DataLoader(dataset[split_idx["test"]], batch_size=64, shuffle=False)


'data.DataLoader' is deprecated, use 'loader.DataLoader' instead



In [None]:
from ogb.graphproppred.mol_encoder import AtomEncoder, BondEncoder
atom_encoder = AtomEncoder(emb_dim = 24)
bond_encoder = BondEncoder(emb_dim = 24)
atom_encoder = atom_encoder.to(device)
# bond_encoder = bond_encoder.to(device)

In [None]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv,SAGEConv,GATConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.conv1 = GATConv(24, hidden_channels)
        self.conv2 = SAGEConv(hidden_channels, hidden_channels,aggr="add")
        # self.conv1 = GCNConv(dataset.num_features, hidden_channels)
        # self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = SAGEConv(hidden_channels, hidden_channels, aggr="add")
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self,x,edge_index,batch):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv3(x, edge_index)

        x = global_mean_pool(x,batch) 

        x = F.dropout(x, p=0.2, training=self.training)
        x = self.lin(x)
        x = F.log_softmax(x, dim = 1)
        
        return x

model2 = GCN(hidden_channels=64)
print(model2)

GCN(
  (conv1): GATConv(24, 64, heads=1)
  (conv2): SAGEConv(64, 64, aggr=add)
  (conv3): SAGEConv(64, 64, aggr=add)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [None]:
optimizer = torch.optim.Adam(model2.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
model2 = model2.to(device)
dataset.data = dataset.data.to(device)
# optimizer = torch.optim.Adam(list(model.parameters())+list(atom_encoder.parameters())+list(bond_encoder.parameters()), 
#                           lr=0.01)
evaluator = Evaluator(name='ogbg-molhiv')

def train():
  model2.train()
  for data in train_loader:
    data = data.to(device)
    data.x = atom_encoder(data.x)
    # data.edge_attr = bond_encoder(data.edge_attr)
  # Iterate in batches over the training dataset.
    optimizer.zero_grad()
    out = model2(data.x, data.edge_index, data.batch)  
    loss = criterion(out, data.y.squeeze(1))  
    loss.backward()  
    optimizer.step()  
    return float(loss)

In [None]:

evaluator = Evaluator(name='ogbg-molhiv')

def test(test_loader):
    model2.eval()
    y_label = []
    y_predi = []
    for data in test_loader:
      data = data.to(device)
      data.x = atom_encoder(data.x)
      # data.edge_attr = bond_encoder(data.edge_attr)
    # Iterate in batches over the training dataset.
      out = model2(data.x,data.edge_index,data.batch)  
      y_predi_loader = out[:,1]
      y_label.append(data.y.view(y_predi_loader.shape).detach().cpu())
      y_predi.append(y_predi_loader.detach().cpu())
    
    y_label = torch.cat(y_label, dim = 0).numpy()
    y_predi = torch.cat(y_predi, dim = 0).numpy()
    # # y_label = np.array(y_label)
    # # y_predi = np.array(y_label)
    # evaluator = Evaluator(name='ogbg-molhiv')
    # y_label = torch.tensor(y_label)
    # y_predi = torch.tensor(y_predi)

    precision = evaluator.eval({'y_true': y_label.reshape(len(y_label),1),
                                'y_pred': y_predi.reshape(len(y_label),1),
                               })
    
    return precision['rocauc']

In [None]:
losses2 = []
train_acc_list2 = []
test_acc_list2 = []

for epoch in range(1, 200):
    loss = train()
    losses2.append(loss)
    train_acc = test(train_loader)
    train_acc_list2.append(train_acc)
    test_acc = test(test_loader)
    test_acc_list2.append(test_acc)
    # if epoch % 10==0:
    print(f'Epoch: {epoch:03d}, loss:{loss:.4f} Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, loss:0.5792 Train Acc: 0.5042, Test Acc: 0.5842
Epoch: 002, loss:0.1438 Train Acc: 0.5120, Test Acc: 0.5455
Epoch: 003, loss:0.5093 Train Acc: 0.5149, Test Acc: 0.5486
Epoch: 004, loss:0.1308 Train Acc: 0.5151, Test Acc: 0.5507
Epoch: 005, loss:0.2634 Train Acc: 0.5184, Test Acc: 0.5460
Epoch: 006, loss:0.2296 Train Acc: 0.5225, Test Acc: 0.5738
Epoch: 007, loss:0.1528 Train Acc: 0.5095, Test Acc: 0.5850
Epoch: 008, loss:0.2360 Train Acc: 0.4901, Test Acc: 0.5752
Epoch: 009, loss:0.2752 Train Acc: 0.4660, Test Acc: 0.5422
Epoch: 010, loss:0.1417 Train Acc: 0.4459, Test Acc: 0.4950
Epoch: 011, loss:0.1327 Train Acc: 0.4325, Test Acc: 0.4608
Epoch: 012, loss:0.3359 Train Acc: 0.4304, Test Acc: 0.4612
Epoch: 013, loss:0.0900 Train Acc: 0.4309, Test Acc: 0.4670
Epoch: 014, loss:0.1808 Train Acc: 0.4309, Test Acc: 0.4745
Epoch: 015, loss:0.2198 Train Acc: 0.4331, Test Acc: 0.4873
Epoch: 016, loss:0.1427 Train Acc: 0.4371, Test Acc: 0.5004
Epoch: 017, loss:0.2472 Train Acc: 0.447

In [None]:
df2 = pd.DataFrame(list(zip(train_acc_list2,test_acc_list2, losses2)), columns =['train_acc2', 'test_acc2','loss2'])
df2.head()

Unnamed: 0,train_acc2,test_acc2,loss2
0,0.504238,0.584167,0.579217
1,0.51196,0.545464,0.143792
2,0.514905,0.548583,0.509336
3,0.515087,0.550671,0.130809
4,0.518371,0.546012,0.26337


In [None]:
losses_float = [loss for loss in losses2] 
loss_indices = [i for i,l in enumerate(losses_float)] 
fig = px.line(df2, x=loss_indices, y=["train_acc2", "test_acc2", "loss2"], title="Mesure de performance pour le modele 1: 2 couches SAGE et une GAT",
            labels={"train_acc2": "train_acc2", "loss2": "loss","test_acc2":"test_acc2"})
fig.show()

model3

In [None]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv,SAGEConv,GATConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.conv1 = GATConv(24, hidden_channels)
        self.conv2 = GATConv(hidden_channels, hidden_channels)
        # self.conv1 = GCNConv(dataset.num_features, hidden_channels)
        # self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GATConv(hidden_channels, hidden_channels)
        self.lin = Linear(hidden_channels, dataset.num_classes)

    def forward(self,x,edge_index,batch):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv3(x, edge_index)

        x = global_mean_pool(x,batch)  

        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        x = F.log_softmax(x, dim = 1)
        
        return x

model3 = GCN(hidden_channels=64)
print(model3)

GCN(
  (conv1): GATConv(24, 64, heads=1)
  (conv2): GATConv(64, 64, heads=1)
  (conv3): GATConv(64, 64, heads=1)
  (lin): Linear(in_features=64, out_features=2, bias=True)
)


In [None]:
optimizer = torch.optim.Adam(model3.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
model3 = model3.to(device)
dataset.data = dataset.data.to(device)
# optimizer = torch.optim.Adam(list(model.parameters())+list(atom_encoder.parameters())+list(bond_encoder.parameters()), 
#                           lr=0.01)
evaluator = Evaluator(name='ogbg-molhiv')

def train():
  model3.train()
  for data in train_loader:
    data = data.to(device)
    data.x = atom_encoder(data.x)
    # data.edge_attr = bond_encoder(data.edge_attr)
  # Iterate in batches over the training dataset.
    optimizer.zero_grad()
    out = model3(data.x, data.edge_index, data.batch)  
    loss = criterion(out, data.y.squeeze(1)) 
    loss.backward() 
    optimizer.step() 
    return float(loss)

In [None]:

evaluator = Evaluator(name='ogbg-molhiv')

def test(test_loader):
    model3.eval()
    y_label = []
    y_predi = []
    for data in test_loader:
      data = data.to(device)
      data.x = atom_encoder(data.x)
      # data.edge_attr = bond_encoder(data.edge_attr)
    # Iterate in batches over the training dataset.
      out = model3(data.x,data.edge_index,data.batch)  
      y_predi_loader = out[:,1]
      y_label.append(data.y.view(y_predi_loader.shape).detach().cpu())
      y_predi.append(y_predi_loader.detach().cpu())
    
    y_label = torch.cat(y_label, dim = 0).numpy()
    y_predi = torch.cat(y_predi, dim = 0).numpy()
    # # y_label = np.array(y_label)
    # # y_predi = np.array(y_label)
    # evaluator = Evaluator(name='ogbg-molhiv')
    # y_label = torch.tensor(y_label)
    # y_predi = torch.tensor(y_predi)

    precision = evaluator.eval({'y_true': y_label.reshape(len(y_label),1),
                                'y_pred': y_predi.reshape(len(y_label),1),
                               })
    
    return precision['rocauc']

In [None]:
losses3 = []
train_acc_list3 = []
test_acc_list3 = []

for epoch in range(1, 150):
    loss = train()
    losses3.append(loss)
    train_acc = test(train_loader)
    train_acc_list3.append(train_acc)
    test_acc = test(test_loader)
    test_acc_list3.append(test_acc)
    # if epoch % 10==0:
    print(f'Epoch: {epoch:03d}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Train Acc: 0.5194, Test Acc: 0.5487
Epoch: 002, Train Acc: 0.5306, Test Acc: 0.5939
Epoch: 003, Train Acc: 0.5323, Test Acc: 0.6056
Epoch: 004, Train Acc: 0.5295, Test Acc: 0.6038
Epoch: 005, Train Acc: 0.5276, Test Acc: 0.6047
Epoch: 006, Train Acc: 0.5285, Test Acc: 0.6053
Epoch: 007, Train Acc: 0.5266, Test Acc: 0.6047
Epoch: 008, Train Acc: 0.5260, Test Acc: 0.6021
Epoch: 009, Train Acc: 0.5228, Test Acc: 0.5972
Epoch: 010, Train Acc: 0.5167, Test Acc: 0.5888
Epoch: 011, Train Acc: 0.5115, Test Acc: 0.5767
Epoch: 012, Train Acc: 0.5076, Test Acc: 0.5663
Epoch: 013, Train Acc: 0.5035, Test Acc: 0.5558
Epoch: 014, Train Acc: 0.5089, Test Acc: 0.5584
Epoch: 015, Train Acc: 0.5129, Test Acc: 0.5624
Epoch: 016, Train Acc: 0.5148, Test Acc: 0.5640
Epoch: 017, Train Acc: 0.5179, Test Acc: 0.5677
Epoch: 018, Train Acc: 0.5218, Test Acc: 0.5703
Epoch: 019, Train Acc: 0.5242, Test Acc: 0.5762
Epoch: 020, Train Acc: 0.5268, Test Acc: 0.5819
Epoch: 021, Train Acc: 0.5305, Test Acc:

In [None]:
df3 = pd.DataFrame(list(zip(train_acc_list3,test_acc_list3, losses3)), columns =['train_acc3', 'test_acc3','loss3'])
df3.head(3)

Unnamed: 0,train_acc3,test_acc3,loss3
0,0.519387,0.548713,0.633775
1,0.530647,0.593859,0.192105
2,0.532333,0.605603,0.21148


In [None]:
losses_float = [loss for loss in losses3] 
loss_indices = [i for i,l in enumerate(losses_float)] 
fig = px.line(df3, x=loss_indices, y=["train_acc3", "test_acc3", "loss3"], title="Mesure de performance pour le modele 3: avec 3 couches de GAT",
            labels={"train_acc3": "train_acc3", "loss3": "loss3","test_acc3":"test_acc3"})
fig.show()

model4

In [None]:
from torch.nn import Linear
import torch.nn.functional as F
from torch_geometric.nn import GCNConv,SAGEConv,GATConv
from torch_geometric.nn import global_mean_pool


class GCN(torch.nn.Module):
    def __init__(self, hidden_channels):
        super().__init__()
        self.conv1 = GATConv(24, hidden_channels,heads=3)
        self.conv2 = GATConv(hidden_channels*3, hidden_channels,heads=3)
        # self.conv1 = GCNConv(dataset.num_features, hidden_channels)
        # self.conv2 = GCNConv(hidden_channels, hidden_channels)
        self.conv3 = GATConv(hidden_channels*3, hidden_channels,heads = 3)
        self.lin = Linear(hidden_channels*3, dataset.num_classes)

    def forward(self,x,edge_index,batch):
        x = self.conv1(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv2(x, edge_index)
        x = x.relu()
        x = F.dropout(x, p=0.5, training=self.training)
        x = self.conv3(x, edge_index)

        x = global_mean_pool(x,batch)  

        x = F.dropout(x, p=0.5, training=self.training)
        x = self.lin(x)
        x = F.log_softmax(x, dim = 1)
        
        return x

model4 = GCN(hidden_channels=64)
print(model4)

GCN(
  (conv1): GATConv(24, 64, heads=3)
  (conv2): GATConv(192, 64, heads=3)
  (conv3): GATConv(192, 64, heads=3)
  (lin): Linear(in_features=192, out_features=2, bias=True)
)


In [None]:
optimizer = torch.optim.Adam(model4.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
model4 = model4.to(device)
dataset.data = dataset.data.to(device)
# optimizer = torch.optim.Adam(list(model.parameters())+list(atom_encoder.parameters())+list(bond_encoder.parameters()), 
#                           lr=0.01)
evaluator = Evaluator(name='ogbg-molhiv')

def train():
  model4.train()
  for data in train_loader:
    data = data.to(device)
    data.x = atom_encoder(data.x)
    # data.edge_attr = bond_encoder(data.edge_attr)
  # Iterate in batches over the training dataset.
    optimizer.zero_grad()
    out = model4(data.x, data.edge_index, data.batch)  
    loss = criterion(out, data.y.squeeze(1)) 
    loss.backward()  
    optimizer.step()  
    return float(loss)

In [None]:

evaluator = Evaluator(name='ogbg-molhiv')

def test(test_loader):
    model4.eval()
    y_label = []
    y_predi = []
    for data in test_loader:
      data = data.to(device)
      data.x = atom_encoder(data.x)
      # data.edge_attr = bond_encoder(data.edge_attr)
    # Iterate in batches over the training dataset.
      out = model4(data.x,data.edge_index,data.batch)  # Perform a single forward pass.
      y_predi_loader = out[:,1]
      y_label.append(data.y.view(y_predi_loader.shape).detach().cpu())
      y_predi.append(y_predi_loader.detach().cpu())
    
    y_label = torch.cat(y_label, dim = 0).numpy()
    y_predi = torch.cat(y_predi, dim = 0).numpy()
    # # y_label = np.array(y_label)
    # # y_predi = np.array(y_label)
    # evaluator = Evaluator(name='ogbg-molhiv')
    # y_label = torch.tensor(y_label)
    # y_predi = torch.tensor(y_predi)

    precision = evaluator.eval({'y_true': y_label.reshape(len(y_label),1),
                                'y_pred': y_predi.reshape(len(y_label),1),
                               })
    
    return precision['rocauc']

In [None]:
losses4 = []
train_acc_list4 = []
test_acc_list4 = []

for epoch in range(1, 150):
    loss = train()
    losses4.append(loss)
    train_acc = test(train_loader)
    train_acc_list4.append(train_acc)
    test_acc = test(test_loader)
    test_acc_list4.append(test_acc)
    # if epoch % 10==0:
    print(f'Epoch: {epoch:03d}, Loss: {loss:.4f}, Train Acc: {train_acc:.4f}, Test Acc: {test_acc:.4f}')

Epoch: 001, Loss: 0.7237, Train Acc: 0.5293, Test Acc: 0.5678
Epoch: 002, Loss: 0.1561, Train Acc: 0.5103, Test Acc: 0.4867
Epoch: 003, Loss: 0.3063, Train Acc: 0.5098, Test Acc: 0.4807
Epoch: 004, Loss: 0.0123, Train Acc: 0.5059, Test Acc: 0.4767
Epoch: 005, Loss: 0.0867, Train Acc: 0.5066, Test Acc: 0.4714
Epoch: 006, Loss: 0.1161, Train Acc: 0.5088, Test Acc: 0.4667
Epoch: 007, Loss: 0.1459, Train Acc: 0.5163, Test Acc: 0.4702
Epoch: 008, Loss: 0.1049, Train Acc: 0.5271, Test Acc: 0.4785
Epoch: 009, Loss: 0.3281, Train Acc: 0.5570, Test Acc: 0.5168
Epoch: 010, Loss: 0.1783, Train Acc: 0.5756, Test Acc: 0.6061
Epoch: 011, Loss: 0.2276, Train Acc: 0.5499, Test Acc: 0.5952
Epoch: 012, Loss: 0.1154, Train Acc: 0.5480, Test Acc: 0.5876
Epoch: 013, Loss: 0.1020, Train Acc: 0.5459, Test Acc: 0.5852
Epoch: 014, Loss: 0.1690, Train Acc: 0.5420, Test Acc: 0.5819
Epoch: 015, Loss: 0.0877, Train Acc: 0.5417, Test Acc: 0.5810
Epoch: 016, Loss: 0.2284, Train Acc: 0.5413, Test Acc: 0.5841
Epoch: 0

In [None]:
df4 = pd.DataFrame(list(zip(train_acc_list4,test_acc_list4, losses4)), columns =['train_acc4', 'test_acc4','loss4'])

In [None]:
losses_float = [loss for loss in losses4] 
loss_indices = [i for i,l in enumerate(losses_float)] 
fig = px.line(df4, x=loss_indices, y=["train_acc4", "test_acc4", "loss4"], title="Mesure de performance pour le modele 3: avec 3 couches de GAT et self attention multi tete",
            labels={"train_acc4": "train_acc4", "loss4": "loss4","test_acc4":"test_acc4"})
fig.show()