# GNN_1

In 1000 epochs to 13.32 validation loss.

In [6]:
########### insert into GNN definition section ####################
import torch
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing, global_mean_pool, MLP, global_add_pool
from torch_geometric.nn import GCNConv

class Net(torch.nn.Module):
    def __init__(self):
        super(Net, self).__init__()

        self.etl_embedding_dimensions = 16
        self.htl_embedding_dimensions = 16
        self.absorber_dimensions = 76
        self.bandgap_dimension = 1
        self.hidden_dimension = 32
        self.number_of_regression_layers = 3

        self.etl_mpnn = GCNConv(in_channels=1, out_channels=self.etl_embedding_dimensions)
        self.htl_mpnn = GCNConv(in_channels=1, out_channels=self.htl_embedding_dimensions)

        self.fc1 = torch.nn.Linear(self.etl_embedding_dimensions + self.htl_embedding_dimensions + self.absorber_dimensions + self.bandgap_dimension, self.hidden_dimension)
        
        self.regression_layers = torch.nn.ModuleList([self.fc1])
        self.regression_layers.extend([torch.nn.Linear(self.hidden_dimension,  self.hidden_dimension) for i in range(1, self.number_of_regression_layers-1)])

        self.fc_out = torch.nn.Linear(self.hidden_dimension, 1)

    def forward(self, etl_features, htl_features, etl_edge_indices, htl_edge_indices, absorbers, bandgap):
        etl_x = self.etl_mpnn(etl_features, etl_edge_indices)
        etl_x = global_mean_pool(etl_x, torch.zeros(etl_x.size(0), dtype=torch.long))
        
        htl_x = self.htl_mpnn(htl_features, htl_edge_indices)
        htl_x = global_mean_pool(htl_x, torch.zeros(htl_x.size(0), dtype=torch.long))
        
        x = torch.cat([etl_x, htl_x, absorbers, bandgap], dim=1)
        for layer in self.regression_layers: 
            x = F.leaky_relu(layer(x))
        
        x = self.fc_out(x)
        x = F.softplus(x)
        return x

####### end of insert #############

In [5]:
######### insert into Training section ##############

decay = 0.995
averaged_model = AveragedModel(model, multi_avg_fn=get_ema_multi_avg_fn(decay))

optimizer = torch.optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)
criterion = torch.nn.MSELoss()

num_epochs = 1000
scheduler = torch.optim.lr_scheduler.OneCycleLR(optimizer, max_lr=0.001, steps_per_epoch=len(train_loader), epochs=num_epochs, pct_start=0.1,final_div_factor=1e2)

######### end of insert ################

NameError: name 'AveragedModel' is not defined

# GNN_2
Everything the same as above, but for the embedding dimensions being 32 instead of 16.
Overfits a lot.

# GNN_3

Went back to config of GNN_1, but added another input feature, atomweight.

In [None]:
# in smiles_to_graph insert:
    atom_features = []
    for atom in mol.GetAtoms():
        atomic_num = atom.GetAtomicNum()
        atomic_weight = atom.GetMass()
        atom_features.append([atomic_num, atomic_weight])
    x = torch.tensor(atom_features, dtype=torch.float)


In [None]:
# for the default data in creating MolecularDataset class, change dimensions of the zero verctor:
default_data = {'x': torch.zeros((1,2),  dtype=torch.float), 'edge_index': torch.zeros((2,1), dtype=torch.long)}

In [None]:
# in the net definitions, change input channels to 2
        self.etl_mpnn = GCNConv(in_channels=2, out_channels=self.etl_embedding_dimensions)
        self.htl_mpnn = GCNConv(in_channels=2, out_channels=self.htl_embedding_dimensions)

# Evaluation

In [7]:
model = Net()
model.load_state_dict(torch.load('models/GNN_1_best.pth'))
model.eval()

Net(
  (etl_mpnn): GCNConv(1, 16)
  (htl_mpnn): GCNConv(1, 16)
  (fc1): Linear(in_features=109, out_features=32, bias=True)
  (regression_layers): ModuleList(
    (0): Linear(in_features=109, out_features=32, bias=True)
    (1): Linear(in_features=32, out_features=32, bias=True)
  )
  (fc_out): Linear(in_features=32, out_features=1, bias=True)
)

In [9]:
all_predictions = []
all_labels = []

# Evaluate the model
with torch.no_grad():  # Disable gradient calculation for evaluation
    for batch in eval_loader:
        etl_features = batch['etl_features'].squeeze(0)
        htl_features = batch['htl_features'].squeeze(0)
        etl_edge_indices = batch['etl_edge_indices'].squeeze(0)
        htl_edge_indices = batch['htl_edge_indices'].squeeze(0)
        absorber = batch['absorber']
        bandgap = batch['bandgap']
        true_labels = batch['pce']
        
        # Forward pass
        predictions = model(etl_features, htl_features, etl_edge_indices, htl_edge_indices, absorber, bandgap)
        
        # Store predictions and true labels
        all_predictions.extend(predictions.cpu().numpy())
        all_labels.extend(true_labels.cpu().numpy())

# Compute evaluation metrics (e.g., Mean Squared Error)
mse = mean_squared_error(all_labels, all_predictions)
print(f"Mean Squared Error: {mse}")

# You can also compute other metrics like R^2 score, MAE, etc.
from sklearn.metrics import r2_score, mean_absolute_error

r2 = r2_score(all_labels, all_predictions)
mae = mean_absolute_error(all_labels, all_predictions)

print(f"R^2 Score: {r2}")
print(f"Mean Absolute Error: {mae}")

NameError: name 'eval_loader' is not defined