<a href="https://colab.research.google.com/github/Sfonzie98/Dissertation/blob/main/Utils.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Libraries

In [None]:
!pip3 install torch==1.12.0+cu113 -f https://download.pytorch.org/whl/torch_stable.html
!pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-1.12.0+cu113.html
!pip install -q git+https://github.com/pyg-team/pytorch_geometric.git
!pip install import-ipynb
!pip install networkx
!pip install torchmetrics
!pip install rdkit-pypi

In [None]:
# Useful libraries to load and process the data
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.exceptions import ConvergenceWarning
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC
import import_ipynb
import warnings
import os
import os.path as osp
from tqdm import tqdm

# Useful libraries to create and work on graphs
import torch
import torch_geometric
import torch.nn.functional as F
from torch_geometric.loader import DataLoader
from torch_geometric import data
from torch_geometric.data import Data, Dataset, Batch
import torch_geometric.utils
from torch.nn import Linear, BatchNorm1d, ReLU, Sequential
import torch.nn.functional as F
from torch_geometric.nn import global_mean_pool, GCNConv, GINConv, WLConv
import networkx as nx
from torchmetrics import F1Score

# Useful libraries to visualize chemical structures and calculate chemical properties 
import rdkit
from rdkit import Chem
from rdkit.Chem import Draw
from rdkit.Chem.rdMolDescriptors import CalcMolFormula

# Utils

In [None]:
# Draw colorful molecules

def draw_molecule(torch_graph):
  ''' (PyTorch Geometric graph) --> None

  Draw the molecule stored in ``torch_graph`` and show the picture.

  The graph structure is taken from ``torch_graph`` (converted to an
  undirected NetworkX graph), while the atom symbols used as node labels
  are read from the RDKit molecule parsed from ``torch_graph['smiles']``.
  Nodes are coloured according to their element:
  Carbon --> black
  Oxygen --> red
  Nitrogen --> blue
  Sulfur --> yellow
  Boron --> green
  Phosphorus --> orange
  Chlorine --> pink
  Bromine --> brown
  Selenium --> azure
  Others --> gray

  Raises ValueError when RDKit cannot parse the SMILES string.
  '''
  color_map = {'C': 'black',
               'O': 'red',
               'N': 'blue',
               'S': 'yellow',
               'B': 'green',
               'P': 'orange',
               'Cl': 'pink',
               'Br': 'brown',
               'Se': 'azure'}

  smiles = torch_graph['smiles']
  mol = Chem.MolFromSmiles(smiles)
  if mol is None:
    # MolFromSmiles signals a parsing failure by returning None; fail here
    # with a clear message instead of an AttributeError further down.
    raise ValueError(f'Cannot parse SMILES string: {smiles!r}')

  G = torch_geometric.utils.to_networkx(torch_graph, to_undirected=True)

  # Attach the element symbol to each node.
  # NOTE(review): this assumes RDKit atom indices match the node indices of
  # the PyTorch Geometric graph -- confirm against the dataset builder.
  for atom in mol.GetAtoms():
    G.add_node(atom.GetIdx(), atom_symbol=atom.GetSymbol())

  molecule_atom = nx.get_node_attributes(G, 'atom_symbol')

  # Elements missing from color_map (and nodes without a symbol) become gray.
  molecule_colors = [color_map.get(molecule_atom.get(idx), 'gray')
                     for idx in G.nodes()]

  plt.figure(figsize=(8, 8))
  nx.draw(G,
          labels=molecule_atom,
          with_labels=True,
          node_color=molecule_colors,
          node_size=200)

  plt.show()

In [None]:
# Returns the chemical formula of a molecule

def formula(smiles):
  ''' (SMILES str) --> str

  The function converts the SMILES string to an RDKit mol object and
  returns the chemical formula computed by ``CalcMolFormula``.

  Raises ValueError when RDKit cannot parse the SMILES string.
  '''

  mol = Chem.MolFromSmiles(smiles)
  if mol is None:
    # MolFromSmiles returns None on a parsing failure; report it explicitly
    # rather than handing None to CalcMolFormula.
    raise ValueError(f'Cannot parse SMILES string: {smiles!r}')
  return CalcMolFormula(mol)

In [None]:
# Returns the class of activity

def activity(torch_graph):
  ''' (PyTorch Geometric graph) --> str

  Read the label stored in ``torch_graph.y`` and translate it into a
  human-readable activity class:
  2 --> 'Very Active'
  1 --> 'Moderately Active'
  0 --> 'Non Active'
  Any other value yields a "not available" message.
  '''
  label = torch_graph.y

  # Compared with ``==`` (not a dict lookup) so that labels which are only
  # equal to, not hash-identical with, the integers still match.
  known_classes = ((2, 'Very Active'),
                   (1, 'Moderately Active'),
                   (0, 'Non Active'))
  for value, description in known_classes:
    if label == value:
      return description

  return 'Activity data is non available'


In [None]:
# Train the model for one pass over the loader (loss + L2 penalty)

def train(model, criterion, optimizer, loader, lamb=1):
  ''' (model, loss function, optimizer, loader, number) --> float

  Put the model in training mode and run one optimisation step per batch
  of ``loader``. The optimised quantity is the criterion value plus
  ``lamb`` times the sum of the squared entries of all model parameters.

  Returns the penalised loss summed over the batches (each weighted by its
  ``num_graphs``) and divided by ``len(loader.dataset)``.
  '''
  model.train()
  running_loss = 0
  for batch in loader:
    prediction = model(batch.x, batch.edge_index, batch.batch)
    base_loss = criterion(prediction, batch.y)

    # L2 penalty: sum of squares over every parameter of the model.
    squared_weights = sum(param.pow(2.0).sum()
                          for param in model.parameters())
    penalised_loss = base_loss + lamb * squared_weights

    optimizer.zero_grad()
    penalised_loss.backward()
    optimizer.step()

    running_loss += penalised_loss.item() * batch.num_graphs
  return running_loss / len(loader.dataset)

In [None]:
# Calculate the loss function on the test dataset

def test_loss(model, criterion, loader):
  ''' (model, loss function, loader) --> float

  Evaluate ``criterion`` on every batch of ``loader`` without tracking
  gradients and return the loss summed over the batches (each weighted by
  its ``num_graphs``) divided by ``len(loader.dataset)``.

  The model is switched to evaluation mode for the duration of the call so
  that layers such as dropout or batch normalisation do not behave as in
  training; the mode the model had on entry is restored before returning.
  '''
  was_training = model.training
  model.eval()
  total_loss_val = 0
  try:
    with torch.no_grad():
      for data in loader:
        output = model(data.x, data.edge_index, data.batch)
        batch_loss = criterion(output, data.y)

        total_loss_val += batch_loss.item() * data.num_graphs
  finally:
    # Leave the model exactly as the caller handed it over.
    model.train(was_training)
  return total_loss_val / len(loader.dataset)


# The name starts with "test_", so pytest would try to collect this helper
# as a test case; mark it explicitly as not being one.
test_loss.__test__ = False

In [None]:
# Calculate the model's macro-averaged F1 score

def acc(model, n_class, loader):
  ''' (model, int, loader) --> tensor

  Put the model in evaluation mode, run it on every batch of ``loader``
  and return the macro-averaged F1 score (torchmetrics ``F1Score`` with
  ``num_classes=n_class``) accumulated over all the batches.
  '''
  model.eval()

  # A single metric instance accumulates the statistics of every batch, so
  # the returned score covers the whole loader and not only its last batch.
  f1 = F1Score(average='macro', num_classes=n_class)

  with torch.no_grad():  # inference only: no gradients are needed
    for data in loader:
      output = model(data.x, data.edge_index, data.batch)
      f1.update(output, data.y)

  return f1.compute()