In [1]:
import numpy as np
import torch.optim as optim # subpackage implementing various optimization algorithms
#from sklearn import metrics 
import torch
import torch.nn as nn
from data import CIFData   #CIFData is the code of the article in charge of processing the data, 
#it does practically everything that was already developed at the beginning of the notebook.
from data import get_train_val_test_loader #This article code is in charge of splitting the data 
#and uploading it to pytorch.
from data import collate_pool 
from torch.autograd import Variable
from model import CrystalGraphConvNet 

ImportError: cannot import name 'CIFData' from 'data' (/home/mlgraphs/anaconda3/envs/graphnet/lib/python3.10/site-packages/data/__init__.py)

In [2]:
#Check if pytorch recognizes your GPU
torch.cuda.is_available()

True

In [3]:
dataset = CIFData('/home/mlgraphs/CGCNN/cgcnn/data/complete-data') #Read the data 

In [4]:
collate_fn = collate_pool
#Split the data list 
train_loader, val_loader, test_loader = get_train_val_test_loader(
    dataset=dataset,
    collate_fn=collate_fn,
    batch_size=64,
    train_ratio=.8,
    val_ratio=.15,
    test_ratio=.05,
    train_size=None,
    val_size=None,
    test_size=None,
    return_test=True)

In [5]:
structures, _, _ = dataset[0]
orig_atom_fea_len = structures[0].shape[-1]
nbr_fea_len = structures[1].shape[-1]
model = CrystalGraphConvNet(orig_atom_fea_len, nbr_fea_len,
                            atom_fea_len=20,
                            n_conv=1,
                            h_fea_len=10,
                            n_h=5,
                            classification=False)
model.cuda() #Transfer the model to gpu 

CrystalGraphConvNet(
  (embedding): Linear(in_features=92, out_features=20, bias=True)
  (convs): ModuleList(
    (0): ConvLayer(
      (fc_full): Linear(in_features=81, out_features=40, bias=True)
      (sigmoid): Sigmoid()
      (softplus1): Softplus(beta=1, threshold=20)
      (bn1): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (bn2): BatchNorm1d(20, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (softplus2): Softplus(beta=1, threshold=20)
    )
  )
  (conv_to_fc): Linear(in_features=20, out_features=10, bias=True)
  (conv_to_fc_softplus): Softplus(beta=1, threshold=20)
  (fcs): ModuleList(
    (0): Linear(in_features=10, out_features=10, bias=True)
    (1): Linear(in_features=10, out_features=10, bias=True)
    (2): Linear(in_features=10, out_features=10, bias=True)
    (3): Linear(in_features=10, out_features=10, bias=True)
  )
  (softpluses): ModuleList(
    (0): Softplus(beta=1, threshold=20)
    (1): Softplus(beta=

In [6]:
criterion = nn.MSELoss() #Declare the mean square error loss function
optimizer = optim.Adam(model.parameters(),0.001) #Declare the Adam optimizer

In [7]:
def train(data_loader):
    model.train()
    for i, (input, target,_) in enumerate(data_loader):
        #Transfer the data to gpu 
        input_var = (Variable(input[0].cuda(non_blocking=True)),
                         Variable(input[1].cuda(non_blocking=True)),
                         input[2].cuda(non_blocking=True),
                         [crys_idx.cuda(non_blocking=True) for crys_idx in input[3]])
        target_var=Variable(target.cuda(non_blocking=True))
        optimizer.zero_grad()  # Clear gradients.
        outputs    = model(*input_var)  # Perform a single forward pass.
        loss   = criterion(outputs, target_var)  # Compute the loss 
        loss.backward()  # Derive gradients.
        optimizer.step()  # Update parameters based on gradients.
     
    return loss.item()

In [8]:
def mae(prediction, target):
    """
    Computes the mean absolute error between prediction and target

    Parameters
    ----------

    prediction: torch.Tensor (N, 1)
    target: torch.Tensor (N, 1)
    """
    return torch.mean(torch.abs(target - prediction))

In [9]:
def test(data_loader):
# since we're not training, we don't need to calculate the gradients for our outputs
#It will reduce memory consumption
    model.eval()
    with torch.no_grad():  
        for i, (input, target,_) in enumerate(data_loader):
            input_var = (Variable(input[0].cuda(non_blocking=True)),
                         Variable(input[1].cuda(non_blocking=True)),
                         input[2].cuda(non_blocking=True),
                         [crys_idx.cuda(non_blocking=True) for crys_idx in input[3]])
            target_var = Variable(target.cuda(non_blocking=True))
            output = model(*input_var)
            mae_error=mae(target_var, output)
    return mae_error.item()

In [None]:
epochs=10
#Lists where we are going to store the losses and acuraccies
t_l =[]
t_a =[]
v_l =[]
v_a =[]

#Seed to reproduce the same values
np.random.seed(1)
for epoch in range(epochs+1):
    train_loss= train(train_loader)
    val_loss = train(val_loader)
    train_acc= test(train_loader)
    val_acc = test(val_loader)
    #Save the losses and accuracies to be plotted later 
    t_l.append(train_loss)
    t_a.append(train_acc)
    v_l.append(val_loss)
    v_a.append(val_acc)
    
    #Print every 10 epochs 
    print(f'Epoch: {epoch:3d}, Loss: {train_loss:.4f}, Acc: {train_acc:.4f}, val_loss: {val_loss:.4f},val_acc: {val_acc:.4f}')
    #print(f'Epoch: {epoch:3d}, Loss: {train_loss:.4f}, val_loss: {val_loss:.4f}')



Epoch:   0, Loss: 0.7381, Acc: 0.8770, val_loss: 1.1674,val_acc: 0.7893




In [None]:
import matplotlib.pyplot as plt
plt.plot(t_a)
plt.plot(v_a)
plt.xlabel('epoch')
plt.ylabel('MAE')
plt.legend(['Train','Valid'])
plt.title('Train vs Valid MAE')
 
plt.show()

In [None]:
plt.plot(t_l)
plt.plot(v_l)
plt.xlabel('epoch')
plt.ylabel('losses')
plt.legend(['Train','Valid'])
plt.title('Train vs Valid Losses')
 
plt.show()

In [None]:
y_hat=[]
y=[]
for i, (input, target,_) in enumerate(test_loader):
    input_var = (Variable(input[0].cuda(non_blocking=True)),
                         Variable(input[1].cuda(non_blocking=True)),
                         input[2].cuda(non_blocking=True),
                         [crys_idx.cuda(non_blocking=True) for crys_idx in input[3]])
    target_var = Variable(target.cuda(non_blocking=True))
    pred = model(*input_var)
    y_hat.append(pred)
    y.append(target_var)

In [None]:
y_1_hat=y_hat[0].data.cpu().numpy()
y_1=y[0].data.cpu().numpy()

In [None]:
print(y_1_hat[:10])
print(y_1[:10])