### Convolutional Neural Network

#### explore data

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# 
X = pd.read_csv("./data/dataTrain.csv", header=0, usecols=['electronegativity', 'd orbital of metal', 'group', 'radius/pm', 'first ionization energy'])
y = pd.read_csv("./Data/dataTrain.csv", header=0, usecols= ['1V-ORR'])


#print(X['electronegativity'])
#print(y['2V-OER'])
plt.figure(figsize=(14, 2))
plt.subplot(1, 5, 1)
plt.scatter(X['electronegativity'], y['1V-ORR'])
plt.xlabel('electronegativity')
plt.ylabel('1V-ORR')

plt.subplot(1, 5, 2)
plt.scatter(X['d orbital of metal'], y['1V-ORR'])
plt.xlabel('d orbital of metal')

plt.subplot(1, 5, 3)
plt.scatter(X['group'], y['1V-ORR'])
plt.xlabel('group')

plt.subplot(1, 5, 4)
plt.scatter(X['radius/pm'], y['1V-ORR'])
plt.xlabel('radius/pm')

plt.subplot(1, 5, 5)
plt.scatter(X['first ionization energy'], y['1V-ORR'])
plt.xlabel('first ionization energy')

plt.show()

#### Prepare data for Pytorch

In [1]:
import torch
import torch.utils.data as td
from sklearn.model_selection import train_test_split
import pandas as pd

def loaddata(pathway, Ylabel, i, j, k):
    df = pd.read_csv(pathway)
    Xfeatures = ['electronegativity', 'd orbital of metal', 'group', 'radius/pm', 'first ionization energy']
    x = df[Xfeatures].values
    y = df[Ylabel].values

    label0, label1, label2 = x[i], x[j], x[k]
    label0, label1, label2 = torch.Tensor(label0).float(), torch.Tensor(label1).float(), torch.Tensor(label2).float()
    label0 = label0.reshape(1, 1, 5).float()
    label1 = label1.reshape(1, 1, 5).float()
    label2 = label2.reshape(1, 1, 5).float()
    print(label0)
    
    # split data
    X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.1, random_state=10)
    
    # create loader for the training data and labels
    train_x = torch.Tensor(X_train).float()
    train_x = train_x.reshape(12, 1, 5).float()
    train_y = torch.Tensor(y_train).float()
    train_data = td.TensorDataset(train_x, train_y)
    train_loader = td.DataLoader(train_data, batch_size=1, shuffle=False, num_workers=1)

    # create loader for the test data and labels
    test_x = torch.Tensor(X_test).float()
    test_x = test_x.reshape(2, 1, 5).float()
    test_y = torch.Tensor(y_test).float()
    test_data = td.TensorDataset(test_x, test_y)
    test_loader = td.DataLoader(test_data, batch_size=1, shuffle=False, num_workers=1)

    return train_loader, test_loader, label0, label1, label2

pathway = "./Data/dataTrain.csv"
Ylabel = "1V-ORR"
train_loader, test_loader, label0, label1, label2 = loaddata(pathway, Ylabel, 1, 2, 3)


tensor([[[-0.8862, -1.3299, -1.2850, -0.2905, -0.9610]]])


#### Define the CNN

In [2]:
import torch.nn as nn
import torch.nn.functional as F

class Conv1d_same_padding(nn.Module):
    def __init__(self, inplanes, planes, kernel_size, strides=1, dilation=1):
        super(Conv1d_same_padding, self).__init__()
        self.kernel_size = kernel_size
        self.strides = strides
        self.dilation = dilation
        self.conv1d = nn.Conv1d(inplanes, planes, kernel_size, strides, bias=False)
        nn.init.xavier_uniform_(self.conv1d.weight)

    def forward(self, x):
        input_rows = x.size(2)
        out_rows = (input_rows + self.strides -1) // self.strides
        padding_rows = max(0, (out_rows - 1) * self.strides + (self.kernel_size - 1) * self.dilation + 1 - input_rows)
        x = F.pad(x, pad=(0, padding_rows), mode="constant")
        x = self.conv1d(x)
        return x

class CNNNet(nn.Module):
    def __init__(self):
        super(CNNNet, self).__init__()

        self.conv0 = Conv1d_same_padding(4, 16, 1)
        self.conv1 = Conv1d_same_padding(16, 16, 1)
        self.conv2 = Conv1d_same_padding(16, 16, 1)
        self.conv3 = Conv1d_same_padding(16, 16, 1)
        self.conv4 = Conv1d_same_padding(16, 8, 1)

        self.dense0 = nn.Linear(40, 32, bias=False)
        self.dense1 = nn.Linear(32, 16, bias=False)
        self.dense2 = nn.Linear(16, 1, bias=False)

        nn.init.xavier_uniform_(self.dense0.weight)
        nn.init.xavier_uniform_(self.dense1.weight)
        nn.init.xavier_uniform_(self.dense2.weight)

    def forward(self, x):
        x = F.relu(self.conv0(x))
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))

        x = torch.flatten(x)
        
        x = F.relu(self.dense0(x))
        x = F.relu(self.dense1(x))
        x = self.dense2(x)
        return x

model = CNNNet()
print(model)

CNNNet(
  (conv0): Conv1d_same_padding(
    (conv1d): Conv1d(4, 16, kernel_size=(1,), stride=(1,), bias=False)
  )
  (conv1): Conv1d_same_padding(
    (conv1d): Conv1d(16, 16, kernel_size=(1,), stride=(1,), bias=False)
  )
  (conv2): Conv1d_same_padding(
    (conv1d): Conv1d(16, 16, kernel_size=(1,), stride=(1,), bias=False)
  )
  (conv3): Conv1d_same_padding(
    (conv1d): Conv1d(16, 16, kernel_size=(1,), stride=(1,), bias=False)
  )
  (conv4): Conv1d_same_padding(
    (conv1d): Conv1d(16, 8, kernel_size=(1,), stride=(1,), bias=False)
  )
  (dense0): Linear(in_features=40, out_features=32, bias=False)
  (dense1): Linear(in_features=32, out_features=16, bias=False)
  (dense2): Linear(in_features=16, out_features=1, bias=False)
)


#### Train the model

In [3]:
def train(model, pathway, optimizer, i, j, k):
    # set the model to train the model
    model.train()
    train_loss = 0
    
    train_loader, test_loader, label0, label1, label2 = loaddata(pathway, Ylabel, i, j, k)
    loss_criteria = nn.MSELoss()
    index = 0
    for datax, target in train_loader:
        index += 1
        data = torch.cat([datax, label0, label1, label2], dim=1)
        # feed forward
        optimizer.zero_grad()
        out = model(data)
        loss = loss_criteria(out, target)
        train_loss += loss.item()

        # backpropagate 
        loss.backward()
        optimizer.step()

    avg_loss = train_loss / (index + 1)
    print("Training set: Average loss: {:.6f}".format(avg_loss))
    return avg_loss

def test(model, pathway, i, j, k):
    # set the model to evaluation mode
    model.eval()
    test_loss = 0
    
    train_loader, test_loader, label0, label1, label2 = loaddata(pathway, Ylabel, i, j, k)
    loss_criteria = nn.MSELoss()

    with torch.no_grad():
        index = 0
        for datax, target in test_loader:
            index += 1
            data = torch.cat([datax, label0, label1, label2], dim=1)
            out = model(data)
            test_loss += loss_criteria(out, target).item()

        avg_loss = test_loss /(index+1)
        print("Test set: Average loss: {:.6f}".format(avg_loss))
        return avg_loss
            

In [None]:
 
def train(model, train_loader, optimizer, label0, label1, label2):
    # set the model to train the model
    model.train()
    train_loss = 0
    
    loss_criteria = nn.MSELoss()
    index = 0
    for datax, target in train_loader:
        index += 1
        data = torch.cat([datax, label0, label1, label2], dim=1)
        # feed forward
        optimizer.zero_grad()
        out = model(data)
        loss = loss_criteria(out, target)
        train_loss += loss.item()

        # backpropagate 
        loss.backward()
        optimizer.step()

    avg_loss = train_loss / (index + 1)
    print("Training set: Average loss: {:.6f}".format(avg_loss))
    return avg_loss
    
def test(model, test_loader, label0, label1, label2):
    # set the model to evaluation mode
    model.eval()
    test_loss = 0
    
    loss_criteria = nn.MSELoss()

    with torch.no_grad():
        index = 0
        for datax, target in test_loader:
            index += 1
            data = torch.cat([datax, label0, label1, label2], dim=1)
            out = model(data)
            test_loss += loss_criteria(out, target).item()

        avg_loss = test_loss /(index+1)
        print("Test set: Average loss: {:.6f}".format(avg_loss))
        return avg_loss

In [4]:
import torch.nn as nn

# use an "Adam" optimizer to adjust weights
learning_rate = 0.001
optimizer = torch.optim.Adam(model.parameters(), lr= learning_rate)
optimizer.zero_grad()

# model and datafile
train_loader, test_loader, label0, label1, label2 = loaddata(pathway, Ylabel, 3, 4, 13)
model = CNNNet()

# track metrics
epoch_nums = []
training_loss = []
validation_loss = []

epochs = 5
for epoch in range(1, epochs+1):
    print("Epoch: {}".format(epoch))
    
    train_loss = train(model, train_loader, optimizer, label0, label1, label2)
    test_loss = test(model, test_loader, label0, label1, label2)

    epoch_nums.append(epoch)
    training_loss.append(train_loss)
    validation_loss.append(test_loss)



Epoch: 1
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Training set: Average loss: 2.664845
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Test set: Average loss: 3.332148
Epoch: 2
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Training set: Average loss: 2.664845
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Test set: Average loss: 3.332148
Epoch: 3
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Training set: Average loss: 2.664845
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Test set: Average loss: 3.332148
Epoch: 4
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Training set: Average loss: 2.664845
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Test set: Average loss: 3.332148
Epoch: 5
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Training set: Average loss: 2.664845
tensor([[[-0.6303, -0.9945, -0.9334, -0.6453, -1.0398]]])
Test set: Average loss: 3.332148


In [None]:
import matplotlib.pyplot as plt

plt.plot(epoch_nums, training_loss)
plt.plot(epoch_nums, validation_loss)