In [None]:
import torch.nn as nn
import torch
import numpy as np
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import random
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import joblib  
# Seed every random number generator used by the notebook (torch, numpy, stdlib)
# with the same value, in that order, so a fresh-kernel run is repeatable.
for _seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    _seed_fn(1)

class MAPELoss(nn.Module):
    """Mean absolute percentage error, returned as a fraction (not multiplied by 100).

    Args:
        eps: lower bound applied to ``|target|`` in the denominator. It keeps the
            loss finite when a target is exactly (or almost) zero; for any target
            with ``|target| >= eps`` the value equals the plain MAPE formula.
    """

    def __init__(self, eps=1e-8):
        super(MAPELoss, self).__init__()
        self.eps = eps

    def forward(self, output, target):
        """Return ``mean(|target - output| / max(|target|, eps))`` as a scalar tensor."""
        # |a / b| == |a| / |b|, so clamping |target| only changes the result where
        # the unclamped formula would have produced inf or NaN.
        denominator = torch.clamp(torch.abs(target), min=self.eps)
        loss = torch.mean(torch.abs(target - output) / denominator)
        return loss

class MAELoss(nn.Module):
    """Mean absolute error between a prediction and its target."""

    def __init__(self):
        super().__init__()

    def forward(self, output, target):
        """Return the mean of ``|target - output|`` as a scalar tensor."""
        residual = target - output
        return residual.abs().mean()

class MSELoss(nn.Module):
    """Mean squared error between a prediction and its target."""

    def __init__(self):
        super().__init__()

    def forward(self, output, target):
        """Return the mean of ``(target - output) ** 2`` as a scalar tensor."""
        residual = target - output
        return (residual ** 2).mean()

def weights_init(m):
    """Initialise BatchNorm layers: weight ~ N(1.0, 0.02), bias = 0.

    Intended for ``model.apply(weights_init)``. Modules whose class name does not
    contain ``'BatchNorm'`` are left untouched. BatchNorm layers created with
    ``affine=False`` have ``weight``/``bias`` set to ``None`` and are skipped
    instead of raising ``AttributeError``.
    """
    if 'BatchNorm' not in m.__class__.__name__:
        return
    weight = getattr(m, 'weight', None)
    bias = getattr(m, 'bias', None)
    if weight is not None:
        nn.init.normal_(weight, 1.0, 0.02)
    if bias is not None:
        nn.init.constant_(bias, 0)

class FeatureDataset(Dataset):
    '''
    Dataset wrapper around a feature matrix.

    Args: x is a 2D numpy array [x_size, x_features]
    '''
    def __init__(self, x):
        self.x = x

    def __len__(self):
        """Number of rows (samples) in the wrapped array."""
        return self.x.shape[0]

    def __getitem__(self, idx):
        """Return row ``idx`` as a float32 tensor."""
        return torch.FloatTensor(self.x[idx])

    def getBatch(self, idxs=()):
        """Return the rows selected by ``idxs`` stacked into one float32 tensor.

        Args:
            idxs: iterable of row indices, or ``None``.

        Returns:
            ``None`` when ``idxs`` is ``None``; otherwise a tensor of shape
            ``[len(idxs), x_features]`` (zero rows when ``idxs`` is empty).
        """
        if idxs is None:
            return idxs
        rows = [self[i] for i in idxs]
        if not rows:
            # torch.stack rejects an empty list; return an empty batch that still
            # carries the feature dimension(s) so it can be concatenated later.
            return torch.zeros((0, *self.x.shape[1:]), dtype=torch.float32)
        # A list of row tensors cannot be passed to the FloatTensor constructor;
        # stacking along a new leading dimension builds the 2-D batch.
        return torch.stack(rows)

def normalizing_data(data, seed=1, test_size=0.2, fit_on_train_only=False):
    """Min-max scale the input columns, persist the scalers and split train/test.

    Args:
        data: DataFrame containing the composition columns
            (Ba, Ca, Sr, Ti, Zr, Sn, Hf), the descriptor columns (W, EI, EA, μ)
            and the target column ``d33(pC/N)``.
        seed: ``random_state`` passed to ``train_test_split``.
        test_size: fraction of rows held out for testing (default 0.2).
        fit_on_train_only: when True the two ``MinMaxScaler`` objects are fitted
            on the training rows only, so no test-set statistics leak into the
            features. The default (False) fits on every row.
            NOTE(review): the default fits the scalers on rows that later end up
            in the test split; consider switching callers to True.

    Returns:
        ``(x, y, train_features, test_features, train_labels, test_labels)`` as
        float32 tensors, moved to CUDA when it is available. ``x``/``y`` hold all
        rows in their original order.

    Raises:
        KeyError: if a required column is missing from ``data``.

    Side effects:
        Writes ``composition_scaler.joblib`` and ``descriptors_scaler.joblib`` to
        the current working directory and prints the scaled features and target.
    """
    composition_cols = ['Ba', 'Ca', 'Sr', 'Ti', 'Zr', 'Sn', 'Hf']
    descriptor_cols = ['W', 'EI', 'EA', 'μ']
    target_col = 'd33(pC/N)'

    required = composition_cols + descriptor_cols + [target_col]
    missing = [col for col in required if col not in data.columns]
    if missing:
        raise KeyError(f"normalizing_data: missing required column(s): {missing}")

    composition = data[composition_cols]
    descriptors = data[descriptor_cols]

    # Split row positions instead of the tensors themselves. The partition drawn
    # by train_test_split depends only on the number of rows and random_state, so
    # this selects the same rows as splitting the data directly while keeping
    # sklearn away from (possibly CUDA) tensors.
    all_rows = np.arange(len(data))
    train_idx, test_idx = train_test_split(all_rows, test_size=test_size, random_state=seed)
    fit_rows = train_idx if fit_on_train_only else all_rows

    composition_scaler = MinMaxScaler().fit(composition.iloc[fit_rows])
    normalized_composition = composition_scaler.transform(composition)
    descriptors_scaler = MinMaxScaler().fit(descriptors.iloc[fit_rows])
    normalized_descriptors = descriptors_scaler.transform(descriptors)

    # Persist the fitted scalers so they can be reloaded later.
    joblib.dump(composition_scaler, 'composition_scaler.joblib')
    joblib.dump(descriptors_scaler, 'descriptors_scaler.joblib')

    normalized_composition_df = pd.DataFrame(normalized_composition, columns=composition.columns)
    normalized_descriptors_df = pd.DataFrame(normalized_descriptors, columns=descriptors.columns)

    # Feature matrix: scaled composition columns followed by scaled descriptors.
    x = pd.concat([normalized_composition_df, normalized_descriptors_df], axis=1)
    print(x)

    y = data[[target_col]]
    print(y)

    x = torch.FloatTensor(x.values)
    y = torch.FloatTensor(y.values)

    if torch.cuda.is_available():
        x = x.cuda()
        y = y.cuda()

    # Index tensors live on the same device as the data they select from.
    train_pos = torch.as_tensor(train_idx, dtype=torch.long, device=x.device)
    test_pos = torch.as_tensor(test_idx, dtype=torch.long, device=x.device)
    train_features, test_features = x[train_pos], x[test_pos]
    train_labels, test_labels = y[train_pos], y[test_pos]
    print(y)
    return x, y, train_features, test_features, train_labels, test_labels


In [None]:
import datetime
import torch.utils.data as Data
import pandas as pd
import torch
import torch.nn.functional as F    
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from sklearn import preprocessing

  
# Empty results table with the columns later filled by the hyper-parameter search.
table = pd.DataFrame(
    columns=['target', 'batch_size', 'lr', 'module__n_hidden', 'module__w']
)

plt.close('all')
starttime = datetime.datetime.now()  # wall-clock reference for the timing printouts
data = pd.read_excel('data-1.xlsx')

# Scale the inputs and split them into train / test tensors.
(x, y,
 train_features, test_features,
 train_labels, test_labels) = normalizing_data(data, seed=1)

# Show the training tensors followed by their shapes.
for _shown in (train_features, train_labels, train_features.shape, train_labels.shape):
    print(_shown)





In [None]:
import datetime
import torch.utils.data as Data
import pandas as pd
import torch
import torch.nn.functional as F    
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from bayes_opt import BayesianOptimization
from sklearn import preprocessing
import os


class Net(nn.Module):
    """Fully connected ReLU regressor.

    Layout: ``Linear(n_feature, n_hidden)`` -> ``w`` x ``Linear(n_hidden, n_hidden)``
    -> ``Linear(n_hidden, n_output)``, with ReLU after every layer except the last.
    All weight matrices are re-initialised with Kaiming-normal; biases keep the
    default ``nn.Linear`` initialisation.
    """

    def __init__(self, n_feature = 11, n_hidden = 64, n_output = 1, w = 6):
        super().__init__()

        # Layers are created and re-initialised in a fixed order so that, for a
        # given RNG seed, the resulting weights are reproducible.
        self.hidden1 = nn.Linear(n_feature, n_hidden)
        nn.init.kaiming_normal_(self.hidden1.weight)

        self.hiddens = nn.ModuleList([nn.Linear(n_hidden, n_hidden) for _ in range(w)])
        for layer in self.hiddens:
            nn.init.kaiming_normal_(layer.weight)

        self.predict = nn.Linear(n_hidden, n_output)
        nn.init.kaiming_normal_(self.predict.weight)

    def forward(self, x):
        """Map a batch of feature rows to ``n_output`` predictions per row."""
        activations = F.relu(self.hidden1(x))
        for layer in self.hiddens:
            activations = F.relu(layer(activations))
        return self.predict(activations)
       

def train(net, num_epochs, batch_size, train_features, test_features, train_labels, test_labels,
          train_loader,
          optimizer,
          log_every=100):
    """Train ``net`` with the MAPE loss and periodically report full-set losses.

    Args:
        net: model to optimise in place.
        num_epochs: number of passes over ``train_loader``.
        batch_size: unused here (batching comes from ``train_loader``); kept so
            existing call sites keep working.
        train_features, train_labels: full training tensors used for monitoring.
        test_features, test_labels: full test tensors used for monitoring.
        train_loader: iterable yielding ``(x, y)`` mini-batches.
        optimizer: optimiser already bound to ``net``'s parameters.
        log_every: evaluate and print the losses every ``log_every`` epochs
            (default 100; values below 1 are treated as 1).

    Returns:
        ``(train_ls, test_ls)``: lists with the monitored train/test losses, one
        entry per logged epoch.
    """
    print ("\n=== train begin ===")

    log_every = max(1, int(log_every))
    train_ls, test_ls = [], []
    loss = MAPELoss()
    for epoch in range(num_epochs):
        for x, y in train_loader:
            ls = loss(net(x).view(-1, 1), y.view(-1, 1))
            optimizer.zero_grad()
            ls.backward()
            optimizer.step()
        if epoch % log_every == 0:
            # Monitoring only: no gradients are needed, so skip building the
            # autograd graph for the full-dataset forward passes.
            with torch.no_grad():
                train_ls.append(loss(net(train_features).view(-1, 1), train_labels.view(-1, 1)).item())
                test_ls.append(loss(net(test_features).view(-1, 1), test_labels.view(-1, 1)).item())
            print ("epoch %d: train loss %f, test loss %f" % (epoch, train_ls[-1], test_ls[-1]))

    print ("=== train end ===")
    return train_ls, test_ls
    
def test(model, test_loader, set_name="Test set"):
    """Evaluate ``model`` with the MAPE loss over every batch of ``test_loader``.

    Each batch mean is weighted by the number of target values in that batch, so
    the result is the mean over all samples even when the last batch is smaller,
    and it does not depend on how a shuffling loader happens to group the rows.

    Args:
        model: network to evaluate; it is switched to eval mode and left there.
        test_loader: iterable yielding ``(inputs, target)`` batches.
        set_name: label used in the printed summary line.

    Returns:
        The average loss as a Python float.

    Raises:
        ValueError: if ``test_loader`` yields no samples.
    """
    model.eval()
    loss = MAPELoss()
    weighted_sum = 0.0
    n_samples = 0
    with torch.no_grad():
        for inputs, target in test_loader:
            target = target.view(-1, 1)
            batch_count = target.shape[0]
            batch_loss = loss(model(inputs).view(-1, 1), target).item()
            weighted_sum += batch_loss * batch_count
            n_samples += batch_count

    if n_samples == 0:
        raise ValueError(f"{set_name}: cannot evaluate on an empty data loader")

    test_loss = weighted_sum / n_samples

    print(f'{set_name}: Average loss: {test_loss:.4f}')
    return test_loss
        
def plotCurve(x_vals, y_vals,
              x_label, y_label,
              x2_vals=None, y2_vals=None,
              legend=None,
              figsize=(3.5, 2.5)):
    """Plot one curve, and optionally a second dotted one, on the current axes.

    Args:
        x_vals, y_vals: coordinates of the primary curve.
        x_label, y_label: axis labels.
        x2_vals, y2_vals: optional coordinates of a second curve drawn with a
            dotted line. It is drawn only when both are given and non-empty.
        legend: optional sequence of legend entries.
        figsize: accepted for call compatibility; the function draws on the
            current figure and does not resize it.
    """
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.plot(x_vals, y_vals)

    # Explicit None/length checks: truth-testing a numpy array or tensor with
    # more than one element raises, so `if x2_vals and y2_vals` is unsafe.
    has_second_curve = (
        x2_vals is not None and y2_vals is not None
        and len(x2_vals) > 0 and len(y2_vals) > 0
    )
    if has_second_curve:
        plt.plot(x2_vals, y2_vals, linestyle=':')

    if legend:
        plt.legend(legend)

In [None]:
import datetime
import torch.utils.data as Data
import pandas as pd
import torch
import torch.nn.functional as F    
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
from bayes_opt import BayesianOptimization
import time
import os
from sklearn import preprocessing


# Directory that receives the Bayesian-optimisation result tables.
output_dir = 'NNBayesian'
# exist_ok=True makes creation idempotent and avoids the check-then-create race.
os.makedirs(output_dir, exist_ok=True)


def train_model(batch_size, lr, module__n_hidden, module__w):
    """Objective for the Bayesian optimiser: train one network, return ``-test_loss``.

    The optimiser proposes real-valued parameters, so the integer-like ones are
    truncated with ``int`` before use. Reads the module-level ``train_features``,
    ``train_labels``, ``test_features`` and ``test_labels`` tensors.

    Args:
        batch_size: mini-batch size (truncated to int).
        lr: Adam learning rate.
        module__n_hidden: width of every hidden layer (truncated to int).
        module__w: number of extra hidden layers (truncated to int).

    Returns:
        The negated test loss, so that maximising it minimises the loss.
    """
    module__n_hidden = int(module__n_hidden)
    module__w = int(module__w)
    batch_size = int(batch_size)

    train_dataset = Data.TensorDataset(train_features, train_labels)
    test_dataset = Data.TensorDataset(test_features, test_labels)

    # Shuffle only the loader used for optimisation. Evaluation loaders keep a
    # fixed order so the reported losses do not depend on random batch grouping.
    train_loader = Data.DataLoader(train_dataset, batch_size, shuffle=True)
    train_eval_loader = Data.DataLoader(train_dataset, batch_size, shuffle=False)
    test_loader = Data.DataLoader(test_dataset, batch_size, shuffle=False)

    # Take the input width from the data instead of hard-coding it.
    net = Net(n_feature=train_features.shape[1], n_hidden=module__n_hidden, n_output=1, w=module__w)

    # Put the model on whichever device the data tensors already live on.
    device = train_features.device
    net.to(device)

    n_epochs = 1000
    optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=0.0001)

    train(net, n_epochs, batch_size, train_features, test_features, train_labels, test_labels, train_loader, optimizer)

    # The training-set loss is printed for reference only; the objective uses
    # the test-set loss.
    test(net, train_eval_loader, set_name="Training set")
    test_loss = test(net, test_loader, set_name="Test set")

    return -test_loss


# Search space handed to the Bayesian optimiser (inclusive lower/upper bounds).
bounds = {
    'lr': (0.0001, 0.001),
    'batch_size': (32, 64),
    'module__n_hidden': (64, 256),
    'module__w': (2, 10),
}

optimizer = BayesianOptimization(f=train_model, pbounds=bounds, random_state=1)

# 100 random probes followed by 150 guided iterations.
optimizer.maximize(init_points=100, n_iter=150)

# One single-row frame per evaluated point, in evaluation order.
result_list = [
    pd.DataFrame({
        'target': [trial['target']],
        'batch_size': [trial['params']['batch_size']],
        'lr': [trial['params']['lr']],
        'module__n_hidden': [trial['params']['module__n_hidden']],
        'module__w': [trial['params']['module__w']],
    })
    for trial in optimizer.res
]
table = pd.concat(result_list, ignore_index=True)

# Append the best point found as an extra final row.
_best = optimizer.max
_best_row = pd.DataFrame({
    'target': [_best['target']],
    'batch_size': [_best['params']['batch_size']],
    'lr': [_best['params']['lr']],
    'module__n_hidden': [_best['params']['module__n_hidden']],
    'module__w': [_best['params']['module__w']],
})
table = pd.concat([table, _best_row], ignore_index=True)

# Time-stamped workbook name inside the output directory.
model_name = 'd33_inference_NN_{}'.format(datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
file_name = os.path.join(output_dir, '{}.xlsx'.format(model_name))

# Elapsed wall-clock time since `starttime`.
endtime = datetime.datetime.now()
Rtime = endtime - starttime
print(Rtime)

table.to_excel(file_name, index=False)
print("保存结果至: ", file_name)
print(table)

In [None]:
# Save the search table under a fixed name in the working directory. The row
# index is written as well (to_excel default).
model_name = 'd33_inference_NN'
file_name = f'{model_name}.xlsx'

# Elapsed wall-clock time since `starttime`.
endtime = datetime.datetime.now()
Rtime = endtime - starttime
print(Rtime)

table.to_excel(file_name)
print(table)

In [None]:
import datetime
import torch.utils.data as Data
import pandas as pd
import torch
import torch.nn.functional as F   
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import train_test_split
import torch.nn as nn
import torch.optim as optim
import time
import os
import seaborn as sns
from sklearn.metrics import r2_score


def set_random_seed(seed):
    """Seed torch (CPU and CUDA), numpy and ``random``; make cuDNN deterministic.

    Args:
        seed: integer seed applied to every generator.
    """
    seeders = (
        torch.manual_seed,
        torch.cuda.manual_seed,
        torch.cuda.manual_seed_all,
        np.random.seed,
        random.seed,
    )
    for apply_seed in seeders:
        apply_seed(seed)

    # Turn off cuDNN auto-tuning and request deterministic kernels.
    cudnn = torch.backends.cudnn
    cudnn.benchmark = False
    cudnn.deterministic = True


set_random_seed(1)

# Result directories. Creating the nested 'Figures' folder with exist_ok=True
# also creates its parent and is safe to re-run (no exists-then-create race).
folder_dir = 'Results/STU_NN_BO(100+150)_1'
folder_dir_figures = os.path.join(folder_dir, 'Figures')
os.makedirs(folder_dir_figures, exist_ok=True)

t = time.localtime()
plt.close('all')
# Hyper-parameter table written by the Bayesian-optimisation step.
target = pd.read_excel('d33_inference_NN.xlsx')
starttime = datetime.datetime.now()


# Per-candidate summary, filled while retraining each candidate.
results_df = pd.DataFrame(columns=['Iteration', 'Seed', 'target', 'R2_Score_test', 'Figure_Path_test', 'R2_Score_train', 'Figure_Path_train', 'R2_Score_all', 'Figure_Path_all', 'Final_Train_Loss', 'Final_Test_Loss'])


# Re-create the scaled tensors and the seed-1 split from `data`, which was
# loaded in an earlier cell.
x_all, y_all, train_features, test_features, train_labels, test_labels = normalizing_data(data, seed=1)


# Export the scaled train / test feature matrices for inspection.
train_df = pd.DataFrame(train_features.cpu().numpy())
test_df = pd.DataFrame(test_features.cpu().numpy())


train_file_path = 'train_features.xlsx'
test_file_path = 'test_features.xlsx'
train_df.to_excel(train_file_path, index=True)
test_df.to_excel(test_file_path, index=True)


def _as_flat_numpy(tensor):
    """Detach ``tensor``, move it to host memory and flatten it to a 1-D numpy array."""
    return tensor.detach().cpu().numpy().flatten()


# Retrain every hyper-parameter candidate stored in `target`, then save its loss
# curve, predictions, parity plots, R2 scores and weights under `folder_dir`.
n_epochs = 1000
result_records = []  # one dict per (candidate, seed); turned into a frame once

# Ground-truth arrays do not change between candidates.
actual_train = _as_flat_numpy(train_labels)
actual_test = _as_flat_numpy(test_labels)
actual_all = _as_flat_numpy(y_all)

# Iterate over the rows actually present in the table instead of a fixed 251.
for i in range(len(target)):
    for j in range(1, 2):
        set_random_seed(1)

        tg = target.at[i, 'target']
        lr = target.at[i, 'lr']
        module__n_hidden = int(target.at[i, 'module__n_hidden'])
        module__w = int(target.at[i, 'module__w'])
        batch_size = int(target.at[i, 'batch_size'])

        train_dataset = Data.TensorDataset(train_features, train_labels)
        train_loader = Data.DataLoader(train_dataset, batch_size, shuffle=True)

        # The class is deliberately (re)defined per candidate: its default
        # arguments capture the current hyper-parameters, and the parameter-count
        # cell later calls `Net()` relying on the defaults of the last candidate.
        class Net(nn.Module):
            """ReLU MLP with light dropout (p=0.01) after every hidden layer."""

            def __init__(self, n_feature=11, n_hidden=module__n_hidden, n_output=1, w=module__w):
                super(Net, self).__init__()
                self.hidden1 = torch.nn.Linear(n_feature, n_hidden)
                nn.init.kaiming_normal_(self.hidden1.weight)

                self.hiddens = nn.ModuleList([nn.Linear(n_hidden, n_hidden) for _ in range(w)])
                for m in self.hiddens:
                    nn.init.kaiming_normal_(m.weight)

                self.dropout = nn.Dropout(p=0.01)
                self.predict = torch.nn.Linear(n_hidden, n_output)
                nn.init.kaiming_normal_(self.predict.weight)

            def forward(self, x):
                x = self.hidden1(x)
                x = F.relu(x)
                x = self.dropout(x)
                for m in self.hiddens:
                    x = m(x)
                    x = F.relu(x)
                    x = self.dropout(x)
                x = self.predict(x)
                return x

        # Run the model on the device the data already lives on. This replaces
        # the unconditional `.cuda()` calls that fail on CPU-only machines.
        device = train_features.device
        net = Net().to(device)

        train_ls, test_ls = [], []
        loss = MAPELoss()
        optimizer = optim.Adam(net.parameters(), lr=lr, weight_decay=0.0001)

        for epoch in range(n_epochs):
            for x, y in train_loader:
                ls = loss(net(x).view(-1, 1), y.view(-1, 1))
                optimizer.zero_grad()
                ls.backward()
                optimizer.step()

            # Monitoring only: skip autograd bookkeeping. The network is left in
            # train mode here, as before, so the logged values and the RNG stream
            # are unchanged (dropout stays active).
            with torch.no_grad():
                train_ls.append(loss(net(train_features).view(-1, 1), train_labels.view(-1, 1)).item())
                test_ls.append(loss(net(test_features).view(-1, 1), test_labels.view(-1, 1)).item())

        # Per-epoch loss table.
        loss_data = pd.DataFrame({'Epoch': range(1, n_epochs + 1), 'Train Loss': train_ls, 'Test Loss': test_ls})
        loss_file_path = f'{folder_dir}/loss_data_{i}_seed_{j}.xlsx'
        loss_data.to_excel(loss_file_path, index=False)

        # Loss curves. Every figure is closed after saving; otherwise each
        # candidate would leave four open figures in memory.
        fig = plt.figure()
        plt.plot(range(1, n_epochs + 1), train_ls, label="Train Loss", color='blue')
        plt.plot(range(1, n_epochs + 1), test_ls, label="Test Loss", color='orange')
        plt.legend()
        plt.text(60, 0.7, f'Target={tg:.4f}', fontdict={'size': 20, 'color':  'red'})
        fig_name_1 = f'{folder_dir}/Figures/{i}-seed_{j}_loss.png'
        plt.savefig(fig_name_1, format='png', dpi=300)
        plt.close(fig)

        # Final predictions in eval mode (dropout off), without gradients.
        net.eval()
        with torch.no_grad():
            predict_test = _as_flat_numpy(net(test_features.to(device)))
            predict_train = _as_flat_numpy(net(train_features.to(device)))
            predict_all = _as_flat_numpy(net(x_all.to(device)))

        pd.DataFrame({'Predicted': predict_train, 'Actual': actual_train}).to_excel(f'{folder_dir}/predictions_train_{i}_seed_{j}.xlsx', index=False)
        pd.DataFrame({'Predicted': predict_test, 'Actual': actual_test}).to_excel(f'{folder_dir}/predictions_test_{i}_seed_{j}.xlsx', index=False)
        pd.DataFrame({'Predicted': predict_all, 'Actual': actual_all}).to_excel(f'{folder_dir}/predictions_all_{i}_seed_{j}.xlsx', index=False)

        # R2 scores are computed once and reused for the plots and the summary.
        current_r2_test = r2_score(actual_test, predict_test)
        current_r2_train = r2_score(actual_train, predict_train)
        current_r2_all = r2_score(actual_all, predict_all)

        # Parity plot: test set.
        fig_name_2_test = f'{folder_dir}/Figures/{i}-seed_{j}_test.png'
        fig = plt.figure()
        sns.regplot(x=predict_test, y=actual_test, color='red')
        plt.text(predict_test.min(), actual_test.max(), f'R2={current_r2_test:.4f}', color='red')
        plt.savefig(fig_name_2_test, format='png', dpi=300)
        plt.close(fig)

        # Parity plot: training set.
        fig_name_2_train = f'{folder_dir}/Figures/{i}-seed_{j}_train.png'
        fig = plt.figure()
        sns.regplot(x=predict_train, y=actual_train, color='blue')
        plt.text(predict_train.min(), actual_train.max(), f'R2={current_r2_train:.4f}', color='blue')
        plt.savefig(fig_name_2_train, format='png', dpi=300)
        plt.close(fig)

        # Parity plot: train and test together, annotated with the overall R2.
        fig_name_2_all = f'{folder_dir}/Figures/{i}-seed_{j}_all.png'
        fig = plt.figure()
        sns.regplot(x=predict_train, y=actual_train, color='blue', label="Train")
        sns.regplot(x=predict_test, y=actual_test, color='red', label="Test")
        plt.text(predict_all.min(), actual_all.max(), f'R2={current_r2_all:.4f}', color='green')
        plt.legend()
        plt.savefig(fig_name_2_all, format='png', dpi=300)
        plt.close(fig)

        final_train_loss = train_ls[-1]
        final_test_loss = test_ls[-1]

        result_records.append({
            'Iteration': i,
            'Seed': j,
            'target': tg,
            'R2_Score_test': current_r2_test,
            'Figure_Path_test': fig_name_2_test,
            'R2_Score_train': current_r2_train,
            'Figure_Path_train': fig_name_2_train,
            'R2_Score_all': current_r2_all,
            'Figure_Path_all': fig_name_2_all,
            'Final_Train_Loss': final_train_loss,
            'Final_Test_Loss': final_test_loss
        })

        net_name = f'{folder_dir}/{i}-seed_{j}.pt'
        torch.save(net.state_dict(), net_name)


# Build the summary frame once instead of concatenating inside the loop, which
# is quadratic in the number of candidates.
results_df = pd.DataFrame(result_records, columns=results_df.columns)
results_df.to_csv(f'{folder_dir}/results_summary_NN.csv', index=False)


# Elapsed wall-clock time since `starttime`.
endtime = datetime.datetime.now()
Rtime = endtime - starttime
print(Rtime)

In [None]:
# Instantiate the most recently defined `Net` with its default arguments and
# report its layout together with the number of trainable parameters.
model = Net()
model_parameters = (p for p in model.parameters() if p.requires_grad)

params = sum(np.prod(p.size()) for p in model_parameters)
print("model architecture:")
print(model)
print("num:{}".format(params))