In [1]:
import torch
import torch.nn as nn
from sklearn import preprocessing
from matminer.featurizers.composition import alloy
from matminer.featurizers.conversions import StrToComposition

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
data = pd.read_csv('data.csv')

# Convert formula to composition
data = StrToComposition().featurize_dataframe(data, 'formula')
# 然后基于composition计算特征
data = alloy.WenAlloys().featurize_dataframe(data, 'composition')

StrToComposition:   0%|          | 0/799 [00:00<?, ?it/s]

WenAlloys:   0%|          | 0/799 [00:00<?, ?it/s]

In [3]:
# 选择前1500条数据作为训练集和验证集；后500条数据作为验证集。
data_fit = data.iloc[:400]
data_test = data.iloc[400:]

data_fit_X = data_fit[['APE mean', 'Electronegativity local mismatch', 'VEC mean', 'Shear modulus mean', 'Shear modulus delta', 'Shear modulus strength model']]
data_fit_y = data_fit[['SFE']]
data_test_X = data_test[['APE mean', 'Electronegativity local mismatch', 'VEC mean', 'Shear modulus mean', 'Shear modulus delta', 'Shear modulus strength model']]
data_test_y = data_test[['SFE']]

In [5]:
# # 数据归一化
# min_max_scaler = preprocessing.MinMaxScaler()
# data_fit_X = min_max_scaler.fit_transform(data_fit_X)
# data_test_X = min_max_scaler.transform(data_test_X)

# 将数据类型转换为tensor
data_fit_X = torch.FloatTensor(data_fit_X.values)
data_fit_y = torch.FloatTensor(data_fit_y.values)
data_test_X = torch.FloatTensor(data_test_X.values)
data_test_y = torch.FloatTensor(data_test_y.values)

In [11]:
class Net(torch.nn.Module):
    def __init__(self, n_feature, n_hidden, n_output):
        super(Net, self).__init__()
        self.hidden = torch.nn.Linear(n_feature, n_hidden)   # 隐层
        self.relu = torch.nn.ReLU()                          # 选择激活层
        self.predict = torch.nn.Linear(n_hidden, n_output)   # 输出层

    def forward(self, x):
        x = self.hidden(x)                                   # 计算隐层
        x = self.relu(x)                                     # 计算激活层
        x = self.predict(x)                                  # 输出层
        return x


Net(
  (hidden): Linear(in_features=6, out_features=20, bias=True)
  (relu): ReLU()
  (predict): Linear(in_features=20, out_features=1, bias=True)
)


In [35]:
# 选择损失函数和优化方法
import sklearn
metric_func = lambda y_pred, y_true: sklearn.metrics.accuracy_score(y_true.data.numpy(), y_pred.data.numpy())
metric_name = 'accuracy'

loss_func = torch.nn.SmoothL1Loss()

for i in range(1, 2):
    net = Net(n_feature=6, n_hidden=i, n_output=1)              # 定义网络
    optimizer = torch.optim.SGD(net.parameters(), lr=0.5)

    for t in range(50):
        prediction = net(data_fit_X)                                      # 用网络预测一下
        loss = loss_func(prediction, data_fit_y)                          # 计算损失
        optimizer.zero_grad()                                    # 清除上一步的梯度
        loss.backward()                                          # 反向传播, 计算梯度
        optimizer.step()                                         # 优化一步
        # print("train loss: {}".format(loss.item()))

        with torch.no_grad():
            predicted = net(data_test_X)
            score = getattr(sklearn.metrics, 'r2_score')(data_test_y, predicted)
            # print("test accur: {}".format(score))
            print(predicted[3])
            print(predicted[0])
    print("\n")

predicted = net(data_test_X)
# print(predicted)

tensor([1.0404])
tensor([1.0404])
tensor([1.1725])
tensor([1.1725])
tensor([1.2388])
tensor([1.2388])
tensor([1.2719])
tensor([1.2719])
tensor([1.2885])
tensor([1.2885])
tensor([1.2968])
tensor([1.2968])
tensor([1.3009])
tensor([1.3009])
tensor([1.3030])
tensor([1.3030])
tensor([1.3040])
tensor([1.3040])
tensor([1.3045])
tensor([1.3045])
tensor([1.3048])
tensor([1.3048])
tensor([1.3049])
tensor([1.3049])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050])
tensor([1.3050

In [31]:
print(data_test_y)

tensor([[1.3389],
        [1.3615],
        [1.1708],
        [0.9958],
        [1.2644],
        [1.4218],
        [1.5325],
        [1.8895],
        [0.9257],
        [1.2275],
        [1.7786],
        [0.8565],
        [0.8324],
        [1.8704],
        [1.6943],
        [1.9546],
        [1.3561],
        [1.9981],
        [1.6506],
        [1.2616],
        [1.1711],
        [1.0339],
        [1.1983],
        [1.3449],
        [1.3842],
        [1.4108],
        [1.2423],
        [1.1057],
        [1.2796],
        [1.0635],
        [1.3798],
        [1.0597],
        [1.2247],
        [2.1491],
        [0.8386],
        [1.0799],
        [1.3583],
        [1.1874],
        [0.8555],
        [1.4097],
        [1.7277],
        [1.1623],
        [1.3405],
        [1.3350],
        [2.0580],
        [1.7993],
        [0.7360],
        [1.3243],
        [1.5481],
        [1.1255],
        [1.2696],
        [1.2338],
        [0.8353],
        [1.3665],
        [1.7180],
        [1