In [1]:
import sys

import joblib
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import sklearn
from matminer.featurizers.composition import alloy
from matminer.featurizers.conversions import StrToComposition
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_predict
from sklearn.svm import SVR


def parse_worker_count(argv, default=1):
    """Return the parallel worker count given as the first CLI argument.

    Parameters
    ----------
    argv : sequence of str
        Argument vector in ``sys.argv`` layout (program name first).
    default : int, optional
        Value returned when ``argv[1]`` is missing, is not an integer, or is 0.

    Returns
    -------
    int
        The parsed worker count, or ``default``.
    """
    try:
        workers = int(argv[1])
    except (IndexError, ValueError):
        # No argument at all, or a non-numeric one. Inside a Jupyter kernel
        # argv[1] is typically the launcher's "-f" flag, not a number.
        return default
    # 0 is not a usable job count for joblib-backed parallelism.
    return workers if workers != 0 else default


# Number of parallel jobs handed to the grid search (n_jobs).
ppn = parse_worker_count(sys.argv)

In [3]:
# Model inputs and regression target, declared once so the fit and test
# frames are guaranteed to select the same columns.
# NOTE(review): these columns are presumably produced by the featurizers
# below rather than present in data.csv -- confirm.
FEATURE_COLUMNS = [
    'APE mean',
    'Electronegativity local mismatch',
    'VEC mean',
    'Shear modulus mean',
    'Shear modulus delta',
    'Shear modulus strength model',
]
TARGET_COLUMN = 'SFE'

# The first N_FIT_ROWS rows (in file order, no shuffling) are used for
# fitting; every remaining row forms the test set.
N_FIT_ROWS = 600

data = pd.read_csv('data.csv')

# Convert formula to composition
data = StrToComposition().featurize_dataframe(data, 'formula')
# Then compute the alloy features from the composition
data = alloy.WenAlloys().featurize_dataframe(data, 'composition')

data_fit = data.iloc[:N_FIT_ROWS]
data_test = data.iloc[N_FIT_ROWS:]

data_fit_X = data_fit[FEATURE_COLUMNS]
data_fit_y = data_fit[TARGET_COLUMN]
data_test_X = data_test[FEATURE_COLUMNS]
data_test_y = data_test[TARGET_COLUMN]

StrToComposition:   0%|          | 0/799 [00:00<?, ?it/s]

WenAlloys:   0%|          | 0/799 [00:00<?, ?it/s]

In [None]:
# Hyper-parameter search for the SVR model.
#
# The grid is split per kernel so that each kernel is only combined with the
# parameters it actually uses (degree: poly only; coef0: poly and sigmoid;
# gamma: rbf, poly and sigmoid). A single Cartesian grid would refit many
# identical models. The 'precomputed' kernel is left out: it expects a square
# (n_samples, n_samples) kernel matrix, so it cannot be fitted on the raw
# feature table used here.
model_svr = SVR()

shared_grid = {
    'C': [0.1, 1, 10, 100, 1000],
    'tol': [0.001, 0.0001, 0.00001, 0.000001],
}
gamma_values = ['scale', 'auto']
degree_values = [2, 3, 4, 5, 6, 7, 8, 9, 10]
coef0_values = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]

param_grid = [
    {**shared_grid, 'kernel': ['linear']},
    {**shared_grid, 'kernel': ['rbf'], 'gamma': gamma_values},
    {**shared_grid, 'kernel': ['sigmoid'], 'gamma': gamma_values,
     'coef0': coef0_values},
    {**shared_grid, 'kernel': ['poly'], 'gamma': gamma_values,
     'degree': degree_values, 'coef0': coef0_values},
]

# 5-fold CV scored by negative MSE (higher is better); ppn sets the number of
# parallel jobs.
search_svr = GridSearchCV(model_svr, param_grid, cv=5, scoring='neg_mean_squared_error', n_jobs=int(ppn))
search_svr.fit(data_fit_X, data_fit_y)

print('网格搜索-度量记录：',search_svr.cv_results_)  # per-candidate records for every fit in the search
print('网格搜索-最佳度量值:',search_svr.best_score_)  # best mean cross-validated score
print('网格搜索-最佳参数：',search_svr.best_params_)  # parameter dict that achieved the best score
print('网格搜索-最佳模型：',search_svr.best_estimator_)  # estimator refit with the best parameters

In [None]:
# Save the model: persist the whole fitted grid-search object (its
# best_estimator_ included) to disk. joblib is imported in the first cell.
joblib.dump(search_svr, 'model_SVR.pkl')

In [None]:
print("\n")
print("-"*50)

# Evaluate performance with 10-fold cross-validated predictions.
# NOTE(review): cross_val_predict refits clones of the best estimator on folds
# of the test rows, so the model fitted by the grid search is not what gets
# scored here. If a plain hold-out score is intended, use
# search_svr.predict(data_test_X) instead -- confirm.
svr_pridict = cross_val_predict(search_svr.best_estimator_, data_test_X, data_test_y, cv=10)

# Metric functions are imported explicitly in the first cell; the printed
# label is the function's own name.
for metric in (r2_score, mean_absolute_error, mean_squared_error):
    score = metric(data_test_y, svr_pridict)
    print(metric.__name__, score)