### 提供精确的属性预估值

In [1]:
import pandas as pd
import xgboost as xgb
import numpy as np
def predict(wob,T,pred_y):
    #数据预处理
    df = pd.DataFrame(columns=['wob_min','wob_max','wob_mean','wob_std','wob_skew','wob_kurt','T_min','T_max','T_mean','T_std','T_skew','T_kurt'])

    wob_min = np.min(wob)
    wob_max = np.max(wob)
    wob_mean = np.mean(wob)
    wob_std = np.std(wob, ddof=1)  # ddof=0 对于样本标准差，ddof=1 对于总体标准差
    wob_skew = pd.Series(wob).skew()
    wob_kurt = pd.Series(wob).kurt()

    T_min = np.min(T)
    T_max = np.max(T)
    T_mean = np.mean(T)
    T_std = np.std(T, ddof=1)  # ddof=0 对于样本标准差，ddof=1 对于总体标准差
    T_skew = pd.Series(T).skew()
    T_kurt = pd.Series(T).skew()

    row = {'wob_min':wob_min,'wob_max':wob_max,'wob_mean':wob_mean,'wob_std':wob_std,'wob_skew':wob_skew,'wob_kurt':wob_kurt,'T_min':T_min,'T_max':T_max,'T_mean':T_mean,'T_std':T_std,'T_skew':T_skew,'T_kurt':T_kurt}
    df.loc[len(df)] = row
    res = {}
    for pred in pred_y:
        # 加载已保存的模型
        loaded_bst = xgb.Booster()
        loaded_bst.load_model("./模型/model_"+pred+".xgb")  # 从model.xgb文件加载模型
        # 使用加载后的模型进行预测
        feature_name =df.columns
        dtest = xgb.DMatrix(df,feature_names=feature_name)
        bst_ypred = loaded_bst.predict(dtest)
        print(pred+':',bst_ypred)
        res[pred] = bst_ypred
    return res





#模型输入数据，3秒的wob和T数据 以及要预测的属性名称
#['设备S','设备Z','静态抗压强度', '弹性模量', '泊松比', '抗拉强度', '黏聚力', '内摩擦角', '回弹均值', '动态强度', '滑动摩擦系数', '声级', '波速', '密度均值', '渗透率', '孔隙度', '标定温度']

wob = [3.52979,3.5755,3.02758,3.65089,3.30212,3.63342,3.38165,3.46744,3.56689,3.00286]
T = [11.48,9.597,10.277,11.4,7.998,9.718,10.092,11.97,9.343,9.56]
pred_y = ['静态抗压强度', '弹性模量', '泊松比','抗拉强度', '黏聚力']


#调用模型
predict(wob,T,pred_y)

静态抗压强度: [111.04034]
弹性模量: [53.288494]
泊松比: [0.25958583]
抗拉强度: [6.71879]
黏聚力: [12.006973]


{'静态抗压强度': array([111.04034], dtype=float32),
 '弹性模量': array([53.288494], dtype=float32),
 '泊松比': array([0.25958583], dtype=float32),
 '抗拉强度': array([6.71879], dtype=float32),
 '黏聚力': array([12.006973], dtype=float32)}

### 提供精确的材料类型

In [54]:
import pandas as pd
import xgboost as xgb
import numpy as np
def predict_kind(wob,T):
    #数据预处理
    df = pd.DataFrame(columns=['wob_min','wob_max','wob_mean','wob_std','wob_skew','wob_kurt','T_min','T_max','T_mean','T_std','T_skew','T_kurt'])

    wob_min = np.min(wob)
    wob_max = np.max(wob)
    wob_mean = np.mean(wob)
    wob_std = np.std(wob, ddof=1)  # ddof=0 对于样本标准差，ddof=1 对于总体标准差
    wob_skew = pd.Series(wob).skew()
    wob_kurt = pd.Series(wob).kurt()
    T_min = np.min(T)
    T_max = np.max(T)
    T_mean = np.mean(T)
    T_std = np.std(T, ddof=1)  # ddof=0 对于样本标准差，ddof=1 对于总体标准差
    T_skew = pd.Series(T).skew()
    T_kurt = pd.Series(T).skew()
    row = {'wob_min':wob_min,'wob_max':wob_max,'wob_mean':wob_mean,'wob_std':wob_std,'wob_skew':wob_skew,'wob_kurt':wob_kurt,'T_min':T_min,'T_max':T_max,'T_mean':T_mean,'T_std':T_std,'T_skew':T_skew,'T_kurt':T_kurt}
    df.loc[len(df)] = row


    #计算各种属性，计算其与标注属性之间的距离
    cols = ['设备S', '静态抗压强度', '弹性模量', '泊松比', '抗拉强度', '黏聚力', '内摩擦角', '波速', '密度均值', '孔隙度', '标定温度']
    res_list = []
    for pred in cols:
        # 加载已保存的模型
        loaded_bst = xgb.Booster()
        loaded_bst.load_model("./模型/model_"+pred+".xgb")  # 从model.xgb文件加载模型
        # 使用加载后的模型进行预测
        feature_name =df.columns
        dtest = xgb.DMatrix(df,feature_names=feature_name)
        bst_ypred = loaded_bst.predict(dtest)
        res_list.append(bst_ypred[0])

    cl2 = [101.4,110.95,53.58,0.26,6.65,11.85,38.44,5.49,2.86,2.08,60.21]
    cl4 = [71.5,117.49,58.62,0.33,10.68,13.53,46.34,5.24,3.28,2.09,57.99]
    cl5 = [67.9,126.65,72.11,0.27,12.67,21.09,44.90,6.76,3.45,2.06,63.23]
    cl7 = [65.7,53.53,31.54,0.28,4.56,8.02,45.61,5.95,3.67,14.49,57.11]
    cl8 = [72.9,164.87,53.87,0.23,5.76,12.01,44.17,4.72,4.32,1.87,63.54]
    cl10 = [69.3,48.66,47.03,0.24,4.96,18.04,44.86,5.31,3.87,2.97,57.14]
    cl11 = [75.2,192.15,77.85,0.21,7.74,17.81,45.53,6.58,3.06,1.14,65.07]
    cl13 = [70.8,129.06,85.21,0.26,6.84,21.22,38.96,6.41,3.81,1.25,56.53]
    cl55 = [68.7,39.96,29.81,0.24,3.11,16.99,31.80,2.97,2.74,18.47,53.31]
    cl21 = [64.5,61.84,32.55,0.27,4.72,13.75,41.61,3.43,2.64,11.68,55.67]
    cl22 = [78.1,131.05,55.01,0.19,7.45,20.80,46.75,5.10,2.53,1.16,66.72]

    resss = {'DTj':cl2,'Fec-BR':cl4,'DT':cl5,'Py':cl7,'Feb-BR':cl8,'BR':cl10,'CF':cl11,'Fed-DT':cl13,'ST':cl55,'GS':cl21,'GG':cl22}
    min_dis = float('inf')
    cl = ''
    for key,temp in resss.items():

        dis = np.linalg.norm(np.array(res_list) - np.array(temp))

        if dis < min_dis:
            min_dis = dis
            cl = key
    return res_list,cl


#模型输入数据  3秒的wob和T数据
#wob = [3.52979,3.5755,3.02758,3.65089,3.30212,3.63342,3.38165,3.46744,3.56689,3.00286]
wob = [6.035,
5.217,
5.344,
5.333,
5.534,
5.18,
5.428,
5.882,
5.275,
5.666]
#T = [11.48,9.597,10.277,11.4,7.998,9.718,10.092,11.97,9.343,9.56]
T=[3.27191,
3.43877,
3.2331,
3.26156,
3.26315,
3.15018,
3.10053,
3.29737,
3.32236,
3.27667,
]
predict_kind(wob,T)

([67.39486,
  44.340816,
  30.698963,
  0.27198794,
  3.2647343,
  7.9142647,
  39.759438,
  3.2317288,
  2.8105347,
  18.90503,
  54.70103],
 'Py')

In [20]:

cssj

Unnamed: 0,T,WOB,time
0,3.31282,5.122,0.332
1,3.48516,5.476,0.662
2,3.50521,5.096,0.994
3,3.55572,5.502,1.323
4,3.34059,5.492,1.543
...,...,...,...
3039,7.48000,30.590,959.142
3040,7.49000,30.520,959.473
3041,7.50000,30.510,959.803
3042,7.45000,30.500,960.135


In [55]:
cssj = pd.read_excel('./测试数据.xlsx')
cssj['时间'] = [int(temp) for temp in cssj['time']]
cssj['res'] = ' '
cols = ['设备S', '静态抗压强度', '弹性模量', '泊松比', '抗拉强度', '黏聚力', '内摩擦角', '波速', '密度均值', '孔隙度', '标定温度']
for col in cols:    
    cssj[col] = ' '
start = min(cssj['时间'])
end = max(cssj['时间'])

for time in range(start,end,3):   
    device_temp = cssj[(cssj['时间']>=time) & (cssj['时间']<(time+3))]    
    wob = list(device_temp['WOB'])
    T = list(device_temp['T'])
    res_list,res = predict_kind(wob,T)
    cssj.loc[(cssj['时间'] >= time) & (cssj['时间'] < (time + 3)), 'res'] = res
    for i,col in enumerate(cols):    
        cssj.loc[(cssj['时间'] >= time) & (cssj['时间'] < (time + 3)), col] = res_list[i]

cssj = cssj.drop_duplicates(subset=['时间'], keep='first')
cssj

Unnamed: 0,WOB,T,time,时间,res,设备S,静态抗压强度,弹性模量,泊松比,抗拉强度,黏聚力,内摩擦角,波速,密度均值,孔隙度,标定温度
0,3.31282,5.122,0.332,0,ST,68.600304,41.907322,30.507612,0.245468,3.657784,16.168028,28.725779,2.998243,2.722656,16.91976,52.905148
3,3.55572,5.502,1.323,1,ST,68.600304,41.907322,30.507612,0.245468,3.657784,16.168028,28.725779,2.998243,2.722656,16.91976,52.905148
6,3.90353,5.571,2.255,2,ST,68.600304,41.907322,30.507612,0.245468,3.657784,16.168028,28.725779,2.998243,2.722656,16.91976,52.905148
9,3.54793,3.869,3.216,3,ST,68.916077,38.472813,29.872593,0.243616,3.076712,16.702744,32.791615,2.977152,2.719164,18.101036,53.443371
12,3.63153,5.275,4.099,4,ST,68.916077,38.472813,29.872593,0.243616,3.076712,16.702744,32.791615,2.977152,2.719164,18.101036,53.443371
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3030,3.55432,19.784,956.300,956,Fed-DT,70.624588,128.823853,85.230438,0.261161,6.827304,21.276678,39.195667,6.390604,3.812326,1.250259,56.480824
3033,3.46963,20.122,957.214,957,CF,77.31472,192.854919,63.42334,0.196701,7.557577,17.802477,46.637829,5.222941,2.614308,1.144091,66.829933
3036,7.46000,30.530,958.239,958,CF,77.31472,192.854919,63.42334,0.196701,7.557577,17.802477,46.637829,5.222941,2.614308,1.144091,66.829933
3039,7.48000,30.590,959.142,959,CF,77.31472,192.854919,63.42334,0.196701,7.557577,17.802477,46.637829,5.222941,2.614308,1.144091,66.829933


In [57]:
cssj.to_excel('cssj.xlsx')

In [41]:
#直接分类
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
%config InlineBackend.figure_format = 'svg'
rock_character = pd.read_excel('../data/模型数据副本.xlsx',sheet_name = '材料')
device_wob = pd.read_excel('../data/模型数据副本.xlsx',sheet_name = '设备wob')
device_T = pd.read_excel('../data/模型数据副本.xlsx',sheet_name = '设备T')
device_S = pd.read_excel('../data/模型数据副本.xlsx',sheet_name = '设备S')
device_Z = pd.read_excel('../data/模型数据副本.xlsx',sheet_name = '设备Z')
device_wob['time'] = [int(temp) for temp in device_wob['时间/s']]
device_T['time'] = [int(temp) for temp in device_T['时间/s']]
device_wob.head(n=10)
del device_wob['时间/s']
del device_T['时间/s']
columns = ['材料2','材料4','材料5','材料7','材料8','材料10','材料11','材料13','材料55','材料21','材料22']
res_df = pd.DataFrame(columns=['wob_min','wob_max','wob_mean','wob_std','wob_skew','wob_kurt','T_min','T_max','T_mean','T_std','T_skew','T_kurt','S','Z','res'])

for time in range(1050,1406,3):
    device_wob_temp = device_wob[(device_wob['time']>=time) & (device_wob['time']<(time+3))]
    device_T_temp = device_T[(device_T['time']>=time) & (device_T['time']<(time+3))]
    
    device_wob_temp_des = device_wob_temp[columns].describe()
    device_T_temp_des = device_T_temp[columns].describe()
    
    #每三钟算一组，统计特征,取少的这个device_T_mean
    for column in columns:
        wob_min = device_wob_temp_des[column]['min']
        wob_max = device_wob_temp_des[column]['max']
        wob_mean = device_wob_temp_des[column]['mean']
        wob_std = device_wob_temp_des[column]['std']
        wob_skew = device_wob_temp[column].skew()#偏度计算
        wob_kurt = device_wob_temp[column].kurt()#峰度计算        

        T_min = device_T_temp_des[column]['min']
        T_max = device_T_temp_des[column]['max']
        T_mean = device_T_temp_des[column]['mean']
        T_std = device_T_temp_des[column]['std']
        T_skew = device_T_temp[column].skew()#偏度计算
        T_kurt = device_T_temp[column].kurt()#峰度计算  


        S = device_S[column].values[0]
        Z = device_Z[column].values[0]
        res = column
        row = {'wob_min':wob_min,'wob_max':wob_max,'wob_mean':wob_mean,'wob_std':wob_std,'wob_skew':wob_skew,'wob_kurt':wob_kurt,'T_min':T_min,'T_max':T_max,'T_mean':T_mean,'T_std':T_std,'T_skew':T_skew,'T_kurt':T_kurt,'S':S,'Z':Z,'res':res}
        res_df.loc[len(res_df)] = row

res_df.head()
# xgboost分类器
import xgboost as xgb
from sklearn.preprocessing import LabelEncoder

le = LabelEncoder()
xxx = ['wob_min','wob_max','wob_mean','wob_std','wob_skew','wob_kurt','T_min','T_max','T_mean','T_std','T_skew','T_kurt']
X = res_df[xxx]

y_encoded = le.fit_transform(res_df['res'])
res_df['label'] = y_encoded
Y = res_df['label']

from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score, accuracy_score # 衡量模型的优劣
# 随机采样20%的数据构建测试样本，其余作为训练样本
Xtrain1, Xtest1, ytrain, ytest = train_test_split(X, Y,test_size=0.2)
Xtrain,Xtest = Xtrain1, Xtest1
model = xgb.XGBClassifier(random_state=1,learning_rate=0.1,use_label_encoder=False)
model.fit(Xtrain, ytrain)
print(model)
print('Accuracy over train set: ', model.score(Xtrain, ytrain))


#模型保存
import pickle
pickle.dump(model, open("材料_classifier.dat","wb"))
loaded_model = pickle.load(open("材料_classifier.dat", "rb"))
# print("Loaded model from: modelOut.dat")

# 模型预测
ypreds = loaded_model.predict(Xtest)
accuracy = accuracy_score(ytest, ypreds)
print("Accuracy1: %.2f%%" % (accuracy * 100.0))
print('Accuracy over all set: ', loaded_model.score(pd.concat([Xtrain,Xtest]), pd.concat([ytrain,ytest])))



XGBClassifier(base_score=None, booster=None, callbacks=None,
              colsample_bylevel=None, colsample_bynode=None,
              colsample_bytree=None, early_stopping_rounds=None,
              enable_categorical=False, eval_metric=None, feature_types=None,
              gamma=None, gpu_id=None, grow_policy=None, importance_type=None,
              interaction_constraints=None, learning_rate=0.1, max_bin=None,
              max_cat_threshold=None, max_cat_to_onehot=None,
              max_delta_step=None, max_depth=None, max_leaves=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              objective='multi:softprob', predictor=None, ...)
Accuracy over train set:  1.0
Accuracy1: 95.80%
Accuracy over all set:  0.9915966386554622


In [None]:
##方案一：直接使用分类模型
import pickle
loaded_model = pickle.load(open("材料_classifier.dat", "rb"))
ypreds = loaded_model.predict(df)
print(le.inverse_transform(ypreds))

In [43]:
cssj = pd.read_excel('./测试数据.xlsx')
cssj['时间'] = [int(temp) for temp in cssj['time']]
cssj['res'] = ' '
start = min(cssj['时间'])
end = max(cssj['时间'])

for time in range(start,end,3):   
    device_temp = cssj[(cssj['时间']>=time) & (cssj['时间']<(time+3))]    
    wob = list(device_temp['WOB'])
    T = list(device_temp['T'])


    #数据预处理
    df = pd.DataFrame(columns=['wob_min','wob_max','wob_mean','wob_std','wob_skew','wob_kurt','T_min','T_max','T_mean','T_std','T_skew','T_kurt'])

    wob_min = np.min(wob)
    wob_max = np.max(wob)
    wob_mean = np.mean(wob)
    wob_std = np.std(wob, ddof=1)  # ddof=0 对于样本标准差，ddof=1 对于总体标准差
    wob_skew = pd.Series(wob).skew()
    wob_kurt = pd.Series(wob).kurt()
    T_min = np.min(T)
    T_max = np.max(T)
    T_mean = np.mean(T)
    T_std = np.std(T, ddof=1)  # ddof=0 对于样本标准差，ddof=1 对于总体标准差
    T_skew = pd.Series(T).skew()
    T_kurt = pd.Series(T).skew()
    row = {'wob_min':wob_min,'wob_max':wob_max,'wob_mean':wob_mean,'wob_std':wob_std,'wob_skew':wob_skew,'wob_kurt':wob_kurt,'T_min':T_min,'T_max':T_max,'T_mean':T_mean,'T_std':T_std,'T_skew':T_skew,'T_kurt':T_kurt}
    df.loc[len(df)] = row
    
    
    
    loaded_model = pickle.load(open("材料_classifier.dat", "rb"))
    ypreds = loaded_model.predict(df)
    print(le.inverse_transform(ypreds)[0])
    
    res = le.inverse_transform(ypreds)[0]
    cssj.loc[(cssj['时间'] >= time) & (cssj['时间'] < (time + 3)), 'res'] = res
cssj

材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7
材料7


Unnamed: 0,T,WOB,time,时间,res
0,3.31282,5.122,0.332,0,材料7
1,3.48516,5.476,0.662,0,材料7
2,3.50521,5.096,0.994,0,材料7
3,3.55572,5.502,1.323,1,材料7
4,3.34059,5.492,1.543,1,材料7
...,...,...,...,...,...
3039,7.48000,30.590,959.142,959,材料7
3040,7.49000,30.520,959.473,959,材料7
3041,7.50000,30.510,959.803,959,材料7
3042,7.45000,30.500,960.135,960,


In [44]:
cssj.to_excel('cssj.xlsx')