In [46]:
import pandas as pd
import numpy as np
import datetime
from scipy.stats import kurtosis, skew, entropy
from scipy.fft import fft

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report

In [3]:
df_train= pd.read_excel('../trainData.xlsx',engine='openpyxl')

In [20]:
def getTimeStatic(signal):

    # 假设你的振动信号数组为signal，每一行代表一个样本
    # signal是一个二维数组，每一行包含多个振动信号的数据

    mean_values = np.mean(signal, axis=1)  # 计算每个样本的均值
    max_values = np.max(signal, axis=1)  # 计算每个样本的最大值
    min_values = np.min(signal, axis=1)  # 计算每个样本的最小值


    # 计算每个样本的均方根RMS
    rms = np.sqrt(np.mean(signal**2, axis=1))

    # 计算每个样本的方差Var
    var = np.var(signal, axis=1)

    # 计算每个样本的峰值PvT
    pvt = np.max(np.abs(signal), axis=1)

    # 计算每个样本的峭度指标K
    kurt = kurtosis(signal, axis=1)

    # 计算每个样本的裕度指标L
    l = pvt / rms

    # 计算每个样本的脉冲指标I
    i = np.max(np.abs(signal[:, 1:] - signal[:, :-1]), axis=1)

    # 计算每个样本的线积分LI
    li = np.sum(np.abs(signal), axis=1)

    # 计算每个样本的峰峰值PP
    pp = np.max(signal, axis=1) - np.min(signal, axis=1)

    # 计算每个样本的信息熵Ent
    ent = entropy(signal, axis=1)

    # 计算每个样本的峰值指标C
    c = pvt / pp

    # 计算每个样本的波形指标W
    w = rms / pp

    # 计算每个样本的偏斜度指标S
    s = skew(signal, axis=1)



    # 计算频谱
    freq_spectrum = np.abs(fft(signal))

    # 计算频谱峰值PvF
    pvf = np.max(freq_spectrum, axis=1)

    # 计算频谱能量En
    en = np.sum(freq_spectrum**2, axis=1)

    # 计算功率谱能量PSD
    psd = en / len(signal[0])


    features = np.column_stack((mean_values, max_values, 
                                min_values, rms, var, 
                                pvt, kurt, l,i,li,pp,ent,c,w,s,pvf,en,psd))
    
    return features


In [21]:
train_data= getTimeStatic(df_train.iloc[:,0:4096].values)
df_train_=pd.DataFrame(data=train_data,columns=['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','ent','c','w','s','pvf','en','psd'])

In [22]:
df_train_clear=pd.merge(left=df_train,right=df_train_,left_index=True,right_index=True,how='inner')

In [23]:
df_train_factor=df_train_clear[['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','ent','c','w','s','pvf','en','psd','ladle']]

In [56]:
df_train_factor.to_excel('./data/train_new_factor.xlsx',index=False)

In [31]:
df_train_factor.describe()

Unnamed: 0,mean_values,max_values,min_values,rms,var,pvt,kurt,l,i,li,pp,ent,c,w,s,pvf,en,psd,ladle
count,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0,4000.0
mean,0.000672,5.63852,-5.617513,1.337839,1.784738,5.915109,0.930774,4.410312,5.129754,4263.870395,11.256033,-inf,0.525526,0.124216,-0.011612,1006.224766,30115250.0,7352.356691,1.5
std,0.101355,1.423099,1.479109,0.0721,0.204146,1.487993,1.622169,1.014022,0.507644,233.508449,2.803412,,0.020292,0.022874,0.066601,346.097349,3374455.0,823.841469,1.118174
min,-0.560504,3.425431,-12.686618,1.064932,1.133889,3.486932,-0.300155,3.104751,4.02596,3481.018359,6.946753,-inf,0.50002,0.057783,-0.521955,353.920832,19026720.0,4645.195069,0.0
25%,-0.014098,4.644441,-5.997659,1.310251,1.703979,4.858355,-0.042563,3.68219,4.765914,4156.75475,9.334201,-inf,0.509511,0.11062,-0.040097,739.166778,28802400.0,7031.835044,0.75
50%,0.000396,5.236086,-5.104036,1.337927,1.783007,5.423355,0.177156,4.092597,5.051141,4285.034782,10.24403,-inf,0.52091,0.129255,-0.006344,920.161462,30032030.0,7332.037403,1.5
75%,0.014645,6.170663,-4.64722,1.363356,1.851111,6.276171,1.131272,4.749281,5.407983,4402.727007,11.963292,-inf,0.536228,0.142443,0.026309,1334.807355,31184490.0,7613.400571,2.25
max,0.636499,12.128032,-3.433436,1.868179,3.48959,12.686618,11.812487,8.847865,7.965467,6081.322075,24.81465,-inf,0.609122,0.167218,0.240148,2607.100136,58554030.0,14295.418154,3.0


In [32]:
# 这里以著名的Iris数据集为例
x = df_train_factor.loc[:,['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','c','w','s','pvf','en','psd']].values
# x= df_train_factor.loc[:,['shape_v','impulse_factor','crest_v']]


y = df_train_factor.loc[:,'ladle'].values

In [33]:
scaler = MinMaxScaler()

In [34]:
x_ = scaler.fit_transform(x)

In [35]:
x_

array([[0.48258477, 0.24883201, 0.75111501, ..., 0.29412351, 0.25558557,
        0.25558557],
       [0.48027241, 0.6213959 , 0.46403834, ..., 0.21291635, 0.32164076,
        0.32164076],
       [0.47425088, 0.6213959 , 0.35475173, ..., 0.21617439, 0.39680339,
        0.39680339],
       ...,
       [0.4660869 , 0.13192296, 0.90783529, ..., 0.46691571, 0.25221598,
        0.25221598],
       [0.45951943, 0.12728369, 0.90783529, ..., 0.45011844, 0.25408628,
        0.25408628],
       [0.46487153, 0.12728369, 0.90783529, ..., 0.45977372, 0.25043982,
        0.25043982]])

In [36]:

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(x_, y, test_size=0.3, random_state=42)

In [54]:
# 创建随机森林分类器实例
clf = RandomForestClassifier(n_estimators=10, random_state=42,max_depth=4)
# 训练模型
clf.fit(X_train, y_train)
# 使用测试集进行预测
y_pred = clf.predict(X_test)

# 计算并打印准确率
accuracy = accuracy_score(y_test, y_pred)
print(f'准确率: {accuracy:.2f}')

准确率: 0.82


In [55]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.71      0.77       315
           1       0.69      0.92      0.78       307
           2       0.82      0.65      0.73       294
           3       0.97      1.00      0.98       284

    accuracy                           0.82      1200
   macro avg       0.83      0.82      0.82      1200
weighted avg       0.83      0.82      0.82      1200



In [38]:
## 加载测试集数据
df_test= pd.read_excel('../testData.xlsx',engine='openpyxl')

In [39]:
test_data= getTimeStatic(df_test.iloc[:,0:4096].values)
df_test_=pd.DataFrame(data=test_data,columns=['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','ent','c','w','s','pvf','en','psd'])

In [57]:
df_test_.to_excel('./data/test_new_factor.xlsx',index=False)

In [40]:
X_input=df_test_.loc[:,['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','c','w','s','pvf','en','psd']].values

In [41]:
x_input_=scaler.transform(X_input)

In [51]:
Y_test=clf.predict(x_input_)

In [52]:
save_df=pd.DataFrame(Y_test)

In [53]:
save_df[0].value_counts()

2    1291
1     329
0     311
3      69
Name: 0, dtype: int64

In [45]:
save_df.to_csv('./data/result10.csv',index=False)