In [1]:
import pandas as pd
import numpy as np
import datetime
from scipy.stats import kurtosis, skew, entropy
from scipy.fft import fft

from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import classification_report

In [30]:
df_train= pd.read_excel('../trainData.xlsx',engine='openpyxl')

In [31]:
def getTimeStatic(signal):

    # 假设你的振动信号数组为signal，每一行代表一个样本
    # signal是一个二维数组，每一行包含多个振动信号的数据

    mean_values = np.mean(signal, axis=1)  # 计算每个样本的均值
    max_values = np.max(signal, axis=1)  # 计算每个样本的最大值
    min_values = np.min(signal, axis=1)  # 计算每个样本的最小值


    # 计算每个样本的均方根RMS
    rms = np.sqrt(np.mean(signal**2, axis=1))

    # 计算每个样本的方差Var
    var = np.var(signal, axis=1)

    # 计算每个样本的峰值PvT
    pvt = np.max(np.abs(signal), axis=1)

    # 计算每个样本的峭度指标K
    kurt = kurtosis(signal, axis=1)

    # 计算每个样本的裕度指标L
    l = pvt / rms

    # 计算每个样本的脉冲指标I
    i = np.max(np.abs(signal[:, 1:] - signal[:, :-1]), axis=1)

    # 计算每个样本的线积分LI
    li = np.sum(np.abs(signal), axis=1)

    # 计算每个样本的峰峰值PP
    pp = np.max(signal, axis=1) - np.min(signal, axis=1)

    # 计算每个样本的信息熵Ent
    ent = entropy(signal, axis=1)

    # 计算每个样本的峰值指标C
    c = pvt / pp

    # 计算每个样本的波形指标W
    w = rms / pp

    # 计算每个样本的偏斜度指标S
    s = skew(signal, axis=1)



    # 计算频谱
    freq_spectrum = np.abs(fft(signal))

    # 计算频谱峰值PvF
    pvf = np.max(freq_spectrum, axis=1)

    # 计算频谱能量En
    en = np.sum(freq_spectrum**2, axis=1)

    # 计算功率谱能量PSD
    psd = en / len(signal[0])


    features = np.column_stack((mean_values, max_values, 
                                min_values, rms, var, 
                                pvt, kurt, l,i,li,pp,ent,c,w,s,pvf,en,psd))
    
    return features


In [32]:
import pywt

# 假设你的信号为signal

# 设置小波包分解的参数
wavelet = 'db4'  # 小波基函数
level = 3  # 分解的层数

In [38]:
signal=df_train.iloc[0,0:4096].values

In [39]:
# 进行小波包分解
def getSmallWave(signal):
    wp = pywt.WaveletPacket(data=signal, wavelet=wavelet, mode='symmetric', maxlevel=level)
    nodes = wp.get_level(level, 'freq')

    # 计算每个分解信号的能量
    energies = []
    for node in nodes:
        energy = np.sum(np.abs(node.data)**2)
        energies.append(energy)
    return energies

In [40]:
def waveFactor(df):

    result=[]
    for i in range(len(df)):
        r=getSmallWave(df.iloc[i,0:4096])
        result.append(r)

    df_new=pd.DataFrame(data=result,columns=['e1','e2','e3','e4','e5','e6','e7','e8'])

    return df_new

In [41]:
df_train_wave= waveFactor(df_train)

In [42]:
df_train_wave

Unnamed: 0,e1,e2,e3,e4,e5,e6,e7,e8
0,2020.582104,2477.016781,411.566989,488.091049,438.582148,448.405539,454.655144,427.328369
1,3163.612261,2063.504181,403.989960,418.432156,413.805737,410.044712,447.853149,440.151386
2,3745.938785,2185.635656,477.182499,444.986172,405.547810,412.675127,435.677467,412.044234
3,3420.222988,3149.275182,445.875232,453.975765,387.902180,386.957414,402.246653,421.336284
4,3687.711794,2725.534074,407.640230,467.914290,409.745210,396.095404,371.634651,431.342232
...,...,...,...,...,...,...,...,...
3995,1709.122058,2671.081491,826.596160,432.165356,407.607407,416.026209,421.282425,422.511457
3996,1737.396673,2657.726657,817.738848,430.514955,410.629426,408.210394,423.720306,431.894079
3997,1505.461628,2745.966783,735.914334,489.746343,367.281567,424.877283,395.029102,439.698910
3998,1515.795217,2767.235426,803.896412,430.465714,382.845729,468.915935,411.455088,437.561874


In [44]:
df_train_factor

Unnamed: 0,mean_values,max_values,min_values,rms,var,pvt,kurt,l,i,li,pp,ent,c,w,s,pvf,en,psd,ladle
0,0.017151,5.590916,-5.736414,1.317666,1.735949,5.736414,0.972418,4.353466,5.462647,4108.722497,11.327330,-inf,0.506422,0.116326,0.011263,1016.633840,2.912933e+07,7111.652842,0
1,0.014384,8.833191,-8.392787,1.375453,1.891663,8.833191,5.333521,6.422025,6.150712,3966.561871,17.225978,-inf,0.512783,0.079848,-0.066985,833.659554,3.174031e+07,7749.100129,0
2,0.007176,8.833191,-9.404036,1.438386,2.068903,9.404036,5.921244,6.537908,6.150712,4070.618382,18.237227,-inf,0.515651,0.078871,-0.170919,841.000484,3.471129e+07,8474.436257,0
3,-0.015943,6.690452,-8.727200,1.473441,2.170774,8.727200,0.966754,5.923006,5.181931,4719.580972,15.417652,-inf,0.566052,0.095568,-0.093966,895.379960,3.642381e+07,8892.531621,0
4,-0.020970,6.690452,-8.727200,1.464350,2.143882,8.727200,1.010303,5.959776,5.181931,4688.707345,15.417652,-inf,0.566052,0.094979,-0.089734,949.623732,3.597575e+07,8783.143279,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3995,-0.000070,4.573504,-4.318925,1.330065,1.769073,4.573504,-0.105477,3.438556,5.782219,4356.145599,8.892429,-inf,0.514314,0.149573,0.023365,1405.056452,2.968013e+07,7246.124851,3
3996,0.000687,4.573504,-4.318925,1.330453,1.770103,4.573504,-0.100919,3.437555,5.782219,4354.412133,8.892429,-inf,0.514314,0.149616,0.009580,1462.519095,2.969742e+07,7250.345596,3
3997,-0.002597,4.573504,-4.286252,1.314650,1.728298,4.573504,-0.115189,3.478875,5.782219,4307.530824,8.859756,-inf,0.516211,0.148384,0.001770,1405.965639,2.899614e+07,7079.135587,3
3998,-0.010458,4.533130,-4.286252,1.316325,1.732602,4.533130,-0.090003,3.443778,5.112794,4304.210667,8.819382,-inf,0.513996,0.149254,-0.019340,1368.118383,2.907007e+07,7097.184369,3


In [21]:
train_data= getTimeStatic(df_train.iloc[:,0:4096].values)
df_train_=pd.DataFrame(data=train_data,columns=['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','ent','c','w','s','pvf','en','psd'])

In [22]:
df_train_clear=pd.merge(left=df_train,right=df_train_,left_index=True,right_index=True,how='inner')

In [23]:
df_train_factor=df_train_clear[['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','ent','c','w','s','pvf','en','psd','ladle']]

In [56]:
df_train_factor.to_excel('./data/train_new_factor.xlsx',index=False)

In [43]:
df_train_factor=pd.read_excel('./data/train_new_factor.xlsx',engine='openpyxl')

In [45]:
df_train_clear=pd.merge(left=df_train_factor,right=df_train_wave,left_index=True,right_index=True,how='inner')

In [47]:
df_train_clear.to_excel('./data/train_new_factor.xlsx',index=False)

In [7]:
cols=['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','c','w','s','pvf','en','psd']

In [8]:
for col in cols:
    df_train_factor[col]=(df_train_factor[col]-df_train_factor[col].min())/(df_train_factor[col].max()-df_train_factor[col].min())

In [9]:
df_train_factor.loc[df_train_factor['ladle']==0,].describe()

Unnamed: 0,mean_values,max_values,min_values,rms,var,pvt,kurt,l,i,li,pp,ent,c,w,s,pvf,en,psd,ladle
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.468207,0.341269,0.671934,0.344858,0.270028,0.360235,0.201682,0.339806,0.293394,0.256991,0.331191,-inf,0.276955,0.487431,0.655273,0.307497,0.285804,0.285804,0.0
std,0.161735,0.208975,0.198799,0.101488,0.101777,0.1983,0.196338,0.220037,0.149805,0.103639,0.198007,,0.218713,0.241158,0.135266,0.19021,0.097625,0.097625,0.0
min,0.0,0.0,0.0,0.107121,0.080919,0.081333,0.009036,0.025805,0.042796,0.037254,0.060682,-inf,0.0,0.0,0.0,0.039375,0.080928,0.080928,0.0
25%,0.456328,0.213525,0.48063,0.289819,0.205576,0.210936,0.034191,0.172785,0.179617,0.200985,0.160915,-inf,0.100856,0.251512,0.610854,0.17813,0.233453,0.233453,0.0
50%,0.469711,0.280916,0.721266,0.340616,0.25952,0.296382,0.124067,0.262111,0.267436,0.255737,0.277706,-inf,0.217659,0.508744,0.684709,0.262804,0.279109,0.279109,0.0
75%,0.481018,0.471948,0.829961,0.385543,0.30719,0.526464,0.354089,0.521047,0.369843,0.300371,0.501121,-inf,0.392575,0.686195,0.733867,0.372658,0.320667,0.320667,0.0
max,1.0,1.0,0.999328,0.945836,0.931931,1.0,1.0,1.0,0.883823,0.882466,1.0,-inf,1.0,0.971755,1.0,1.0,0.931807,0.931807,0.0


In [10]:
df_train_factor.loc[df_train_factor['ladle']==1,].describe()

Unnamed: 0,mean_values,max_values,min_values,rms,var,pvt,kurt,l,i,li,pp,ent,c,w,s,pvf,en,psd,ladle
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.469311,0.298497,0.73178,0.335405,0.275752,0.300225,0.118442,0.273528,0.251928,0.279714,0.279367,-inf,0.218382,0.518399,0.668003,0.192373,0.275734,0.275734,1.0
std,0.012771,0.120777,0.127322,0.07087,0.064019,0.119075,0.083136,0.128718,0.102507,0.060662,0.118382,,0.181019,0.1487,0.072375,0.059519,0.064023,0.064023,0.0
min,0.412418,0.070453,0.409556,0.103121,0.077875,0.104463,0.019952,0.059157,0.007969,0.075594,0.092163,-inf,0.000551,0.197097,0.414054,0.03239,0.077793,0.077793,1.0
25%,0.461028,0.20994,0.702977,0.307853,0.24952,0.215571,0.057861,0.181072,0.17546,0.258112,0.191666,-inf,0.076423,0.449833,0.625296,0.148958,0.2495,0.2495,1.0
50%,0.469275,0.275464,0.768127,0.340392,0.278751,0.270899,0.095167,0.244145,0.235889,0.285319,0.246009,-inf,0.164617,0.539299,0.672107,0.186282,0.278904,0.278904,1.0
75%,0.478236,0.360435,0.817215,0.372754,0.308793,0.355179,0.120933,0.351106,0.310316,0.314161,0.304196,-inf,0.329204,0.624257,0.71286,0.239153,0.308725,0.308725,1.0
max,0.503412,0.554665,0.943745,0.596418,0.53044,0.588063,0.370657,0.648306,0.631632,0.442482,0.546349,-inf,0.964412,0.86062,0.849982,0.372738,0.5305,0.5305,1.0


In [11]:
df_train_factor.loc[df_train_factor['ladle']==2,].describe()

Unnamed: 0,mean_values,max_values,min_values,rms,var,pvt,kurt,l,i,li,pp,ent,c,w,s,pvf,en,psd,ladle
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.469863,0.2353,0.7848,0.336786,0.278912,0.245521,0.068399,0.204301,0.271623,0.316534,0.22113,-inf,0.241158,0.633771,0.685238,0.199856,0.280047,0.280047,2.0
std,0.045399,0.156408,0.156707,0.127251,0.122382,0.161351,0.088701,0.156557,0.135823,0.109252,0.150923,,0.195256,0.184394,0.067153,0.087919,0.122176,0.122176,0.0
min,0.33303,0.003952,0.15336,0.0,0.0,0.0,0.004991,0.011881,0.0,0.0,0.0,-inf,0.000251,0.114048,0.465297,0.0,0.0,0.0,2.0
25%,0.449522,0.138595,0.760167,0.283535,0.225695,0.142082,0.023232,0.098466,0.171291,0.274508,0.130543,-inf,0.078604,0.560901,0.64296,0.140771,0.227904,0.227904,2.0
50%,0.466477,0.184512,0.842915,0.333898,0.27197,0.190132,0.030366,0.149539,0.248978,0.322625,0.168472,-inf,0.204188,0.685675,0.690061,0.194308,0.27299,0.27299,2.0
75%,0.485814,0.262173,0.881065,0.376876,0.311301,0.265963,0.050478,0.239418,0.349866,0.366611,0.23101,-inf,0.34388,0.762404,0.729288,0.254179,0.312564,0.312564,2.0
max,0.615062,0.806586,1.0,1.0,1.0,0.845748,0.485592,0.840618,1.0,1.0,0.798019,-inf,0.85994,0.922805,0.873056,0.607091,1.0,1.0,2.0


In [12]:
df_train_factor.loc[df_train_factor['ladle']==3,].describe()   c w pvf pp li i I kurt pvt var 

Unnamed: 0,mean_values,max_values,min_values,rms,var,pvt,kurt,l,i,li,pp,ent,c,w,s,pvf,en,psd,ladle
count,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0,1000.0
mean,0.467891,0.142142,0.867345,0.341967,0.280454,0.149784,0.017971,0.09167,0.303798,0.351007,0.133009,-inf,0.19861,0.788633,0.670094,0.458289,0.280528,0.280528,3.0
std,0.017757,0.042917,0.036688,0.026039,0.023765,0.03468,0.007302,0.039298,0.116047,0.028051,0.029263,,0.127861,0.068562,0.045542,0.025804,0.023802,0.023802,0.0
min,0.406711,0.044343,0.770868,0.278438,0.223487,0.071209,0.0,0.0,0.06451,0.283011,0.056336,-inf,0.006076,0.629262,0.554545,0.37074,0.223418,0.223418,3.0
25%,0.456364,0.115894,0.846016,0.323343,0.263352,0.127645,0.012624,0.062263,0.216438,0.330654,0.112212,-inf,0.09077,0.746983,0.637277,0.441562,0.263426,0.263426,3.0
50%,0.467426,0.138908,0.865131,0.341305,0.2798,0.144107,0.017655,0.089734,0.273522,0.351722,0.132282,-inf,0.181585,0.789527,0.666039,0.458638,0.279738,0.279738,3.0
75%,0.479735,0.164655,0.895585,0.360383,0.297015,0.169372,0.021903,0.11186,0.372955,0.373162,0.150656,-inf,0.283024,0.836249,0.703801,0.475134,0.297258,0.297258,3.0
max,0.519641,0.246685,0.931595,0.41805,0.350918,0.22667,0.042322,0.19042,0.615669,0.434336,0.209134,-inf,0.621022,1.0,0.825498,0.542167,0.351426,0.351426,3.0


In [15]:
# 这里以著名的Iris数据集为例
# x = df_train_factor.loc[:,['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','c','w','s','pvf','en','psd']].values

x = df_train_factor.loc[:,['c','w','pvf','pp','li','i','l','kurt','pvt','var']].values



# x= df_train_factor.loc[:,['shape_v','impulse_factor','crest_v']]


y = df_train_factor.loc[:,'ladle'].values

In [16]:
scaler = MinMaxScaler()

In [17]:
x_ = scaler.fit_transform(x)

In [18]:
x_

array([[0.0586804 , 0.53496102, 0.29412351, ..., 0.10506154, 0.2445172 ,
        0.25557564],
       [0.11698156, 0.20162406, 0.21291635, ..., 0.46510715, 0.58113492,
        0.3216766 ],
       [0.14326339, 0.192699  , 0.21617439, ..., 0.51362853, 0.64318536,
        0.3969151 ],
       ...,
       [0.14840058, 0.82790428, 0.46691571, ..., 0.01527049, 0.11810959,
        0.25232763],
       [0.12810103, 0.83584676, 0.45011844, ..., 0.0173498 , 0.113721  ,
        0.25415461],
       [0.12810103, 0.83246142, 0.45977372, ..., 0.01822696, 0.113721  ,
        0.25054712]])

In [19]:

# 划分训练集和测试集
X_train, X_test, y_train, y_test = train_test_split(x_, y, test_size=0.3, random_state=42)

In [20]:
# 创建随机森林分类器实例
clf = RandomForestClassifier(n_estimators=10, random_state=42,max_depth=4)
# 训练模型
clf.fit(X_train, y_train)
# 使用测试集进行预测
y_pred = clf.predict(X_test)

# 计算并打印准确率
accuracy = accuracy_score(y_test, y_pred)
print(f'准确率: {accuracy:.2f}')

准确率: 0.79


In [21]:
print(classification_report(y_test, y_pred))

              precision    recall  f1-score   support

           0       0.85      0.59      0.70       315
           1       0.66      0.92      0.77       307
           2       0.76      0.66      0.71       294
           3       0.95      1.00      0.97       284

    accuracy                           0.79      1200
   macro avg       0.80      0.79      0.79      1200
weighted avg       0.80      0.79      0.78      1200



In [48]:
## 加载测试集数据
df_test= pd.read_excel('../testData.xlsx',engine='openpyxl')

In [49]:
df_test.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,4086,4087,4088,4089,4090,4091,4092,4093,4094,4095
0,-2.29396,-0.513805,1.343895,0.37017,0.290264,-0.522328,1.163511,-0.118602,-0.306767,2.489014,...,1.018694,1.593836,0.407928,1.301338,-1.077733,1.80263,0.205996,-0.774869,0.554761,-0.667287
1,-9.385027,25.779335,31.801008,15.070719,-14.019687,-28.492617,-19.386296,1.883315,13.071696,15.427468,...,-37.174529,-20.727179,15.427468,56.493562,64.234396,17.63562,-45.262888,-69.924685,-32.020125,18.349119
2,-0.498495,0.205223,-0.777561,-2.505312,-1.983144,-1.953759,0.068401,-0.162355,-0.152753,-0.973466,...,-0.791544,-0.74511,-0.767275,0.307665,0.413021,-1.322902,-1.490804,0.880657,-0.064796,0.536817
3,-0.926654,-0.296114,-0.370281,0.084795,0.996644,-0.086162,-1.440777,-0.748457,0.081069,-0.174502,...,-0.746462,-2.328719,-2.626518,-0.537124,-1.581936,-0.186657,0.253696,0.281353,-0.487527,2.130181
4,53.814057,73.502129,-5.077095,109.964737,80.81485,-40.284858,98.945183,46.732117,-80.771621,125.369034,...,39.332863,34.731766,-45.635974,-22.803575,14.221549,-57.059387,-7.731019,29.02006,-46.544658,-13.788888


In [51]:
df_test_wave= waveFactor(df_test)

In [52]:
test_data= getTimeStatic(df_test.iloc[:,0:4096].values)
df_test_=pd.DataFrame(data=test_data,columns=['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','ent','c','w','s','pvf','en','psd'])

In [53]:
df_test_clear=pd.merge(left=df_test_wave,right=df_test_,left_index=True,right_index=True,how='inner')

In [54]:
len(df_test_clear)

2000

In [55]:
df_test_clear

Unnamed: 0,e1,e2,e3,e4,e5,e6,e7,e8,mean_values,max_values,...,i,li,pp,ent,c,w,s,pvf,en,psd
0,2.509434e+03,2079.699417,3.979084e+02,415.438162,4.030802e+02,3.955834e+02,4.257608e+02,399.193686,-0.007540,4.846138,...,4.504270,4300.356547,9.045058,-inf,0.535777,0.144260,0.058823,829.711719,2.856487e+07,6.973846e+03
1,1.019989e+05,596784.520153,1.100381e+06,157740.871451,4.464309e+04,4.542352e+04,1.241934e+04,1136.917796,0.069638,84.261541,...,75.766070,71552.305507,165.986618,-inf,0.507641,0.132768,0.095758,16084.728759,8.147980e+09,1.989253e+06
2,2.335288e+03,1766.888300,1.155688e+03,617.519596,4.117786e+02,4.949741e+02,4.634202e+02,387.565811,0.023149,5.263394,...,5.119218,4457.665297,10.526809,-inf,0.500001,0.128923,-0.084611,906.199261,3.090133e+07,7.544271e+03
3,2.267265e+03,2853.621589,5.191581e+02,465.416561,4.045885e+02,4.215892e+02,4.327995e+02,368.509481,-0.038242,5.550046,...,4.561339,4280.279359,11.403766,-inf,0.513315,0.120069,0.004216,1092.229996,3.145419e+07,7.679245e+03
4,1.256817e+06,597007.561547,9.744292e+05,817595.420037,2.054257e+06,6.902464e+06,2.156692e+06,37959.716132,0.036311,259.161416,...,535.890701,183248.169444,618.061371,-inf,0.580687,0.096512,-0.139592,24042.177119,5.969665e+10,1.457438e+07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1995,2.179549e+03,1686.122343,1.093332e+03,583.394420,4.500843e+02,4.896035e+02,3.488034e+02,432.826558,0.001162,4.391828,...,4.560643,4348.767110,9.899780,-inf,0.556371,0.133774,-0.178246,975.743283,2.942491e+07,7.183816e+03
1996,2.170071e+03,1895.808872,4.343166e+02,428.247919,4.000062e+02,4.463432e+02,3.883744e+02,381.041645,0.007276,4.308881,...,5.178508,4060.173335,8.822702,-inf,0.511614,0.141947,0.031641,695.792464,2.631342e+07,6.424175e+03
1997,2.084764e+03,1566.887294,9.949072e+02,576.546183,4.341305e+02,4.602428e+02,3.755554e+02,414.422065,0.000752,4.391828,...,5.280583,4229.693129,9.133120,-inf,0.519132,0.141602,-0.135060,881.448155,2.806066e+07,6.850748e+03
1998,3.984815e+01,1.033032,1.422243e+00,1.287114,1.848201e+00,1.602683e+00,5.017491e-01,0.035922,0.013251,0.274425,...,0.216043,379.443103,0.581754,-inf,0.528281,0.184206,-0.205237,249.230696,1.926661e+05,4.703761e+01


In [56]:
df_test_clear.to_excel('./data/test_new_factor.xlsx',index=False)

In [23]:
df_test_=pd.read_excel('./data/test_new_factor.xlsx',engine='openpyxl')

In [24]:
# X_input=df_test_.loc[:,['mean_values', 'max_values','min_values', 'rms', 'var', 'pvt', 'kurt', 'l','i','li','pp','c','w','s','pvf','en','psd']].values
X_input=df_test_.loc[:,['c','w','pvf','pp','li','i','l','kurt','pvt','var']].values

In [25]:
x_input_=scaler.transform(X_input)

In [26]:
Y_test=clf.predict(x_input_)

In [27]:
save_df=pd.DataFrame(Y_test)

In [28]:
save_df[0].value_counts()

0    908
2    696
1    359
3     37
Name: 0, dtype: int64

In [29]:
save_df.to_csv('./data/result11.csv',index=False)