In [167]:
import pandas as pd
import numpy as np
import os
from scipy import stats
from scipy import signal

In [168]:
# Column-name definitions: one name per value returned by featureget(), in the
# same order. ('time_rms', 'time_wavefactor' and 'time_peakfactor' are
# currently disabled and therefore not listed.)
df_out_columns = (
    ['time_' + stat for stat in (
        'mean', 'std', 'max', 'min',
        'ptp', 'median', 'iqr', 'pr',
        'skew', 'kurtosis', 'var', 'amp',
        'smr',
        'pulse', 'margin',
    )]
    + ['1X', '2X', '3X']                  # amplitudes at the three FFT bins
    + ['1XRatio', '2XRatio', '3XRatio']   # energy share of those bins
)

In [169]:
# Every feature is computed once for the 'DE_time' channel and once for the
# 'FE_time' channel, so the base names are prefixed per channel; the class
# label is the final column of each output row.
label_columns = ['label']
DE_columns = [f'DE_{name}' for name in df_out_columns]
FE_columns = [f'FE_{name}' for name in df_out_columns]
full_columns = [*DE_columns, *FE_columns, *label_columns]

In [170]:
# Feature-extraction routine (borrowed from an existing implementation).
def _quiet_ratio(numerator, denominator):
    """Return numerator / denominator with IEEE semantics and no warnings.

    A zero denominator yields inf (or NaN for 0/0) instead of emitting a
    RuntimeWarning, so an all-zero window does not flood the notebook output.
    """
    with np.errstate(divide='ignore', invalid='ignore'):
        return np.float64(numerator) / np.float64(denominator)


def featureget(df_line, n_fft=1024, harmonic_bins=(32, 64, 96)):
    """Extract time-domain and frequency-domain features from one signal window.

    Parameters
    ----------
    df_line : array-like
        One-dimensional, non-empty sequence of numeric samples (a pandas
        Series slice, a NumPy array or a plain list).
    n_fft : int, optional
        FFT length passed to ``np.fft.fft``; the window is zero-padded or
        truncated to this many points. Defaults to 1024.
    harmonic_bins : sequence of int, optional
        FFT bin indices whose amplitude and energy share are reported.
        Defaults to ``(32, 64, 96)``. The original author's note states these
        bins hold the 1x / 2x / 3x harmonic amplitudes; that mapping depends
        on the sampling rate and shaft speed of the data -- TODO confirm.

    Returns
    -------
    list
        With the default arguments, 21 values in the order of
        ``df_out_columns``: mean, std (ddof=1), max, min, peak-to-peak,
        median, inter-quartile range, 10-90 percentile range, skewness,
        kurtosis, variance (ddof=0), mean absolute value, squared mean of
        sqrt(|x|), max / mean-abs, max / smr, then one amplitude per
        harmonic bin followed by one energy ratio per harmonic bin.

    Raises
    ------
    ValueError
        If ``df_line`` is empty or not one-dimensional.

    Notes
    -----
    RMS, wave factor and peak factor are intentionally not computed; they are
    disabled in ``df_out_columns`` as well.
    """
    values = np.asarray(df_line, dtype=np.float64)
    if values.ndim != 1 or values.size == 0:
        raise ValueError('featureget expects a non-empty one-dimensional signal window')
    # The pandas reductions are kept on purpose: Series.std() uses ddof=1 and
    # the pandas reductions skip NaN, exactly as the Series-based original did.
    series = pd.Series(values)

    # ---- time-domain features ----
    time_mean = series.mean()
    time_std = series.std()
    time_max = series.max()
    time_min = series.min()
    # np.ptp (function form) is used because the ndarray.ptp method was
    # removed in NumPy 2.0.
    time_ptp = np.ptp(values)
    time_median = np.median(values)
    q10, q25, q75, q90 = np.percentile(values, [10, 25, 75, 90])
    time_iqr = q75 - q25
    time_pr = q90 - q10
    time_skew = stats.skew(values)
    time_kurtosis = stats.kurtosis(values)
    # Population variance (ddof=0): this is what np.var(Series) evaluated to
    # in the original code.
    time_var = series.var(ddof=0)
    abs_series = series.abs()
    time_amp = abs_series.mean()
    time_smr = np.square(np.sqrt(abs_series).mean())
    # Both denominators are zero for an all-zero window; divide quietly.
    time_pulse = _quiet_ratio(time_max, time_amp)
    time_margin = _quiet_ratio(time_max, time_smr)

    # ---- frequency-domain features: bin amplitudes and their energy share ----
    spectrum = np.abs(np.fft.fft(values, n=n_fft))
    total_energy = np.square(spectrum).sum()
    amplitudes = [spectrum[k] for k in harmonic_bins]
    energy_ratios = [_quiet_ratio(np.square(a), total_energy) for a in amplitudes]

    return [
        time_mean, time_std, time_max, time_min, time_ptp,
        time_median, time_iqr, time_pr, time_skew, time_kurtosis,
        time_var, time_amp, time_smr,
        time_pulse, time_margin,
        *amplitudes,
        *energy_ratios,
    ]

In [171]:
windowSize = 100   # window size: number of rows (samples) fed to featureget() per window

In [181]:
# Input file for feature extraction: class 'B'
B_fault = pd.read_csv('./merge_4/B_fault.csv')

feature_B = []
# Only complete windows are processed; trailing rows that do not fill a whole
# window are dropped.
for i in range(len(B_fault) // windowSize):
    # Positional, half-open slice: exactly windowSize rows per window, starting
    # at row 0. (The previous label-based .loc[i*w+1:(i+1)*w] slice skipped the
    # first row and could leave the final window one row short.)
    window = B_fault.iloc[i * windowSize:(i + 1) * windowSize]
    fea_DE = featureget(window['DE_time'])
    fea_FE = featureget(window['FE_time'])
    # One output row: DE features, FE features, then the class label. The label
    # is added as a single list element; extending with the bare string would
    # split a multi-character label into separate characters.
    feature_B.append(fea_DE + fea_FE + ['B'])

# Convert to a DataFrame
feature_B = pd.DataFrame(feature_B, columns=full_columns)
# Output path for the extracted features (create the directory if needed)
os.makedirs('./feature_4', exist_ok=True)
feature_B.to_csv('./feature_4/feature_B.csv', index=False)

In [178]:
# Input file for feature extraction: class 'IR'
IR_fault = pd.read_csv('./merge_4/IR_fault.csv')

feature_IR = []
# Only complete windows are processed; trailing rows that do not fill a whole
# window are dropped.
for i in range(len(IR_fault) // windowSize):
    # Positional, half-open slice: exactly windowSize rows per window, starting
    # at row 0. (The previous label-based .loc[i*w+1:(i+1)*w] slice skipped the
    # first row and could leave the final window one row short.)
    window = IR_fault.iloc[i * windowSize:(i + 1) * windowSize]
    fea_DE = featureget(window['DE_time'])
    fea_FE = featureget(window['FE_time'])
    # One output row: DE features, FE features, then the class label.
    feature_IR.append(fea_DE + fea_FE + ['IR'])

# Convert to a DataFrame
feature_IR = pd.DataFrame(feature_IR, columns=full_columns)
# Output path for the extracted features (create the directory if needed)
os.makedirs('./feature_4', exist_ok=True)
feature_IR.to_csv('./feature_4/feature_IR.csv', index=False)

In [182]:
# Input file for feature extraction: class 'NORMAL'
NORMAL = pd.read_csv('./merge_4/NORMAL.csv')

feature_NORMAL = []
# Only complete windows are processed; trailing rows that do not fill a whole
# window are dropped.
for i in range(len(NORMAL) // windowSize):
    # Positional, half-open slice: exactly windowSize rows per window, starting
    # at row 0. (The previous label-based .loc[i*w+1:(i+1)*w] slice skipped the
    # first row and could leave the final window one row short.)
    window = NORMAL.iloc[i * windowSize:(i + 1) * windowSize]
    fea_DE = featureget(window['DE_time'])
    fea_FE = featureget(window['FE_time'])
    # One output row: DE features, FE features, then the class label.
    feature_NORMAL.append(fea_DE + fea_FE + ['NORMAL'])

# Convert to a DataFrame
feature_NORMAL = pd.DataFrame(feature_NORMAL, columns=full_columns)
# Output path for the extracted features (create the directory if needed)
os.makedirs('./feature_4', exist_ok=True)
feature_NORMAL.to_csv('./feature_4/feature_NORMAL.csv', index=False)

In [183]:
# Input file for feature extraction: class 'OR'
OR_fault = pd.read_csv('./merge_4/OR_fault.csv')

feature_OR = []
# Only complete windows are processed; trailing rows that do not fill a whole
# window are dropped.
for i in range(len(OR_fault) // windowSize):
    # Positional, half-open slice: exactly windowSize rows per window, starting
    # at row 0. (The previous label-based .loc[i*w+1:(i+1)*w] slice skipped the
    # first row and could leave the final window one row short.)
    window = OR_fault.iloc[i * windowSize:(i + 1) * windowSize]
    fea_DE = featureget(window['DE_time'])
    fea_FE = featureget(window['FE_time'])
    # One output row: DE features, FE features, then the class label.
    feature_OR.append(fea_DE + fea_FE + ['OR'])

# Convert to a DataFrame
feature_OR = pd.DataFrame(feature_OR, columns=full_columns)
# Output path for the extracted features (create the directory if needed)
os.makedirs('./feature_4', exist_ok=True)
feature_OR.to_csv('./feature_4/feature_OR.csv', index=False)