In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

## 读入数据并处理特征

In [2]:
data_path = './data/combined_data.csv'
raw_data = pd.read_csv(data_path)
raw_data.head()

Unnamed: 0,WindNumber,Time,WindSpeed,Power,RotorSpeed,Width,RatedPower,CutInWindSpeed,CutOutWindSpeed,WheelSpeedMin,WheelSpesdMax
0,1,2017/11/1 0:20,4.188333,65.75,5.779167,99.0,2000,3.0,25,8.33,16.8
1,1,2017/11/1 0:30,4.0425,120.7,8.6365,99.0,2000,3.0,25,8.33,16.8
2,1,2017/11/1 0:40,4.137368,135.157895,8.668421,99.0,2000,3.0,25,8.33,16.8
3,1,2017/11/1 0:50,4.274737,157.315789,8.727895,99.0,2000,3.0,25,8.33,16.8
4,1,2017/11/1 1:00,4.108947,129.894737,8.656842,99.0,2000,3.0,25,8.33,16.8


### 给raw_data添加label, year, month, day

In [3]:
raw_data['label']=-1
raw_data['Year'] = raw_data['Time'].apply(lambda x:int(x.split(' ')[0].split('/')[0]))
raw_data['Month'] = raw_data['Time'].apply(lambda x:int(x.split(' ')[0].split('/')[1]))
raw_data['Day'] = raw_data['Time'].apply(lambda x:int(x.split(' ')[0].split('/')[2]))
raw_data['Index'] = raw_data.index

# 把windspeed和power归一化保存
max_min_scaler = lambda x : (x-np.min(x))/(np.max(x)-np.min(x))
raw_data['NormWindSpeed'] = raw_data[['WindSpeed']].apply(max_min_scaler)
raw_data['NormPower'] = raw_data[['Power']].apply(max_min_scaler)

In [4]:
raw_data=raw_data[['WindNumber', 'Time', 'WindSpeed', 'Power', 'RotorSpeed', 'label', 'Year', 'Month', 'Day', 'NormWindSpeed', 'NormPower']]

In [5]:
raw_data.head()

Unnamed: 0,WindNumber,Time,WindSpeed,Power,RotorSpeed,label,Year,Month,Day,NormWindSpeed,NormPower
0,1,2017/11/1 0:20,4.188333,65.75,5.779167,-1,2017,11,1,0.419569,0.076748
1,1,2017/11/1 0:30,4.0425,120.7,8.6365,-1,2017,11,1,0.415905,0.100808
2,1,2017/11/1 0:40,4.137368,135.157895,8.668421,-1,2017,11,1,0.418289,0.107138
3,1,2017/11/1 0:50,4.274737,157.315789,8.727895,-1,2017,11,1,0.42174,0.116839
4,1,2017/11/1 1:00,4.108947,129.894737,8.656842,-1,2017,11,1,0.417575,0.104833


## 定义各项函数

### 四分法函数

In [6]:
def get_mid(data):
    # data: series
    # idx在n为偶数时返回n/2
    x = data.sort_values()
    n = x.shape[0]
    idx = (n+1)/2 if n%2==1 else n/2
    idx = int(idx-1)
    mid = x.iloc[idx] if n%2==1 else (x.iloc[idx]+x.iloc[int((n+2)/2-1)])/2
    return idx, mid

def get_F(data, low_coef=1.5, up_coef=1.5):
    # data: series
    x = data.sort_values()
    n = x.shape[0]
    idx_2, q_2 = get_mid(x)
    k = n//4
    # print(k, n)
    if n%2==0:
        q_1_data = x.iloc[:idx_2] if idx_2%2 else x.iloc[:idx_2+1]
        q_3_data = x.iloc[idx_2+1:]
        idx_1, q_1 = get_mid(q_1_data)
        idx_3, q_3 = get_mid(q_3_data)
    elif n%4==3:
        q_1 = 0.75*x.iloc[k]+0.25*x.iloc[k+1]
        q_3 = 0.25*x.iloc[3*k]+0.75*x.iloc[3*k+2]
    elif n%4==1:
        q_1 = 0.25*x.iloc[k-1]+0.75*x.iloc[k]
        q_3 = 0.75*x.iloc[3*k]+0.25*x.iloc[3*k+1]
    i_qr = q_3-q_1
    f_1, f_u = q_1-low_coef*i_qr, q_3+up_coef*i_qr
    return f_1, f_u

### 定义横向和纵向四分位法

In [7]:
def horizonal_process(current_data,low_coef=1.5, up_coef=1.5):
    '''传入DataFrame，返回异常数据的index
    '''
    res_data = current_data[current_data['label']!=1].copy()
    res_data = res_data.sort_values(by=['Power'])
    st = np.linspace(0, 2000, 81, dtype=int)

    # 对每个power段
    for i in range(1, st.shape[0]):
        p_min, p_max = st[i-1], st[i]
        flag = (res_data['Power']>=p_min) & (res_data['Power']<=p_max)
        sub_data = res_data[flag]

        if sub_data.shape[0]<4:
            # continue
            f_1 = f_u = 0
        else:
            f_1, f_u = get_F(sub_data['WindSpeed'],low_coef,up_coef)

        flag_1_u = (res_data['WindSpeed']<f_1) | (res_data['WindSpeed']>f_u)
        flag_com = flag & flag_1_u
        res_data.loc[flag_com, 'label']=1

    # 返回DataFrame和下标
    res_data.loc[res_data[res_data['label']==1].index, 'label']=1
    return res_data, res_data[res_data['label']==1].index


def vertical_process(current_data,low_coef=1.5, up_coef=1.5):
    res_data = current_data[current_data['label']!=1].copy()
    res_data = res_data.sort_values(by=['Power'])
    st = np.arange(0, 25.5, 0.5)

    # 对每个风速段
    for i in range(1, st.shape[0]):
        p_min, p_max = st[i-1], st[i]
        flag = (res_data['WindSpeed']>=p_min) & (res_data['WindSpeed']<=p_max)
        sub_data = res_data[flag]
        # print(sub_data)

        if sub_data.shape[0]<4:
            # continue
            f_1 = f_u = 0
        else:
            f_1, f_u = get_F(sub_data['Power'],low_coef,up_coef)

        # 超过[F1, Fu]的都算作异常
        flag_1_u = (res_data['Power']<f_1) | (res_data['Power']>f_u)
        flag_com = flag & flag_1_u
        res_data.loc[flag_com, 'label']=1

    # 返回DataFrame和下标
    res_data.loc[res_data[res_data['label']==1].index, 'label']=1
    return res_data, res_data[res_data['label']==1].index


### 去除功率为负的异常点函数

In [8]:
def get_cut_int_out(current_rotor):
    '''返回当前风电机切入和切出风速
    '''
    cut_int=cut_out=0
    if current_rotor in [1,2,3,4,6,7,8,9,10]:
        cut_in = 3
        cut_out = 25
    elif current_rotor ==5:
        cut_in = 3
        cut_out = 22
    elif current_rotor == 11:
        cut_in = 2.5
        cut_out = 19
    elif current_rotor == 12:
        cut_in = 3
        cut_out = 22
    return cut_in, cut_out
    
def judge_power(row, cut_in, cut_out):
    # 切入以下的且功率小于0的暂且认为正常
    if row['Power']<0 and row['WindSpeed']<cut_in:
        return 0
    # 切入以下功率大于0的为异常
    elif row['Power']>=0 and row['WindSpeed']<cut_in:
        return 1
    # 切入风速以上，切出风速以下，power小于等于0的为异常
    elif row['Power']<=1 and row['WindSpeed']<cut_out and row['WindSpeed']>cut_in:
        return 1
    # 切出风速以上，power大于0的为异常
    elif row['Power']>0 and row['WindSpeed']>cut_out:
        return 1
    # 切出风速以上，power小于0的暂且认为是正常
    elif row['Power']<=0 and row['WindSpeed']>cut_out:
        return 0
    else:
        return -1

def remove_neg_power(current_data, current_rotor):
    '''返回current_data功率异常区域为负数的下标
    '''
    current_data = current_data[current_data['label']!=1]
    cut_in, cut_out = get_cut_int_out(current_rotor)
    tmp_data = current_data
    tmp_data['label'] = tmp_data.apply(judge_power, axis=1, args=(cut_in, cut_out))

    return tmp_data, tmp_data[tmp_data['label']==1].index

### 定义去除中部限功率数据函数

In [9]:
def remove_mid_anomaly(current_data, max_a=0.5, max_diff=3,max_time=12,min_speed=0.3,power_step=6):
    '''传入的是待处理的风电机数据，在函数内部对功率进行划分
    max_a = 0.5 # y=ax+b，这个设置基本没用，都是接近0的
    max_diff = 3 # 3kw
    max_time = 12 # 6*10s 
    min_speed = 0.3 # 0.5m/s
    '''
    res_data = current_data[current_data['label']!=1].copy()
    st = np.arange(0,1900,power_step)

    res_idx = np.array([])

    for i in range(1, st.shape[0]):
        p_min, p_max = st[i-1], st[i]
        flag = (res_data['Power']>=p_min) & (res_data['Power']<=p_max)
        sub_data = res_data[flag]
        
        # 拟合y=ax+b
        x = sub_data['WindSpeed']
        y = sub_data['Power']
        if(x.shape[0]==0):
            continue
        a,b = np.polyfit(x,y,1)
        # print("{}-{}: {}个点， y={:.2f}x+{:.2f}".format(p_min, p_max, x.shape[0], a, b))

        # 处理符合条件的点
        # if(a>max_a):
        #     continue

        # 和拟合曲线差在max_diff以内
        sub_data['diff'] = np.abs(sub_data['WindSpeed']*a+b-sub_data['Power'])
        diff_idx = sub_data[sub_data['diff']<max_diff].index
        tmp_idx = []

        # diff_idx[0]忽略
        for i in range(1, diff_idx.shape[0]):
            cur_idx, pre_idx = diff_idx[i],diff_idx[i-1]
            cur_windspeed, pre_windspeed = sub_data.loc[cur_idx, 'WindSpeed'], sub_data.loc[pre_idx, 'WindSpeed']

            # print("idx: {}-{}, diff_idx: {}, diff_speed: {}, diff_power: {}".format(pre_idx, cur_idx,cur_idx-pre_idx, cur_windspeed-pre_windspeed, sub_data.loc[cur_idx, 'Power']-sub_data.loc[pre_idx, 'Power']))
            # 时间差在max_time以内，且windspeed差在min_speed以上
            if(cur_idx-pre_idx<max_time and cur_windspeed-pre_windspeed>min_speed):
                tmp_idx.append(cur_idx)
                # print("idx: {}-{}, diff_idx: {}, diff_speed: {}, diff_power: {}".format(pre_idx, cur_idx,cur_idx-pre_idx, cur_windspeed-pre_windspeed, sub_data.loc[cur_idx, 'Power']-sub_data.loc[pre_idx, 'Power']))
            
        tmp_idx = np.array(tmp_idx)
        res_idx = np.concatenate((res_idx, tmp_idx))

    res_data.loc[res_idx,'label'] = 1
    return res_data, res_idx

### 绘图函数

In [10]:
def plot_current_data(current_data,rotor_num,save=False,file_path=None):
    plt.figure(figsize=(16,4))

    plt.subplot(141)
    g_data = raw_data[raw_data['WindNumber']==rotor_num]
    plt.plot(g_data['WindSpeed'],g_data['Power'], '.y', ms=4,label='origin data')
    plt.ylim((-100,2200))
    plt.xlim((-1,27))
    plt.legend()


    plt.subplot(142)
    g_data = current_data
    plt.plot(g_data['WindSpeed'],g_data['Power'], '.g', ms=4,label='origin data')
    plt.ylim((-100,2200))
    plt.xlim((-1,27))
    plt.legend()

    plt.subplot(143)
    g_data = current_data.loc[current_data['label']!=1]
    plt.plot(g_data['WindSpeed'],g_data['Power'], '.b', ms=4,label='normal data')
    plt.ylim((-100,2200))
    plt.xlim((-1,27))
    plt.legend()

    plt.subplot(144)
    g_data = current_data.loc[current_data['label']==1]
    plt.plot(g_data['WindSpeed'],g_data['Power'], '.r', ms=4,label='anomaly data')
    plt.ylim((-100,2200))
    plt.xlim((-1,27))
    plt.legend()

    if(save and file_path is not None):
        plt.savefig(file_path)
    plt.show()

### 处理当前数据函数

一整套流程，先去底部异常功率，再去中部异常，再横向四分法，最后纵向四分法，会自动更新到raw_data

In [11]:
def process_current_data(current_data):
    # 去底部异常
    removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
    current_data.loc[neg_power_idx, 'label']=1
    print("bottom: {}".format(neg_power_idx.shape))

    # 去中部异常
    removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data)
    current_data.loc[mid_anomaly_idx, 'label']=1
    print("mid: {}".format(mid_anomaly_idx.shape))

    # 横向四分法
    horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data)
    current_data.loc[horizonal_anomaly_idx, 'label']=1
    print("horizonal: {}".format(horizonal_anomaly_idx.shape))

    # 纵向四分法
    vertical_data, vertical_anomaly_idx = vertical_process(current_data)
    current_data.loc[vertical_anomaly_idx, 'label']=1
    print("vertical: {}".format(vertical_anomaly_idx.shape))

    # 更新到raw_data
    current_anomaly_idx = current_data[current_data['label']==1].index
    print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))
    
    raw_data.loc[current_anomaly_idx, 'label']=1
    print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

## 测试各项函数

### 测试四分法

current_rotor = 1
current_data = raw_data[raw_data['WindNumber']==current_rotor]

horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data)
current_data.loc[horizonal_anomaly_idx, 'label']=1

vertical_data, vertical_anomaly_idx = vertical_process(current_data)
current_data.loc[vertical_anomaly_idx, 'label']=1

len(horizonal_anomaly_idx), len(vertical_anomaly_idx)

# horizonal test
plt.figure(figsize=(16,5))
plt.subplot(131)
plt.plot(current_data['WindSpeed'],current_data['Power'], '.r', ms=4,label='origin data')
plt.legend()
plt.subplot(132)
g_data = horizonal_data.loc[horizonal_anomaly_idx]
plt.plot(g_data['WindSpeed'],g_data['Power'], '.r', ms=4,label='anomaly data')
plt.legend()
plt.subplot(133)
g_data = horizonal_data.loc[horizonal_data['label']!=1]
plt.plot(g_data['WindSpeed'],g_data['Power'], '.r', ms=4,label='normal data')
plt.legend()

# vertical test
plt.figure(figsize=(16,5))
plt.subplot(131)
plt.plot(current_data['WindSpeed'],current_data['Power'], '.r', ms=4,label='origin data')
plt.legend()
plt.subplot(132)
g_data = vertical_data.loc[vertical_anomaly_idx]
plt.plot(g_data['WindSpeed'],g_data['Power'], '.r', ms=4,label='anomaly data')
plt.legend()
plt.subplot(133)
g_data = vertical_data.loc[vertical_data['label']!=1]
plt.plot(g_data['WindSpeed'],g_data['Power'], '.r', ms=4,label='normal data')
plt.legend()

### 测试去除底部异常功率数据

current_rotor = 1
current_data = raw_data[raw_data['WindNumber']==current_rotor]
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1

current_data.shape[0],current_data[current_data['label']!=1].shape[0],current_data[current_data['label']==1].shape[0]

plt.figure(figsize=(12,6))

plt.subplot(121)
g_data = current_data.loc[current_data['label']!=1]
plt.plot(g_data['WindSpeed'],g_data['Power'], '.r', ms=4,label='normal data')
plt.ylim((-100,2200))
plt.legend()

plt.subplot(122)
g_data = current_data.loc[current_data['label']==1]
plt.plot(g_data['WindSpeed'],g_data['Power'], '.r', ms=4,label='anomaly data')
plt.ylim((-100,2200))
plt.legend()
plt.show()

x = np.arange(11)
y = 0.5*x+1
plt.xlim((0,10))
plt.ylim((0,10))
plt.plot(x,y)

### 测试去除mid限功率函数，基于Norm Data继续改

current_rotor = 1
current_data = raw_data[raw_data['WindNumber']==current_rotor]

tmp_data, mid_anomaly_idx = remove_mid_anomaly(current_data)
current_data.loc[mid_anomaly_idx, 'label']=1

current_data.shape[0],current_data[current_data['label']!=1].shape[0],current_data[current_data['label']==1].shape[0]

plt.figure(figsize=(12,6))

plt.subplot(121)
g_data = current_data.loc[current_data['label']!=1]
plt.plot(g_data['WindSpeed'],g_data['Power'], '.r', ms=4,label='normal data')
plt.ylim((0,2200))
plt.legend()

plt.subplot(122)
g_data = current_data.loc[current_data['label']==1]
plt.plot(g_data['WindSpeed'],g_data['Power'], '.r', ms=4,label='anomaly data')
plt.ylim((0,2200))
plt.legend()
plt.show()

## 正式开始处理数据

### 一号

In [None]:
current_rotor = 1
current_data = raw_data[raw_data['WindNumber']==current_rotor]

process_current_data(current_data)

In [None]:
is_save=True
line_num = 1
fp = "./figures/vp_{}_{}.png".format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

### 二号

In [None]:
current_rotor = 2
current_data = raw_data[raw_data['WindNumber']==current_rotor]
# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))

# 去除最下面的线
low_idx = current_data[((current_data['Month']==1) | (current_data['Month']==2) | (current_data['Month']==12)) & (current_data['WindSpeed']>9)].index
current_data.loc[low_idx, 'label']=1
print("low line: {}".format(low_idx.shape))

# 去中部异常
removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data, max_time=9, max_diff=8, power_step=20)
current_data.loc[mid_anomaly_idx, 'label']=1
print("mid: {}".format(mid_anomaly_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=2.5,up_coef=2.8)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# # 纵向四分法
vertical_data, vertical_anomaly_idx = vertical_process(current_data[current_data['WindSpeed']>8])
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=2,up_coef=1)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))


In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 三号

- **三号风机两条曲线数据**

1. 第一条到达额定功率2000曲线
    - 5,6,7月，4月12-31号，8月1-21号，10月29号\[99:], 30号\[:50]
2. 第二条功率1000左右的
    - 1,2,3,9,11,12, 4月1-11号，8月22-31号，10月1-28号,29号\[:99], 30号\[50:]

#### 3.1 三号第一条线

In [None]:
current_rotor = 3
# 处理第一条线 5,6,7月，4月18-31号，8月1-21号
current_data = raw_data[raw_data['WindNumber']==current_rotor]

oct29_data = current_data[((current_data['Month']==10) & (current_data['Day']==29))][99:]
oct30_data = current_data[((current_data['Month']==10) & (current_data['Day']==30))][:50]

current_data = current_data[(current_data['Month']==5) | (current_data['Month']==6) | (current_data['Month']==7) | ( (current_data['Month']==4) & (current_data['Day']>=12) ) | ( (current_data['Month']==8) & (current_data['Day']<=21) )]
current_data = pd.concat([current_data, oct29_data, oct30_data])

# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))
# 应用横部四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=1.5, up_coef=1.5)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))


vertical_data, vertical_anomaly_idx = vertical_process(current_data, low_coef=2, up_coef=2)
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

#### 3.2 三号第二条线

In [None]:
current_rotor = 3
# 处理第二条线 1,2,3,9,11,12, 4月1-11号，8月22-31号，10月1-28号,29号[:99], 30号[50:]
current_data = raw_data[raw_data['WindNumber']==current_rotor]

oct29_data = current_data[((current_data['Month']==10) & (current_data['Day']==29))][:99]
oct30_data = current_data[((current_data['Month']==10) & (current_data['Day']==30))][50:]

current_data = current_data[(current_data['Month']==1) | (current_data['Month']==2) | (current_data['Month']==3) | (current_data['Month']==9) | (current_data['Month']==11) | (current_data['Month']==12) | ( (current_data['Month']==4) & (current_data['Day']<12) ) | ( (current_data['Month']==8) & (current_data['Day']>21) ) | ( (current_data['Month']==10) & (current_data['Day']<29) )]
current_data = pd.concat([current_data, oct29_data, oct30_data])

# process_current_data(current_data)

In [None]:
# # 去底部异常
# removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
# current_data.loc[neg_power_idx, 'label']=1
# print("bottom: {}".format(neg_power_idx.shape))
# # 应用横部四分法
# horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=1.7, up_coef=1.9)
# current_data.loc[horizonal_anomaly_idx, 'label']=1
# print("horizonal: {}".format(horizonal_anomaly_idx.shape))

In [None]:
current_data['label']=1

In [None]:
is_save=True
line_num = 2
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 全部当作异常点
current_anomaly_idx = current_data.index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 四号

In [None]:
current_rotor = 4
current_data = raw_data[raw_data['WindNumber']==current_rotor]
# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data, low_coef=4, up_coef=3)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# 纵向四分法
vertical_data, vertical_anomaly_idx = vertical_process(current_data, low_coef=2, up_coef=2)
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 五号

In [None]:
current_rotor = 5
current_data = raw_data[raw_data['WindNumber']==current_rotor]
# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))

# 去中部异常
removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data)
current_data.loc[mid_anomaly_idx, 'label']=1
print("mid: {}".format(mid_anomaly_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data, low_coef=5, up_coef=4)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))


In [None]:
# 纵向四分法
vertical_data, vertical_anomaly_idx = vertical_process(current_data, low_coef=1.5, up_coef=3)
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 六号

1. 第一条到达额定功率2000曲线
    - 9，10月
2. 从下往上第二条功率1000左右的
    - 5月18-31号，6，7，8月
3. 最下面功率800左右的
    - 1，2，3，4，11，12月，5月1-17号

In [None]:
current_rotor = 6
# 处理第一条线 9，10月
current_data = raw_data[raw_data['WindNumber']==current_rotor]
current_data = current_data[(current_data['Month']==9) | (current_data['Month']==10)]
# process_current_data(current_data)

#### 6.1 六号第一条线

In [None]:
vertical_data, vertical_anomaly_idx = vertical_process(current_data)
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

#### 6.2 六号第二条线

In [None]:
current_rotor = 6
# 处理第二条线 5月18-31号，6，7，8月
current_data = raw_data[raw_data['WindNumber']==current_rotor]
current_data = current_data[(current_data['Month']==6) | (current_data['Month']==7) | (current_data['Month']==8) | ( (current_data['Month']==5) & (current_data['Day']>=18) ) ]
# process_current_data(current_data)

In [None]:
# # 去底部异常
# removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
# current_data.loc[neg_power_idx, 'label']=1
# print("bottom: {}".format(neg_power_idx.shape))

# # 横向四分法
# horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=2, up_coef=2.5)
# current_data.loc[horizonal_anomaly_idx, 'label']=1
# print("horizonal: {}".format(horizonal_anomaly_idx.shape))

In [None]:
current_data['label']=1

In [None]:
is_save=True
line_num = 2
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 全部当作异常
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

#### 6.3 六号第三条曲线

In [None]:
current_rotor = 6
# 处理第三条线 1，2，3，4，11，12月，5月1-17号
current_data = raw_data[raw_data['WindNumber']==current_rotor]
current_data = current_data[(current_data['Month']==1) | (current_data['Month']==2) | (current_data['Month']==3)  | (current_data['Month']==4) | (current_data['Month']==11) | (current_data['Month']==12) | ( (current_data['Month']==5) & (current_data['Day']<18) ) ]
# process_current_data(current_data)

In [None]:
# # 去底部异常
# removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
# current_data.loc[neg_power_idx, 'label']=1
# print("bottom: {}".format(neg_power_idx.shape))

# # 去中部异常
# removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data)
# current_data.loc[mid_anomaly_idx, 'label']=1
# print("mid: {}".format(mid_anomaly_idx.shape))

# # 横向四分法
# horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data, low_coef=2.5, up_coef=3.5)
# current_data.loc[horizonal_anomaly_idx, 'label']=1
# print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# # 纵向四分法
# vertical_data, vertical_anomaly_idx = vertical_process(current_data, low_coef=1.5, up_coef=1.5)
# current_data.loc[vertical_anomaly_idx, 'label']=1
# print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
current_data['label']=1

In [None]:
is_save=True
line_num = 3
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 七号

1. 左边曲线
    - 9月10-17号
2. 右边曲线
    - 除了9月，加上0月1-9号，>=18号

#### 7.1 七号第一条

In [None]:
current_rotor = 7
# 处理左边，9月10-17
current_data = raw_data[raw_data['WindNumber']==current_rotor]
current_data = current_data[( (current_data['Month']==9) & (current_data['Day']<=17) & (current_data['Day']>=10))]
# process_current_data(current_data)

In [None]:
# 去底部异常
# removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
# current_data.loc[neg_power_idx, 'label']=1
# print("bottom: {}".format(neg_power_idx.shape))

# # 纵向四分法
# vertical_data, vertical_anomaly_idx = vertical_process(current_data, low_coef=1.5, up_coef=1.5)
# current_data.loc[vertical_anomaly_idx, 'label']=1
# print("vertical: {}".format(vertical_anomaly_idx.shape))

# 全部当作异常
current_data['label']=1

#### 如果加上去掉功率异常的, 会导致下面去掉一条线, 是因为在切入风速以内,有power

In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

#### 7.2 七号第二条

In [None]:
current_rotor = 7
# 处理右边，除了9月，加上9月1-9号，>=18号
current_data = raw_data[raw_data['WindNumber']==current_rotor]

sep_data = current_data[( (current_data['Month']==9) & (current_data['Day']>=18) ) | ((current_data['Month']==9) & (current_data['Day']<10))]

current_data = current_data[( (current_data['Month']==1) | (current_data['Month']==2) | (current_data['Month']==3) | (current_data['Month']==4) | (current_data['Month']==5) | (current_data['Month']==6) | (current_data['Month']==7) | (current_data['Month']==8) | (current_data['Month']==10) | (current_data['Month']==11) | (current_data['Month']==12))]
current_data = pd.concat([current_data, sep_data])

# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))

# 去中部异常
removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data)
current_data.loc[mid_anomaly_idx, 'label']=1
print("mid: {}".format(mid_anomaly_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data, low_coef=1.5, up_coef=1.7)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# 纵向四分法
vertical_data, vertical_anomaly_idx = vertical_process(current_data)
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 2
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 八号

In [None]:
current_rotor = 8
current_data = raw_data[raw_data['WindNumber']==current_rotor]
# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))

# 去中部异常
removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data,max_diff=2,max_time=6)
current_data.loc[mid_anomaly_idx, 'label']=1
print("mid: {}".format(mid_anomaly_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data, low_coef=1.5, up_coef=2.2)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# 纵向四分法
vertical_data, vertical_anomaly_idx = vertical_process(current_data, up_coef=2.2)
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 九号

1. 上边曲线
    - 9，10月，8月>26号
2. 下边曲线
    - 除了9月，10月的月份，其中八月：1-26号

#### 9.1 九号第一条

In [None]:
current_rotor = 9
# 处理左边，9月，10月，8月>26号
current_data = raw_data[raw_data['WindNumber']==current_rotor]
current_data = current_data[( (current_data['Month']==9) | (current_data['Month']==10) | ((current_data['Month']==8) & (current_data['Day']>26)))]
# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))

# 去中部异常
removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data)
current_data.loc[mid_anomaly_idx, 'label']=1
print("mid: {}".format(mid_anomaly_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,up_coef=2.6)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# # 纵向四分法
vertical_data, vertical_anomaly_idx = vertical_process(current_data)
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

#### 9.2 九号第二条线

In [None]:
current_rotor = 9
# 处理下边，除了9月，10月的月份，其中八月：1-26号
current_data = raw_data[raw_data['WindNumber']==current_rotor]

current_data = current_data[( (current_data['Month']==1) | (current_data['Month']==2) | (current_data['Month']==3) | (current_data['Month']==4) | (current_data['Month']==5) | (current_data['Month']==6) | (current_data['Month']==7) | (current_data['Month']==11) | (current_data['Month']==12) | ((current_data['Month']==8) & (current_data['Day']<=26)) )]

# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))

# 去中部异常
removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data,max_time=6,max_diff=2)
current_data.loc[mid_anomaly_idx, 'label']=1
print("mid: {}".format(mid_anomaly_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=2,up_coef=2.5)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# 纵向四分法
vertical_data, vertical_anomaly_idx = vertical_process(current_data,low_coef=2, up_coef=2)
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 2
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 十月

1. 左边曲线
    - 12, 1月<=7日，11月>=12日
2. 右边曲线
    - 2, 3, 4, 5, 6, 7, 8, 9, 10，1月>7日，11月<12日

#### 10.1 十号第一条

In [None]:
current_rotor = 10
# 处理左边，12, 1月<=7日，11月>=12日
current_data = raw_data[raw_data['WindNumber']==current_rotor]
current_data = current_data[( (current_data['Month']==12) | ((current_data['Month']==1)&(current_data['Day']<=7) ) | ((current_data['Month']==11) & (current_data['Day']>=12)))]
# process_current_data(current_data)

In [None]:
# # 去底部异常
# removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
# current_data.loc[neg_power_idx, 'label']=1
# print("bottom: {}".format(neg_power_idx.shape))

# # 去中部异常
# removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data)
# current_data.loc[mid_anomaly_idx, 'label']=1
# print("mid: {}".format(mid_anomaly_idx.shape))

# # # 横向四分法
# horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=2,up_coef=2)
# current_data.loc[horizonal_anomaly_idx, 'label']=1
# print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# # # 纵向四分法
# vertical_data, vertical_anomaly_idx = vertical_process(current_data,low_coef=2, up_coef=2)
# current_data.loc[vertical_anomaly_idx, 'label']=1
# print("vertical: {}".format(vertical_anomaly_idx.shape))

# # # 横向四分法
# horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data)
# current_data.loc[horizonal_anomaly_idx, 'label']=1
# print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# # # 纵向四分法
# vertical_data, vertical_anomaly_idx = vertical_process(current_data)
# current_data.loc[vertical_anomaly_idx, 'label']=1
# print("vertical: {}".format(vertical_anomaly_idx.shape))
# # # 纵向四分法
# vertical_data, vertical_anomaly_idx = vertical_process(current_data)
# current_data.loc[vertical_anomaly_idx, 'label']=1
# print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
# 全部作为异常
current_data['label']=1
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

#### 10.2 十号第二条

In [None]:
current_rotor = 10
# 处理右边，2, 3, 4, 5, 6, 7, 8, 9, 10，1月>7日，11月<12日
current_data = raw_data[raw_data['WindNumber']==current_rotor]

current_data = current_data[( ((current_data['Month']==1)&(current_data['Day']>7)) | (current_data['Month']==2) | (current_data['Month']==3) | (current_data['Month']==4) | (current_data['Month']==5) | (current_data['Month']==6) | (current_data['Month']==7) | (current_data['Month']==8) | (current_data['Month']==9) | (current_data['Month']==10)| ((current_data['Month']==11)&(current_data['Day']<12)) )]

# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))

# 去中部异常
removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data,max_time=6,max_diff=2)
current_data.loc[mid_anomaly_idx, 'label']=1
print("mid: {}".format(mid_anomaly_idx.shape))

# # 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=2,up_coef=2.1)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# # 纵向四分法
vertical_data, vertical_anomaly_idx = vertical_process(current_data,low_coef=2,up_coef=1.9)
current_data.loc[vertical_anomaly_idx, 'label']=1
print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 2
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 十一号

In [None]:
current_rotor = 11
current_data = raw_data[raw_data['WindNumber']==current_rotor]
# process_current_data(current_data)

In [None]:
# 去底部异常
removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
current_data.loc[neg_power_idx, 'label']=1
print("bottom: {}".format(neg_power_idx.shape))

# 去中部异常
removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data)
current_data.loc[mid_anomaly_idx, 'label']=1
print("mid: {}".format(mid_anomaly_idx.shape))

# # 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=2.5,up_coef=3)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# # # 纵向四分法
# vertical_data, vertical_anomaly_idx = vertical_process(current_data,low_coef=2,up_coef=2)
# current_data.loc[vertical_anomaly_idx, 'label']=1
# print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 1
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

### 十二号

In [None]:
current_rotor = 12
current_data = raw_data[raw_data['WindNumber']==current_rotor]
# process_current_data(current_data)

In [None]:
# 去底部异常
# removed_neg_data, neg_power_idx = remove_neg_power(current_data, current_rotor)
# current_data.loc[neg_power_idx, 'label']=1
# print("bottom: {}".format(neg_power_idx.shape))

# 去中部异常
removed_mid_data, mid_anomaly_idx = remove_mid_anomaly(current_data)
current_data.loc[mid_anomaly_idx, 'label']=1
print("mid: {}".format(mid_anomaly_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=3,up_coef=3)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# 横向四分法
horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=1.5,up_coef=1.5)
current_data.loc[horizonal_anomaly_idx, 'label']=1
print("horizonal: {}".format(horizonal_anomaly_idx.shape))

# # 纵向四分法
# vertical_data, vertical_anomaly_idx = vertical_process(current_data,low_coef=2,up_coef=2)
# current_data.loc[vertical_anomaly_idx, 'label']=1
# print("vertical: {}".format(vertical_anomaly_idx.shape))

In [None]:
is_save=True
line_num = 2
fp = './figures/vp_{}_{}.png'.format(current_rotor,line_num)
plot_current_data(current_data,rotor_num=current_rotor,save=is_save,file_path=fp)

In [None]:
# 更新到raw_data
current_anomaly_idx = current_data[current_data['label']==1].index
print("sum of current anomaly data: {}".format(current_anomaly_idx.shape))

raw_data.loc[current_anomaly_idx, 'label']=1
print("sum of raw data anomaly data: {}".format(np.sum(raw_data['label']==1)))

```
sum of current anomaly data: (10565,)
sum of raw data anomaly data: 158009
```

## 保存数据

In [None]:
raw_data.loc[raw_data['label']==-1, 'label']=0

In [None]:
# current_data = raw_data
# # 横向四分法
# horizonal_data, horizonal_anomaly_idx = horizonal_process(current_data,low_coef=1.5,up_coef=1.5)
# current_data.loc[horizonal_anomaly_idx, 'label']=1
# print("horizonal: {}".format(horizonal_anomaly_idx.shape))

In [None]:
file_num = "9_25"
RES_PATH='./submission/submission_{}.csv'.format(file_num)
# print("未分类数：{}".format(np.sum(raw_data['label'] == -1)))
print("异常点数：{}".format(np.sum(raw_data['label'] == 1)))
print("正常点数：{}".format(np.sum(raw_data['label'] == 0)))

result = raw_data[['WindNumber', 'Time', 'label']]
result.to_csv(RES_PATH, index=None)