### 数据处理说明
#### 数据来源：2009年微波辐射计亮温数据和探空数据
#### 微波辐射计数据
### 质量控制：QC列记录每条数据的质量控制标记及其原因
- 0代表正常，可以使用
- 1代表空值，微波辐射计数据和探空数据不匹配，无法训练
- 2代表云天
    - 判断依据
    - 云层中,最小相对湿度值不小于84%,最大相对湿度值不小于87%;
    - 云层中,从云底到云顶的相对湿度跳变值大于3%;
    - 在云顶有负跳变,在云底有正跳变.当以上标准全部满足时才能判定为云层.
- 3代表雨天
    - 判断依据
    - 选择探空时次的前3h~后2h共5h为时间区间,在该区间内降水探测器显示发生降水则认为会影响微波辐射计亮温观测
- 08时Wyoming数据对应08:00~08:30微波辐射计数据平均值，20时Wyoming数据对应20:00~20:30微波辐射计数据平均值

In [28]:
import pandas as pd
import numpy as np
import os
import calendar

# 自定义函数
import search_filepth_module

In [30]:
# Year to process, kept as a string: it is appended to the microwave data
# directory path and compared against a slice of each sounding file name.
Year = '2009'

In [32]:
# Collect every date of a given year, one month at a time
def getMothDate(year, month):
    """
    Build the list of all dates in one month.

    :param year: calendar year as an integer
    :param month: month number, 1-12
    :return: list of 'YYYY/MM/DD' strings, one per day, in ascending order
    """
    days_in_month = calendar.monthrange(year, month)[1]
    month_prefix = '{}/{:02d}/'.format(year, month)
    return [month_prefix + '{:02d}'.format(day)
            for day in range(1, days_in_month + 1)]
 
# Every date of the processing year as 'YYYY/MM/DD' strings.
# NOTE: the name `list` shadows the builtin; it is kept because later cells
# use it as the index of their DataFrames.
# The year is derived from `Year` so the date index cannot drift away from the
# year used to locate the input files.
year = int(Year)
list = []
for month in range(1, 13):
    list.extend(getMothDate(year, month))

# Summary table: one row per date; the columns hold QC flags, data counts and
# ground-level temperatures for the 08 and 20 soundings, with a 'Sep' column
# between the two groups.
statistics = pd.DataFrame(
    columns=[
        'QC_08', 'lv1_08', 'Ground_temperature_08', 'Temperature_08',
        'Temperature_08_H0', 'Relative_humidity_08',
        'Sep',
        'QC_20', 'lv1_20', 'Ground_temperature_20', 'Temperature_20',
        'Temperature_20_H0', 'Relative_humidity_20',
    ],
    index=list,
)
# QC starts at 0 (usable); later cells overwrite it with 1, 2 or 3.
statistics[['QC_08', 'QC_20']] = 0
statistics['Sep'] = '/'

## 微波辐射计数据

In [34]:
# Directory holding the microwave radiometer files of the selected year.
# The raw literal ends with two backslashes, so the resulting path contains a
# doubled separator before the year.
# NOTE(review): the processing loop later slices each file path at fixed
# character offsets to get the date, so changing the length of this string
# would shift those offsets.
microwave_dirpath = r'I:\Data\Personal Data\graduation project\SACOL\microwave\\' + Year
# Target string handed to the search helper; presumably selects the 'lv1'
# files -- confirm against search_filepth_module.
microwave_target_str = 'lv1'
# File paths returned by the project helper for that directory and target.
microwave_filepaths = search_filepth_module.search_filepath(microwave_dirpath,microwave_target_str)

In [36]:
# Column layout shared by the 08 and 20 brightness-temperature tables.
# The labels (including the leading space in the last twelve) are used to
# align the per-file column means, so they are reproduced exactly.
# NOTE(review): presumably they mirror the lv1 CSV header -- confirm.
channel = [
    '10', 'Tamb(K)', 'Rh(%)', 'Pres(mb)', 'Tir(K)', 'Rain', 'Azim', 'Elev',
    'TkBB(K)',
    ' 22.235', ' 23.035', ' 23.835', ' 26.235', ' 30.000',
    ' 51.250', ' 52.280', ' 53.850', ' 54.940', ' 56.660', ' 57.290',
    ' 58.800',
]
# Two independent empty tables indexed by date: one for the 08 averages and
# one for the 20 averages.
lv1_08, lv1_20 = (pd.DataFrame(index=list, columns=channel) for _ in range(2))

In [38]:
# For every radiometer file: count the samples inside the 08:00-08:30 and
# 20:00-20:30 windows, average them per column, and flag rain reported in the
# window around each sounding time.
for microwave_filepath in microwave_filepaths:
    lv1_data = pd.read_csv(microwave_filepath, index_col='Record')
    lv1_data.drop(['Unnamed: 23'], axis=1, inplace=True)
    # The date is read from fixed character positions of the path, so it
    # depends on the exact directory layout of the input files.
    date = microwave_filepath[70:80].replace('-', '/')

    num_08 = 0          # number of samples inside 08:00-08:30
    num_20 = 0          # number of samples inside 20:00-20:30
    first_08 = None     # row position of the first 08-window sample
    first_20 = None     # row position of the first 20-window sample
    rain_08 = False
    rain_20 = False
    # Rows are visited by position; characters 9..13 of 'Date/Time' are the
    # clock time that is compared as text against 'HH:MM' bounds.
    for position, (stamp, rain_flag) in enumerate(
            zip(lv1_data['Date/Time'], lv1_data['Rain'])):
        clock = stamp[9:14]
        raining = rain_flag == 'Y'

        # Rain check: 3 h before to 2 h after each sounding time.
        if raining and '05:00' <= clock <= '10:00':
            rain_08 = True
        # The bounds previously read '17::00' / '22::00'; with the doubled
        # colon the text comparison rejected every 17:xx sample and accepted
        # all of 22:xx.
        if raining and '17:00' <= clock <= '22:00':
            rain_20 = True

        # Samples from the sounding time up to half an hour later.
        if '08:00' <= clock <= '08:30':
            num_08 += 1
            if first_08 is None:
                first_08 = position
        if '20:00' <= clock <= '20:30':
            num_20 += 1
            if first_20 is None:
                first_20 = position

    # Record how many radiometer samples fell inside each window.
    statistics.loc[date, 'lv1_08'] = num_08
    statistics.loc[date, 'lv1_20'] = num_20
    # Rain inside the window marks the sounding as QC=3.
    if rain_08:
        statistics.loc[date, 'QC_08'] = 3
    if rain_20:
        statistics.loc[date, 'QC_20'] = 3

    # Column-wise mean of the window rows (taken as one contiguous run that
    # starts at the first matching row). numeric_only=True skips the text
    # columns such as 'Date/Time' and 'Rain', which newer pandas versions
    # refuse to average.
    if first_08 is not None:
        lv1_08.loc[date,] = lv1_data.iloc[first_08:first_08 + num_08,].mean(numeric_only=True)
    if first_20 is not None:
        lv1_20.loc[date,] = lv1_data.iloc[first_20:first_20 + num_20,].mean(numeric_only=True)

    # Copy the averaged 'Tamb(K)' value of each window into the summary table.
    statistics.loc[date, 'Ground_temperature_08'] = lv1_08.loc[date, 'Tamb(K)']
    statistics.loc[date, 'Ground_temperature_20'] = lv1_20.loc[date, 'Tamb(K)']

## 探空数据

In [40]:
# Root directory that is searched (recursively) for sounding files.
Wyoming_dirpath = r'I:\Data\Personal Data\graduation project\yuzhongzhan\FSL1'
# A file is selected when characters 30..33 of its name equal this string.
Wyoming_target_str = Year
Wyoming_filepaths = [
    os.path.join(folder, name)
    for folder, _subfolders, names in os.walk(Wyoming_dirpath)
    for name in names
    if name[30:34] == Wyoming_target_str
]

In [41]:
# Level labels used as column names of the profile tables:
# '0.00'..'1.00' in steps of 0.10, then '1.25'..'10.00' in steps of 0.25
# (47 labels in total, each formatted with two decimals).
height = (
    ['%.2f' % (tenth / 10.0) for tenth in range(11)]
    + ['%.2f' % (1 + 0.25 * quarter) for quarter in range(1, 37)]
)
# Four independent empty tables (date x level): temperature and relative
# humidity for the 08 and the 20 sounding.
(Temperature_08,
 Temperature_20,
 Relative_humidity_08,
 Relative_humidity_20) = (pd.DataFrame(index=list, columns=height) for _ in range(4))

In [42]:
# Column names given to the whitespace-separated sounding files; only
# 'Temperature' and 'Relative_humidity' are read below.
head_col = ['Press', 'Altitude', 'Temperature', 'Relative_humidity', 'NaN1', 'NaN2', 'NaN3']
# Hour code in the file name -> (column suffix, temperature table, humidity table).
# NOTE(review): presumably '00'/'12' are UTC hours matching 08/20 local time -- confirm.
sounding_targets = {
    '00': ('08', Temperature_08, Relative_humidity_08),
    '12': ('20', Temperature_20, Relative_humidity_20),
}

for Wyoming_filepath in Wyoming_filepaths:
    # The first 6 lines of each file are skipped; the remaining rows are
    # labelled with the level names, so their number must equal len(height).
    Wyoming_data = pd.read_table(Wyoming_filepath, sep=r'\s+', skiprows=6, names=head_col)
    Wyoming_data.index = height

    # Date and hour come from fixed positions of the file name (the same
    # positions the file search uses for the year), so the result does not
    # depend on the length of the directory part of the path.
    file_name = os.path.basename(Wyoming_filepath)
    Wyoming_date = file_name[30:34] + '/' + file_name[34:36] + '/' + file_name[36:38]
    print(Wyoming_date)

    target = sounding_targets.get(file_name[38:40])
    if target is None:
        # Neither a '00' nor a '12' sounding: nothing to store.
        continue
    suffix, temperature_table, humidity_table = target

    # Values not greater than -100 are treated as unusable.
    valid_temperature = Wyoming_data['Temperature'] > -100
    valid_humidity = Wyoming_data['Relative_humidity'] > -100

    # Number of usable temperature values, then the full profile.
    statistics.loc[Wyoming_date, 'Temperature_' + suffix] = valid_temperature.sum()
    temperature_table.loc[Wyoming_date,] = Wyoming_data['Temperature']
    # Number of usable humidity values (counted on the humidity column
    # itself), then the full profile.
    statistics.loc[Wyoming_date, 'Relative_humidity_' + suffix] = valid_humidity.sum()
    humidity_table.loc[Wyoming_date,] = Wyoming_data['Relative_humidity']

    # Temperature at the lowest level ('0.00') of the sounding just loaded.
    statistics.loc[Wyoming_date, 'Temperature_' + suffix + '_H0'] = temperature_table.loc[Wyoming_date, '0.00']
2009/01/01
2009/01/01
2009/01/02
2009/01/02
2009/01/03
2009/01/03
2009/01/04
2009/01/04
2009/01/05
2009/01/05
2009/01/06
2009/01/06
2009/01/07
2009/01/07
2009/01/08
2009/01/08
2009/01/09
2009/01/09
2009/01/10
2009/01/10
2009/01/11
2009/01/11
2009/01/12
2009/01/12
2009/01/13
2009/01/13
2009/01/14
2009/01/14
2009/01/15
2009/01/15
2009/01/16
2009/01/16
2009/01/17
2009/01/17
2009/01/18
2009/01/18
2009/01/19
2009/01/19
2009/01/20
2009/01/20
2009/01/21
2009/01/21
2009/01/22
2009/01/22
2009/01/23
2009/01/23
2009/01/24
2009/01/24
2009/01/25
2009/01/25
2009/01/26
2009/01/26
2009/01/27
2009/01/27
2009/01/28
2009/01/28
2009/01/29
2009/01/29
2009/01/30
2009/01/30
2009/01/31
2009/01/31
2009/02/01
2009/02/01
2009/02/02
2009/02/02
2009/02/03
2009/02/03
2009/02/04
2009/02/04
2009/02/05
2009/02/05
2009/02/06
2009/02/06
2009/02/07
2009/02/07
2009/02/08
2009/02/08
2009/02/09
2009/02/09
2009/02/10
2009/02/10
2009/02/11
2009/02/11
2009/02/12
2009/02/12
2009/02/13
2009/02/13
2009/02/14
2009/02/14
2009/02/15

In [43]:
# Quality control: cloud detection, QC=2
def _flag_cloudy(humidity_table, qc_column):
    """
    Mark dates whose humidity profile meets the cloud thresholds with QC=2.

    Values below 84 are blanked to NaN in ``humidity_table`` (in place). For
    each date the maximum and minimum of the remaining values are taken, and
    the date is flagged when max > 87, min > 84 and max - min > 3.

    NOTE(review): the notes at the top of the notebook word the 84/87
    thresholds as "not less than", while the comparisons here are strict --
    confirm which is intended.

    :param humidity_table: date x level relative-humidity table, modified in place
    :param qc_column: name of the column of ``statistics`` that receives the flag
    :return: None
    """
    humidity_table[humidity_table < 84] = np.nan
    # Row extremes are computed once for the whole table, not once per date.
    row_max = humidity_table.max(axis=1)
    row_min = humidity_table.min(axis=1)
    for day in humidity_table.index:
        m_ax = row_max[day]
        m_in = row_min[day]
        if (m_ax > 87) and (m_in > 84) and ((m_ax - m_in) > 3):
            statistics.loc[day, qc_column] = 2


_flag_cloudy(Relative_humidity_08, 'QC_08')
_flag_cloudy(Relative_humidity_20, 'QC_20')

In [61]:
# Quality control: radiometer and sounding data do not match, QC=1
# Missing radiometer sample counts are stored as 0 in the summary table.
statistics[['lv1_08','lv1_20']] = statistics[['lv1_08','lv1_20']].replace(np.nan,0)
for suffix in ('08', '20'):
    # After the replacement above a missing radiometer window shows up as a
    # count of 0 (never NaN), so it is tested as such; a NaN check on this
    # column could not fire.
    no_microwave = statistics['lv1_' + suffix] == 0
    # No sounding was loaded for this date and time.
    no_sounding = statistics['Temperature_' + suffix].isna()
    statistics.loc[no_microwave | no_sounding, 'QC_' + suffix] = 1

In [62]:
# # 储存数据
# statistics.to_csv(r'I:\Data\Personal Data\graduation project\Code\Machine Learning\basicdata\statistics.csv')
# lv1_08.to_csv(r'I:\Data\Personal Data\graduation project\Code\Machine Learning\basicdata\lv1_08.csv')
# lv1_20.to_csv(r'I:\Data\Personal Data\graduation project\Code\Machine Learning\basicdata\lv1_20.csv')
# Temperature_08.to_csv(r'I:\Data\Personal Data\graduation project\Code\Machine Learning\basicdata\Temperature_08.csv')
# Temperature_20.to_csv(r'I:\Data\Personal Data\graduation project\Code\Machine Learning\basicdata\Temperature_20.csv')
# Relative_humidity_08.to_csv(r'I:\Data\Personal Data\graduation project\Code\Machine Learning\basicdata\Relative_humidity_08.csv')
# Relative_humidity_20.to_csv(r'I:\Data\Personal Data\graduation project\Code\Machine Learning\basicdata\Relative_humidity_20.csv')

In [63]:
# Write the assembled summary table to a CSV file at a hard-coded local path.
statistics.to_csv(r'J:\000 - InBox - 下载\Desktop\test.csv')