## Filter CO2 and FCO2 (Eddypro) Based on High Correlation of Raw CO2 and H2O

#### Import Modules

In [1]:
# Import Modules

import pandas as pd
import numpy as np
import glob
import xarray as xr
from scipy.stats import pearsonr

##### Declaring Variables

In [2]:
# Half-hourly time stamps covering the analysis window (inclusive on both ends).
# '30min' is used instead of the '30T' alias, which newer pandas releases deprecate.
date = pd.date_range('2015-12-01 00:00:00', '2015-12-02 23:30:00', freq='30min')

# One correlation value (raw CO2 vs. raw H2O) per half-hour slot.
# Pre-filled with NaN so that a slot which is never written cannot expose the
# arbitrary memory contents that np.empty would leave behind.
correlation_CO2_H2O = np.full(date.size, np.nan)

##### Import Raw Data of CO2 and H2O --- Calculate Correlation of CO2 and H2O

In [3]:
# Import Raw Data of CO2 and H2O --- Calculate Correlation of CO2 and H2O
#
# For every half-hour slot in `date`, read that slot's raw-data file and store
# the correlation between its 'CO2 (umol/mol)' and 'H2O (mmol/mol)' columns in
# correlation_CO2_H2O at the same position.
#
# Files are looked up as 'Data/Raw Data/<name>/<name>.data', where <name> is
# the slot time formatted as YYYY-MM-DDTHHMMSS followed by '_AIU-1552'.

for i, timestamp in enumerate(date):
    read_date = timestamp.strftime('%Y-%m-%dT%H%M%S') + '_AIU-1552'

    try:
        # Tab-separated file; the first 7 lines are skipped before the header row.
        X = pd.read_csv(f'Data/Raw Data/{read_date}/{read_date}.data', sep="\t", skiprows=7)
    except OSError:
        # The file for this slot could not be opened: record the slot as missing
        # and move on. np.nan is used because the np.NaN alias no longer exists
        # in NumPy 2.0.
        correlation_CO2_H2O[i] = np.nan
        continue

    correlation_CO2_H2O[i] = X['CO2 (umol/mol)'].corr(X['H2O (mmol/mol)'])

In [4]:
# Show the correlation value stored for each half-hour slot in `date`.
correlation_CO2_H2O

array([-0.28551159,  0.55843529,  0.44151054,  0.16270475,  0.30626937,
        0.35166264,  0.13247812,  0.14458684,  0.02466383, -0.00102471,
        0.35149208,  0.41123351,  0.6522532 ,  0.29801391,  0.66892367,
        0.31509974,  0.53778184,  0.78705546,  0.23746226,  0.13601373,
        0.6007916 ,  0.55326002,  0.32429175,  0.47462267,  0.19098528,
       -0.11308985, -0.10413338, -0.44511206, -0.36406929, -0.41731019,
        0.03253848,  0.36274535, -0.77638762, -0.27471181,  0.36958752,
        0.85619413,  0.33027429,  0.1480186 ,  0.03936223, -0.67960245,
       -0.87610327, -0.76777195, -0.21351862, -0.82325504, -0.70764128,
       -0.92087537, -0.95067535,  0.84693203,  0.73342716, -0.25659554,
       -0.94830927, -0.52186692, -0.691101  ,  0.56412734,  0.60368335,
        0.56814174,  0.3688593 ,  0.42115779,  0.19002099,  0.56205093,
        0.65473038,  0.30511437,  0.70085592,  0.35198001,  0.11717047,
        0.28399565,  0.70206503,  0.75033269,  0.82482876,      

##### Import Eddypro Data

In [5]:
# Import Eddypro Data
#
# Read the Eddypro full-output workbook, keep the eight columns of interest for
# the two analysis days, and turn the -9999 missing-value code into NaN.

# First column becomes the index; the row at position 1 of the sheet is skipped.
dataframe = pd.read_excel("Data/MCO-MUKA21_full_output.xlsx", index_col=0, skiprows=[1])

# Reference half-hourly timeline used only to translate the window bounds into
# row positions. NOTE(review): this assumes the workbook rows line up one-to-one
# with this timeline (no gaps or duplicates) -- confirm against the file.
date_dataframe = pd.date_range('2015-11-12 00:30:00', '2020-11-13 00:00:00', freq='30min')
first_row = date_dataframe.get_loc(pd.Timestamp('2015-12-01 00:00:00'))
stop_row = date_dataframe.get_loc(pd.Timestamp('2015-12-03 00:00:00'))  # exclusive

# Positional selection: rows of the window, columns by their position in the sheet.
dataframe = dataframe.iloc[first_row:stop_row, [2, 13, 14, 17, 23, 32, 53, 73]]

# np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
dataframe = dataframe.replace(-9999, np.nan)
dataframe

Unnamed: 0_level_0,CO2,FCO2,FCO2_QC,FH2O,H2O,LE,TA,WS
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-12-01 00:00,404.458,0.226286,1.0,0.013632,31.7512,0.598733,299.617,0.037565
2015-12-01 00:30,411.593,-0.347322,1.0,0.014442,31.4673,0.634436,299.453,0.120988
2015-12-01 01:00,407.405,0.379693,1.0,0.008586,30.7746,0.377301,299.107,0.148611
2015-12-01 01:30,406.759,-0.971385,1.0,0.048276,30.8767,2.121040,299.289,0.079208
2015-12-01 02:00,414.020,-0.233997,1.0,0.028753,31.0918,1.263240,299.336,0.036531
...,...,...,...,...,...,...,...,...
2015-12-02 21:30,410.225,-0.860226,1.0,0.008019,31.9030,0.351920,300.473,0.120181
2015-12-02 22:00,400.333,-0.729159,1.0,,31.8986,,300.741,0.476979
2015-12-02 22:30,400.156,-0.348636,1.0,0.013788,31.9614,0.605087,300.514,0.443016
2015-12-02 23:00,402.298,-1.326610,1.0,,31.9059,,300.401,0.129187


In [6]:
# Pairwise correlation of the selected Eddypro columns before any filtering.
dataframe.corr()

Unnamed: 0,CO2,FCO2,FCO2_QC,FH2O,H2O,LE,TA,WS
CO2,1.0,0.368111,-0.030143,-0.520316,-0.353019,-0.519651,-0.660692,-0.375166
FCO2,0.368111,1.0,,-0.377708,-0.400443,-0.378301,-0.309907,-0.01282
FCO2_QC,-0.030143,,1.0,0.100451,-0.070697,0.10063,-0.019865,0.028044
FH2O,-0.520316,-0.377708,0.100451,1.0,-0.020836,0.999997,0.475234,0.67149
H2O,-0.353019,-0.400443,-0.070697,-0.020836,1.0,-0.021371,0.551871,0.143283
LE,-0.519651,-0.378301,0.10063,0.999997,-0.021371,1.0,0.473799,0.670177
TA,-0.660692,-0.309907,-0.019865,0.475234,0.551871,0.473799,1.0,0.526306
WS,-0.375166,-0.01282,0.028044,0.67149,0.143283,0.670177,0.526306,1.0


##### Filter Eddypro Records Where the Absolute Correlation Between Raw CO2 and H2O Exceeds 0.7 (or FCO2_QC Is 2)

In [7]:
# Blank out (set every column to NaN) each Eddypro record for which either
#   * the raw CO2/H2O correlation of the same half-hour slot is above 0.7 in
#     absolute value, or
#   * the FCO2 quality flag equals 2.
# Slots whose correlation is NaN never satisfy the threshold test and are
# therefore only removed if their quality flag is 2.
#
# Rows are matched to correlation values purely by position, so both must have
# the same length.
if len(dataframe) != correlation_CO2_H2O.size:
    raise ValueError('dataframe and correlation_CO2_H2O must have the same number of rows')

# Build one boolean mask instead of looping; FCO2_QC is read by column label and
# converted to a plain array so no integer-position lookup on the Series is needed.
high_correlation = np.abs(correlation_CO2_H2O) > 0.7
bad_quality = (dataframe['FCO2_QC'] == 2).to_numpy()

# np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
dataframe.loc[high_correlation | bad_quality, :] = np.nan

In [8]:
# Show the Eddypro records after the 0.7 filter has been applied.
dataframe

Unnamed: 0_level_0,CO2,FCO2,FCO2_QC,FH2O,H2O,LE,TA,WS
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-12-01 00:00,404.458,0.226286,1.0,0.013632,31.7512,0.598733,299.617,0.037565
2015-12-01 00:30,411.593,-0.347322,1.0,0.014442,31.4673,0.634436,299.453,0.120988
2015-12-01 01:00,407.405,0.379693,1.0,0.008586,30.7746,0.377301,299.107,0.148611
2015-12-01 01:30,406.759,-0.971385,1.0,0.048276,30.8767,2.121040,299.289,0.079208
2015-12-01 02:00,414.020,-0.233997,1.0,0.028753,31.0918,1.263240,299.336,0.036531
...,...,...,...,...,...,...,...,...
2015-12-02 21:30,,,,,,,,
2015-12-02 22:00,,,,,,,,
2015-12-02 22:30,,,,,,,,
2015-12-02 23:00,,,,,,,,


In [9]:
# Pairwise correlation of the Eddypro columns after the 0.7 filter.
dataframe.corr()

Unnamed: 0,CO2,FCO2,FCO2_QC,FH2O,H2O,LE,TA,WS
CO2,1.0,0.167209,,-0.349627,-0.272201,-0.347791,-0.793866,-0.711766
FCO2,0.167209,1.0,,0.208076,-0.454788,0.209144,-0.325214,0.01629
FCO2_QC,,,,,,,,
FH2O,-0.349627,0.208076,,1.0,-0.228536,0.999997,0.464225,0.74883
H2O,-0.272201,-0.454788,,-0.228536,1.0,-0.229691,0.54647,0.104165
LE,-0.347791,0.209144,,0.999997,-0.229691,1.0,0.46241,0.74762
TA,-0.793866,-0.325214,,0.464225,0.54647,0.46241,1.0,0.570037
WS,-0.711766,0.01629,,0.74883,0.104165,0.74762,0.570037,1.0


##### Filter Eddypro Records Where the Absolute Correlation Between Raw CO2 and H2O Exceeds 0.5 (or FCO2_QC Is 2)

In [10]:
# Blank out (set every column to NaN) each Eddypro record for which either
#   * the raw CO2/H2O correlation of the same half-hour slot is above 0.5 in
#     absolute value, or
#   * the FCO2 quality flag equals 2.
# Slots whose correlation is NaN never satisfy the threshold test and are
# therefore only removed if their quality flag is 2.
#
# This modifies `dataframe` in place. Any row already blanked by a stricter
# threshold (e.g. 0.7) also exceeds 0.5, so applying this step afterwards gives
# the same rows as applying it to the unfiltered records.
#
# Rows are matched to correlation values purely by position, so both must have
# the same length.
if len(dataframe) != correlation_CO2_H2O.size:
    raise ValueError('dataframe and correlation_CO2_H2O must have the same number of rows')

# Build one boolean mask instead of looping; FCO2_QC is read by column label and
# converted to a plain array so no integer-position lookup on the Series is needed.
high_correlation = np.abs(correlation_CO2_H2O) > 0.5
bad_quality = (dataframe['FCO2_QC'] == 2).to_numpy()

# np.nan rather than np.NaN: the latter alias was removed in NumPy 2.0.
dataframe.loc[high_correlation | bad_quality, :] = np.nan

In [11]:
# Show the Eddypro records after the 0.5 filter has been applied.
dataframe

Unnamed: 0_level_0,CO2,FCO2,FCO2_QC,FH2O,H2O,LE,TA,WS
DateTime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2015-12-01 00:00,404.458,0.226286,1.0,0.013632,31.7512,0.598733,299.617,0.037565
2015-12-01 00:30,,,,,,,,
2015-12-01 01:00,407.405,0.379693,1.0,0.008586,30.7746,0.377301,299.107,0.148611
2015-12-01 01:30,406.759,-0.971385,1.0,0.048276,30.8767,2.121040,299.289,0.079208
2015-12-01 02:00,414.020,-0.233997,1.0,0.028753,31.0918,1.263240,299.336,0.036531
...,...,...,...,...,...,...,...,...
2015-12-02 21:30,,,,,,,,
2015-12-02 22:00,,,,,,,,
2015-12-02 22:30,,,,,,,,
2015-12-02 23:00,,,,,,,,


In [12]:
# Pairwise correlation of the Eddypro columns after the 0.5 filter.
dataframe.corr()

Unnamed: 0,CO2,FCO2,FCO2_QC,FH2O,H2O,LE,TA,WS
CO2,1.0,0.09958,,-0.471555,-0.124743,-0.470516,-0.763677,-0.733939
FCO2,0.09958,1.0,,0.139919,-0.489148,0.140651,-0.23845,0.076025
FCO2_QC,,,,,,,,
FH2O,-0.471555,0.139919,,1.0,-0.342698,0.999999,0.54466,0.845551
H2O,-0.124743,-0.489148,,-0.342698,1.0,-0.3435,0.382398,-0.095181
LE,-0.470516,0.140651,,0.999999,-0.3435,1.0,0.543564,0.844908
TA,-0.763677,-0.23845,,0.54466,0.382398,0.543564,1.0,0.463903
WS,-0.733939,0.076025,,0.845551,-0.095181,0.844908,0.463903,1.0
