# Filter airborne 10-sec data
- R program to filter aircraft data for strong local continental influences, subtract the NOAA in situ CO2 record from SPO, and write out flat text files

In [24]:
library('ncdf4')
library('yaml')

In [25]:
# Resolve the observation scratch directory named in the shared calc config.
config <- read_yaml('../_config_calc.yml')
username <- Sys.info()['user']
# Replace any literal {{env['USER']}} placeholder in the configured path with
# the name of the user running this script.
project_tmpdir_obs <- gsub(
  '\\{\\{env\\[\'USER\'\\]\\}\\}',
  username,
  config$project_tmpdir_obs
)

In [26]:
# Load the preprocessed 10-second aircraft files written by read_aircraft_10s.r.
# These are expected to provide hippomerge, orcasmerge and atommerge, which the
# cells below use (verify against read_aircraft_10s.r).
load(file = 'HIPPO_10s.RData')
load(file = 'ORCAS_10s.RData')
load(file = 'ATom_10s.RData')

In [27]:
# Build a UTC datetime vector for each campaign from its year/mon/day/hour/min/sec columns.
hippodt <- ISOdatetime(
  year = hippomerge$year, month = hippomerge$mon, day = hippomerge$day,
  hour = hippomerge$hour, min = hippomerge$min, sec = hippomerge$sec,
  tz = 'UTC'
)
orcasdt <- ISOdatetime(
  year = orcasmerge$year, month = orcasmerge$mon, day = orcasmerge$day,
  hour = orcasmerge$hour, min = orcasmerge$min, sec = orcasmerge$sec,
  tz = 'UTC'
)
atomdt <- ISOdatetime(
  year = atommerge$year, month = atommerge$mon, day = atommerge$day,
  hour = atommerge$hour, min = atommerge$min, sec = atommerge$sec,
  tz = 'UTC'
)

In [28]:
# read in NOAA in situ record from SPO
# Builds `spoco2` (decimal date, time components, CO2 value) and `spodt`
# (matching UTC datetimes) from the ObsPack netCDF file under project_tmpdir_obs.
spo_file <- paste0(project_tmpdir_obs, '/obspack_co2_1_GLOBALVIEWplus_v6.0_2020-09-11/data/nc/co2_spo_surface-insitu_1_allvalid.nc')
sponc <- nc_open(spo_file)
spoco2 <- data.frame(cbind(
  ncvar_get(sponc, 'time_decimal'),
  t(ncvar_get(sponc, 'time_components')),  # transposed so it can be column-bound per record
  ncvar_get(sponc, 'value') * 1E6          # scaled by 1e6 (presumably mol/mol -> ppm; confirm)
))
colnames(spoco2) <- c('date', 'year', 'mon', 'day', 'hour', 'min', 'sec', 'co2')
qcflag <- ncvar_get(sponc, 'qcflag')
# Every variable needed has been read, so release the file handle
# (it was previously left open for the rest of the session).
nc_close(sponc)
# Blank out CO2 wherever either of the first two qcflag characters is not '.'
flagged <- substr(qcflag, 1, 1) != '.' | substr(qcflag, 2, 2) != '.'
spoco2$co2[flagged] <- NA
spodt <- ISOdatetime(spoco2$year, spoco2$mon, spoco2$day, spoco2$hour, spoco2$min, spoco2$sec, tz = 'UTC')

# HIPPO

In [29]:
# filter
# Drop HIPPO observations whose timestamp falls inside any interval listed in
# hippo_xsect_filt_datetime.txt (both interval endpoints are inclusive), then
# report how many rows were removed.
ints <- read.table(paste0(project_tmpdir_obs, '/hippo_xsect_filt_datetime.txt'), header = TRUE)
startdt <- ISOdatetime(ints$startyear, ints$startmon, ints$startday, ints$starthour, ints$startmin, ints$startsec, tz = 'UTC')
stopdt <- ISOdatetime(ints$stopyear, ints$stopmon, ints$stopday, ints$stophour, ints$stopmin, ints$stopsec, tz = 'UTC')
blfilt <- rep(TRUE, nrow(hippomerge))  # TRUE = keep the observation
# seq_len() gives an empty loop when the interval file has no rows
# (1:nrow(ints) would iterate over c(1, 0) instead).
for (i in seq_len(nrow(ints))) {
  in_interval <- hippodt >= startdt[i] & hippodt <= stopdt[i]
  blfilt[in_interval] <- FALSE
}
hippodt <- hippodt[blfilt]
hippomerge <- hippomerge[blfilt, ]
print(paste0('Filtered ', sum(!blfilt), ' of ', length(blfilt), ' HIPPO obs (', round(sum(!blfilt) / length(blfilt) * 100, 1), '%)'))

[1] "Filtered 2453 of 156551 HIPPO obs (1.6%)"


In [30]:
# calculate differences
# SPO CO2 linearly interpolated in time onto the HIPPO observation times
spo_at_hippo <- approx(x = as.POSIXct(spodt), y = spoco2$co2, xout = as.POSIXct(hippodt))$y
hippomerge$co2mspo <- round(hippomerge$co2 - spo_at_hippo, digits = 3) ## co2 = 'CO2.X'
# CO2 differences between the primary co2 column and the other CO2 columns
hippomerge$co2mqcls <- round(hippomerge$co2 - hippomerge$co2qcls, digits = 3)
hippomerge$co2moms <- round(hippomerge$co2 - hippomerge$co2oms, digits = 3)
hippomerge$co2mao2 <- round(hippomerge$co2 - hippomerge$co2ao2, digits = 3)
# CH4 differences, taken relative to ch4qcls
hippomerge$ch4mucats <- round(hippomerge$ch4qcls - hippomerge$ch4ucats, digits = 3)
hippomerge$ch4mpanther <- round(hippomerge$ch4qcls - hippomerge$ch4panther, digits = 3)

In [31]:
# write out
# First a header line of column names, then one space-separated line per
# observation (t() makes write() emit the values row by row).
hippo_outfile <- '../data/aircraft-obs/HIPPO_SO_mSPO.txt'
n_hippo_cols <- ncol(hippomerge)
write(names(hippomerge), file = hippo_outfile, ncolumns = n_hippo_cols)
write(t(hippomerge), file = hippo_outfile, ncolumns = n_hippo_cols, append = TRUE)

# number of non-missing values in each column
print(vapply(hippomerge, function(column) sum(!is.na(column)), integer(1)))

       year         mon         day        hour         min         sec 
     154098      154098      154098      154098      154098      154098 
       camp         flt        prof         lat         lon         alt 
     154098      154098      154098      154098      154098      154098 
   pressure       theta         co2     co2qcls      co2oms      co2ao2 
     153643      153642      128532       99731      111351      123678 
    ch4qcls    ch4ucats  ch4panther  sf6panther    sf6ucats      sf6pfp 
     109569        5054        4367       10500       12982        1337 
      strat      h2oref      n2oref       o3ref     co2mspo    co2mqcls 
     154098      149942      111218      150933      128532       97975 
    co2moms     co2mao2   ch4mucats ch4mpanther 
      99249      103284        3696        3275 


# ORCAS

In [32]:
# filter
# Drop ORCAS observations whose timestamp falls inside any interval listed in
# orcas_xsect_filt_datetime.txt (both interval endpoints are inclusive), then
# report how many rows were removed.
ints <- read.table(paste0(project_tmpdir_obs, '/orcas_xsect_filt_datetime.txt'), header = TRUE)
startdt <- ISOdatetime(ints$startyear, ints$startmon, ints$startday, ints$starthour, ints$startmin, ints$startsec, tz = 'UTC')
stopdt <- ISOdatetime(ints$stopyear, ints$stopmon, ints$stopday, ints$stophour, ints$stopmin, ints$stopsec, tz = 'UTC')
blfilt <- rep(TRUE, nrow(orcasmerge))  # TRUE = keep the observation
# seq_len() gives an empty loop when the interval file has no rows
# (1:nrow(ints) would iterate over c(1, 0) instead).
for (i in seq_len(nrow(ints))) {
  in_interval <- orcasdt >= startdt[i] & orcasdt <= stopdt[i]
  blfilt[in_interval] <- FALSE
}
orcasdt <- orcasdt[blfilt]
orcasmerge <- orcasmerge[blfilt, ]
print(paste0('Filtered ', sum(!blfilt), ' of ', length(blfilt), ' ORCAS obs (', round(sum(!blfilt) / length(blfilt) * 100, 1), '%)'))

[1] "Filtered 1943 of 46646 ORCAS obs (4.2%)"


In [33]:
# calculate differences
# SPO CO2 linearly interpolated in time onto the ORCAS observation times
spo_at_orcas <- approx(x = as.POSIXct(spodt), y = spoco2$co2, xout = as.POSIXct(orcasdt))$y
orcasmerge$co2mspo <- round(orcasmerge$co2 - spo_at_orcas, digits = 2) ## co2 = 'CO2.X'
# CO2 differences between the primary co2 column and the other CO2 columns
orcasmerge$co2mqcls <- round(orcasmerge$co2 - orcasmerge$co2qcls, digits = 3)
orcasmerge$co2mnoaa <- round(orcasmerge$co2 - orcasmerge$co2noaa, digits = 3)
orcasmerge$co2mao2 <- round(orcasmerge$co2 - orcasmerge$co2ao2, digits = 3)
# CH4 difference, taken relative to ch4noaa
orcasmerge$ch4mqcls <- round(orcasmerge$ch4noaa - orcasmerge$ch4qcls, digits = 3)

In [34]:
# write out
# First a header line of column names, then one space-separated line per
# observation (t() makes write() emit the values row by row).
orcas_outfile <- '../data/aircraft-obs/ORCAS_SO_mSPO.txt'
n_orcas_cols <- ncol(orcasmerge)
write(names(orcasmerge), file = orcas_outfile, ncolumns = n_orcas_cols)
write(t(orcasmerge), file = orcas_outfile, ncolumns = n_orcas_cols, append = TRUE)

# number of non-missing values in each column
print(vapply(orcasmerge, function(column) sum(!is.na(column)), integer(1)))

    year      mon      day     hour      min      sec      flt     prof 
   44703    44703    44703    44703    44703    44703    44703    44703 
     lat      lon      alt pressure    theta      co2  co2qcls  co2noaa 
   44703    44703    44703    44581    44580    38560    16277    34251 
  co2ao2  ch4noaa  ch4qcls    strat   h2oref   n2oref  co2mspo co2mqcls 
   31753    34254    23452    44703    44440    22462    38560    15105 
co2mnoaa  co2mao2 ch4mqcls 
   34251    29649    21267 


# ATom

In [35]:
# filter
# Drop ATom observations whose timestamp falls inside any interval listed in
# atom_xsect_filt_datetime.txt (both interval endpoints are inclusive), then
# report how many rows were removed.
ints <- read.table(paste0(project_tmpdir_obs, '/atom_xsect_filt_datetime.txt'), header = TRUE)
startdt <- ISOdatetime(ints$startyear, ints$startmon, ints$startday, ints$starthour, ints$startmin, ints$startsec, tz = 'UTC')
stopdt <- ISOdatetime(ints$stopyear, ints$stopmon, ints$stopday, ints$stophour, ints$stopmin, ints$stopsec, tz = 'UTC')
blfilt <- rep(TRUE, nrow(atommerge))  # TRUE = keep the observation
# seq_len() gives an empty loop when the interval file has no rows
# (1:nrow(ints) would iterate over c(1, 0) instead).
for (i in seq_len(nrow(ints))) {
  in_interval <- atomdt >= startdt[i] & atomdt <= stopdt[i]
  blfilt[in_interval] <- FALSE
}
atomdt <- atomdt[blfilt]
atommerge <- atommerge[blfilt, ]
print(paste0('Filtered ', sum(!blfilt), ' of ', length(blfilt), ' ATom obs (', round(sum(!blfilt) / length(blfilt) * 100, 1), '%)'))

[1] "Filtered 4218 of 149133 ATom obs (2.8%)"


In [36]:
# calculate differences
# SPO CO2 linearly interpolated in time onto the ATom observation times
spo_at_atom <- approx(x = as.POSIXct(spodt), y = spoco2$co2, xout = as.POSIXct(atomdt))$y
atommerge$co2mspo <- round(atommerge$co2 - spo_at_atom, digits = 2) ## co2 = 'CO2_NOAA'
# CO2 differences between the primary co2 column and the other CO2 columns
atommerge$co2mqcls <- round(atommerge$co2 - atommerge$co2qcls, digits = 3)
atommerge$co2mao2 <- round(atommerge$co2 - atommerge$co2ao2, digits = 3)
atommerge$co2mx <- round(atommerge$co2 - atommerge$co2x, digits = 3)
# CH4 differences, taken relative to ch4noaa
atommerge$ch4mqcls <- round(atommerge$ch4noaa - atommerge$ch4qcls, digits = 3)
atommerge$ch4mucats <- round(atommerge$ch4noaa - atommerge$ch4ucats, digits = 3)
atommerge$ch4mpanther <- round(atommerge$ch4noaa - atommerge$ch4panther, digits = 3)

In [37]:
# write out
# First a header line of column names, then one space-separated line per
# observation (t() makes write() emit the values row by row).
atom_outfile <- '../data/aircraft-obs/ATOM_SO_mSPO.txt'
n_atom_cols <- ncol(atommerge)
write(names(atommerge), file = atom_outfile, ncolumns = n_atom_cols)
write(t(atommerge), file = atom_outfile, ncolumns = n_atom_cols, append = TRUE)

# number of non-missing values in each column
print(vapply(atommerge, function(column) sum(!is.na(column)), integer(1)))

       year         mon         day        hour         min         sec 
     144915      144915      144915      144915      144915      144915 
       camp         flt        prof         lat         lon         alt 
     144915      144915      144915      144915      144915      144915 
   pressure       theta         co2     co2qcls      co2ao2        co2x 
     144853      144853      131654       97026      119777      136261 
    ch4noaa     ch4qcls    ch4ucats  ch4panther  sf6panther    sf6ucats 
     136496       75929        6802        7077       15072       13949 
      strat      h2oref      n2oref       o3ref     co2mspo    co2mqcls 
     144915      144770       88108      144781      131654       86894 
    co2mao2       co2mx    ch4mqcls   ch4mucats ch4mpanther 
     108784      130545       71604        6407        6703 
