In [431]:
#######################################################
#-- ***Parent Directory and code for ALL inversions***
#######################################################

#-- Machine-specific locations; edit these three paths for your install.
#--   code_dir   : root of the code tree (its "batch/" subdirectory is sourced)
#--   data_dir   : root of the input data (jacobians/, obs/, misc/ are read later)
#--   output_dir : presumably where results are written -- not used in this section
code_dir   <- "/projects/SSIM-GHG/"
data_dir   <- "/Users/aschuh/SSIM-GHG/data/"
output_dir <- "~/temp/output/"

#-- Alternate settings (uncomment to use instead of the ones above)
#code_dir <-  "/home/rstudio/SSIM-GHG/"
#data_dir <-  "~/shared/ssim-ghg-data/inversion_examples/"
#output_dir <- "../../output/"

#-- The R sources live in the "batch/" subdirectory of the code tree
Rcode_dir <- file.path(code_dir, "batch/")

#-- Make that directory the working directory for the rest of the session
setwd(Rcode_dir)

###############################################
#-- Load Code
##############################################
#-- Source each helper script from Rcode_dir, in the order listed. source()
#-- evaluates in the global environment by default (local = FALSE), so the
#-- definitions end up in the workspace just as with separate top-level calls.
invisible(lapply(
  c("util_code_032024.R",
    "plot_concentrations.R",
    "inversion_032024.R",
    "write_inversion_2_netcdf_032024.R",
    "generate_transcom_flux_ensemble_from_inversion.R"),
  function(script) source(file.path(Rcode_dir, script))
))
       
###############################################
#-- Required Libraries
###############################################
#-- library() stops immediately when a package is not installed; require()
#-- would only warn and return FALSE, deferring the failure to the first use
#-- of a function from the missing package.
#-- The attach order is unchanged (plyr before dplyr); packages attached later
#-- mask same-named functions from packages attached earlier.
library(ncdf4)
library(plyr)
library(dplyr)
library(parallel)
library(ggplot2)
library(abind)
library(Matrix)
library(lattice)
library(memuse)
library(EnvStats)
library(gridExtra)

########################
#--  Detect Cores
########################
#-- Print the CPU count reported by detectCores(), then the memory summary
print(paste("Num CPUs:", parallel::detectCores(), "cores", sep = " "))
memuse::Sys.meminfo()

[1] "Num CPUs: 12 cores"


Totalram:  64.000 GiB 
Freeram:   11.486 GiB 

In [401]:
###############################################
#--  Load sensitivity matrices
###############################################

#-- Both .rda files are read from the "jacobians/" folder under data_dir.
#-- They are expected to restore the objects `jacob` and `jacob_bgd` used below
#-- (load() recreates whatever names were saved -- confirm if the files change).
load(file.path(data_dir, "jacobians/",
               "trunc_full_jacob_030624_with_dimnames_sib4_4x5_mask.rda"))
load(file.path(data_dir, "jacobians/", "jacob_bgd_021624.rda"))

#-- We have a units error somewhere here, need this for now.
#-- NOTE(review): 12/44 looks like a CO2 -> C mass conversion -- confirm.
#-- Evaluated left to right, i.e. (jacob * 12) / 44.
jacob <- jacob * 12 / 44
#-- jacob_bgd is used exactly as loaded (no rescaling is applied to it).

#-- Columns 2 and 3 of the background jacobian are kept as the fire and
#-- fossil contributions.
fire_fixed   <- jacob_bgd[, 2]
fossil_fixed <- jacob_bgd[, 3]
###################################################################
#-- END END END ***Parent Directory and code for ALL inversions***
###################################################################


In [402]:

##################################################################
#- Inversion #1   *************************
##################################################################

#################################
#- Target truth in state space
#################################

##################################################################
#-- truth_array holds ratios of OCO2v10MIP fluxes to SiB4 fluxes, used as
#-- example "scalings" for the inversion to recover, plus the corresponding
#-- differences for the case where the inversion solves for flux directly.
#-- Layout: truth_array(24 months, 23 transcom, 98 inversions, (ratio, difference))
##################################################################

#load("/projects/sandbox/inversion_workshop_scripts/truth_array.rda")
load(file.path(data_dir, "misc/truth_array.rda"))

#-- Slice used as truth: all months, every transcom entry except the first,
#-- inversion 1, first element of the last dimension.
xx <- truth_array[, -1, 1, 1]

#-- Flatten column-wise (months vary fastest) and pass through tm() with
#-- arguments -1 and 1.
#-- NOTE(review): tm() is defined in the sourced utility code; presumably it
#-- limits values to [-1, 1] -- confirm.
state_vector_true <- tm(as.vector(xx), -1, 1)

#state_vector_true = c(rep(0.5,24*11),rep(-0.5,24*11))

#state_vector_mat = matrix(state_vector_true,nrow=24,byrow=FALSE)
#dim(state_vector_mat)
#state_vector_mat[,15]

In [452]:
#########################################################
# Generate a prior flux covariance matrix P_0
# Long term, a catalog of predefined choices is best here I think
#########################################################
#-- Prior standard deviations for the land and ocean state elements
land_prior_sd  <- 0.5 * 0.85
ocean_prior_sd <- 1 * 0.85


#-- induce temporal correlations: one 24x24 block per region, 22 blocks in all
#-- NOTE(review): ar_covariance(24, 0.5) comes from the sourced code; presumably
#-- an AR(1)-type covariance over 24 months with coefficient 0.5 -- confirm.
sigma <- bdiag(rep(list(ar_covariance(24, 0.5)), 22))
#sigma = diag(rep(1,528))

#-- scale by sd for land/ocean: D %*% sigma %*% t(D), with D diagonal holding
#-- 24*11 land entries followed by 24*11 ocean entries
var_scaling_diagonal <- diag(rep(c(land_prior_sd, ocean_prior_sd), each = 24 * 11))
sigma <- var_scaling_diagonal %*% sigma %*% t(var_scaling_diagonal)
#diag(sigma) = NA
#levelplot(as.matrix(sigma)[1:24,1:24],col.regions=my.col(20),at=seq(-0.5,0.5,length=20))

In [438]:
####################################################################################
#-- WHICH obs do you want to use in the inversion?
#-- examples of selecting on stations, type of data, lat/lon box,etc
####################################################################################

#load(file.path(data_dir,"obs/obs_catalog_030624.rda")) # obs_catalog object
load(file.path(data_dir,"obs/obs_catalog_042424_unit_pulse_hour_timestamp_witherrors_withdates.rda"))


#-- Logical mask with one entry per row of the jacobian (one per observation).
#-- Start with nothing selected, then switch on the observations to use.
subset_indicator_obs <- rep(FALSE, nrow(jacob))
#subset_indicator_obs=rep(TRUE,dim(jacob)[1])

#subset_indicator_obs=c(rep(TRUE,156383),rep(FALSE,1000000))

############################
#-- SAMPLE BY TYPE EXAMPLE
############################
#subset_indicator_obs[obs_catalog$TYPE == "TCCON"] = TRUE
#subset_indicator_obs1 = rep(FALSE,length(subset_indicator_obs))
#subset_indicator_obs2 = rep(FALSE,length(subset_indicator_obs))

#subset_indicator_obs1[obs_catalog$TYPE == "OCO2"] = TRUE
#subset_indicator_obs2[seq(1,1156383,by=2)] = TRUE
#subset_indicator_obs = subset_indicator_obs1 & subset_indicator_obs2


############################
#-- SAMPLE BY NOAA STATION EXAMPLE
############################
#subset_indicator_obs[grep("spo",obs_catalog$ID)] = TRUE
#subset_indicator_obs[grep("lef",obs_catalog$ID)] = TRUE

############################
#-- SAMPLE BY TIME EXAMPLE
############################
#subset_indicator_obs[obs_catalog$TIME > 8738000] = TRUE

############################
#-- SAMPLE BY LON & LAT EXAMPLE
############################
#subset_indicator_obs[obs_catalog$LON < -10 & obs_catalog$LAT > 10] = TRUE

#subset_indicator_obs=c(rep(TRUE,1156382),rep(FALSE,1))
#-- Keep every 10th observation. The upper bound is taken from the mask itself
#-- rather than a hard-coded count: indexing past the end of the mask would
#-- silently extend it (padding with NA) if the jacobian had fewer rows.
subset_indicator_obs[seq(1, length(subset_indicator_obs), by = 10)] <- TRUE
#table(subset_indicator_obs)

print(paste("using",sum(subset_indicator_obs),"of",length(subset_indicator_obs),"observations"))

[1] "using 115639 of 1156383 observations"


In [439]:
##########################################################
#-- sd for Gaussian i.i.d. errors, jacob is sens matrix
##########################################################
#-- One constant value (3) per observation, i.e. per row of jacob.
#-- This vector is also the `sd` used to draw the synthetic observation noise.
R_diagonal_in <- rep(3, nrow(jacob))
#R_diagonal_in = obs_catalog$SD

In [440]:
#############################################################
#-- Generate obs, 'y',  set.seed() ????
#-- currently leaving out bgd and all fixed
#-- non-optimizable contributions including fire and fossil
#############################################################

#-- Synthetic observations: jacobian applied to (1 + true state), plus
#-- zero-mean Gaussian noise drawn with the per-observation sd R_diagonal_in.
#-- No seed is set here, so the noise differs on every run.
y_in <- jacob %*% (1 + state_vector_true) +
  rnorm(n = length(R_diagonal_in), sd = R_diagonal_in)


$$
\newcommand{\transpose}[1]{{#1^{\scriptscriptstyle T}}} 
J(x) = \transpose{(x_0 - x)} {\Sigma_x}^{-1}(x_0 - x) + \transpose{(z - Hx)} {\Sigma_z}^{-1}(z - Hx)
$$

$$
\newcommand{\transpose}[1]{{#1^{\scriptscriptstyle T}}} 
\hat{x} = (\transpose{H}{\Sigma_z}^{-1}H + {\Sigma_x}^{-1})^{-1}(\transpose{H}{\Sigma_z}^{-1}z+{\Sigma_x}^{-1}x_0)
$$

$$
\newcommand{\transpose}[1]{{#1^{\scriptscriptstyle T}}} 
\Sigma_{\hat{x}} = {({\Sigma_x}^{-1} + \transpose{H}{\Sigma_z}^{-1}H )}^{-1}
$$







In [453]:
############################
#-- Run the actual inversion
############################

#-- invert_clean() is defined in the sourced inversion code. Inputs passed here:
#--   H / H_bgd            : sensitivity matrix and background jacobian
#--   R_diagonal           : per-observation error vector
#--   P_0                  : prior covariance of the state
#--   y                    : synthetic observations
#--   subset_indicator_obs : logical mask of the observations to use
#--   DOF, output_Kalman_Gain : request the extra diagnostics
#-- The Kalman gain is read back afterwards from ret2$diags$KGAIN.
ret2 <- invert_clean(
  H                    = jacob,
  R_diagonal           = R_diagonal_in,
  P_0                  = sigma,
  y                    = y_in,
  H_bgd                = jacob_bgd,
  subset_indicator_obs = subset_indicator_obs,
  DOF                  = TRUE,
  output_Kalman_Gain   = TRUE
)



[1] "...cross product"
[1] ".. .deriving posterior covariance matrix of state, P"
[1] "...deriving posterior mean state, X_hat"
[1] "DOF Signal: 164.069035396092"
[1] "DOF Background: 363.930964603908"
[1] ""
[1] "************************************************************"
[1] "--Chi sq test on posterior residuals relative to S_z--"
[1] "************************************************************"
[1] "var est= 1.001  CI (stand variance, chi sq test):  ( 0.993 , 1.01 )"
[1] ""
[1] "************************************************************"
[1] "--Chi sq test on posterior vs truth, relative to S_xpost--"
[1] "************************************************************"
[1] "chi sq stat: 1.06920504490705"
[1] ""
[1] "*****************************************************************"
[1] "--Chi sq test: test variance of x_hat-x_prior, relative to S_0--"
[1] "*****************************************************************"
[1] "chi sq stat: 0.798905021625468"
[1] "Done....writing 

In [408]:
#-- This code looks through Kalman Gain for observation with largest gain in the month of interest for that region
#-- For each row of KGAIN (one state element) take the column (observation)
#-- with the largest absolute gain. which.max() yields exactly one index per
#-- row (the first maximum), so a tie cannot add extra entries and shift the
#-- (region, month) lookup below, as which(abs(x) == max(abs(x))) could.
max_indices <- apply(ret2$diags$KGAIN, 1, FUN = function(x) which.max(abs(x)))
max_indices <- as.numeric(as.vector(unlist(max_indices)))

#-- Look the winners up in the catalog restricted to the observations used.
#-- NOTE(review): assumes KGAIN columns are ordered like
#-- obs_catalog[subset_indicator_obs, ] -- confirm against invert_clean().
sts <- obs_catalog$ID[subset_indicator_obs][max_indices]
typ <- obs_catalog$TYPE[subset_indicator_obs][max_indices]

#-- State elements are laid out region by region, 24 months each, so element
#-- (region i, month j) sits at position (i - 1) * 24 + j.
for (i in seq_len(22)) {
  for (j in seq_len(24)) {
    k <- (i - 1) * 24 + j
    print(paste0(gsub(" ", "", transcom_names[i]), " month:", j, " TYPE:", typ[k], "  ", sts[k]))
  }
}

[1] "NorthAmericanBoreal month:1 TYPE:IS  obspack_co2_1_GLOBALVIEWplus_v6.1_2021-03-01~co2_brw_surface-insitu_1_allvalid~3269439"
[1] "NorthAmericanBoreal month:2 TYPE:IS  obspack_co2_1_GLOBALVIEWplus_v6.1_2021-03-01~co2_bck_surface-insitu_6_allvalid~2257783"
[1] "NorthAmericanBoreal month:3 TYPE:IS  obspack_co2_1_GLOBALVIEWplus_v6.1_2021-03-01~co2_crv_aircraft-pfp_1_allvalid~9380526"
[1] "NorthAmericanBoreal month:4 TYPE:IS  obspack_co2_1_GLOBALVIEWplus_v6.1_2021-03-01~co2_cps_surface-insitu_6_allvalid~9175858"
[1] "NorthAmericanBoreal month:5 TYPE:IS  obspack_co2_1_GLOBALVIEWplus_v6.1_2021-03-01~co2_chl_surface-insitu_6_allvalid~4640333"
[1] "NorthAmericanBoreal month:6 TYPE:IS  obspack_co2_1_GLOBALVIEWplus_v6.1_2021-03-01~co2_fsd_surface-insitu_6_allvalid~10708805"
[1] "NorthAmericanBoreal month:7 TYPE:IS  obspack_co2_1_GLOBALVIEWplus_v6.1_2021-03-01~co2_inu_surface-insitu_6_allvalid~13506710"
[1] "NorthAmericanBoreal month:8 TYPE:IS  obspack_co2_1_GLOBALVIEWplus_v6.1_2021-03-01~co2

In [22]:
#-- Print the R version, platform and attached/loaded packages of this session
sessionInfo()

R version 4.3.1 (2023-06-16)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.4.1

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/Denver
tzcode source: internal

attached base packages:
[1] parallel  stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
[1] dplyr_1.1.4     plyr_1.8.8      mvtnorm_1.2-4   ncdf4_1.21     
[5] lattice_0.21-8  MixMatrix_0.2.6 Matrix_1.5-4.1  ggplot2_3.4.2  

loaded via a namespace (and not attached):
 [1] gtable_0.3.3     jsonlite_1.8.7   compiler_4.3.1   crayon_1.5.2    
 [5] tidyselect_1.2.0 Rcpp_1.0.11      IRdisplay_1.1    scales_1.2.1    
 [9] uuid_1.2-0       fastmap_1.1.1    IR