Skip to content

Commit

Permalink
Make R-based regression use the 'drop_unweighted' parameter and update the weight name the same way
Browse files Browse the repository at this point in the history
  • Loading branch information
jrm5100 committed Oct 16, 2020
1 parent 9902b4c commit fbfb016
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 10 deletions.
27 changes: 17 additions & 10 deletions clarite/modules/analyze/regression/r_code/ewas_r.R
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ regress_cat_survey <- function(data, varying_covariates, phenotype, var_name, re

# General Regression function which applies some filters/tests before calling the actual regression
regress <- function(data, y, var_name, covariates, min_n, allowed_nonvarying, regression_family, var_type,
use_survey, single_weight, weights, strata, fpc, ids, subset_array, ...){
use_survey, single_weight, weights, strata, fpc, ids, subset_array, drop_unweighted, ...){
# The result list will be used to update results for this variable
result = list()

Expand Down Expand Up @@ -263,19 +263,26 @@ regress <- function(data, y, var_name, covariates, min_n, allowed_nonvarying, re
}
# Get weight values, returning early if there is a problem with the weight
if(!(weight %in% names(data))){
# Weight values are missing
warning(paste(var_name, " had a NULL result because its weight (", weight, ") was not found"))
result$weight <- paste(weight, " (not found)")
return(data.frame(result, stringsAsFactors = FALSE))
} else if(sum(!is.na(data[var_name]) & is.na(data[weight])) > 0){
warning(paste(var_name, " had a NULL result because its weight (", weight, ") had ", sum(is.na(data[weight])), " missing values when the variable was not missing"))
result$weight <- paste(weight, " (missing values)")
return(data.frame(result, stringsAsFactors = FALSE))
} else {
# Get weights
weight_values <- data[weight]
# Fill NA weight values with 0 to pass an internal check by survey
weight_values[is.na(weight_values),] <- 0
}
missing_weight_count <- sum(!is.na(data[var_name]) & is.na(data[weight]) & subset_data)
if(missing_weight_count > 0){
# Some weights in the subset are missing when the variable is not
warning(paste(var_name, " had a NULL result because its weight (", weight, ") had ", missing_weight_count, " missing values when the variable was not missing"))
result$weight <- paste0(weight, " (", missing_weight_count, " observations are missing weights)")
if (!drop_unweighted){
# Return early with no result if dropping unweighted was not enabled
return(data.frame(result, stringsAsFactors = FALSE))
}
}
# Get weights
weight_values <- data[weight]
# Fill NA weight values with 0 to pass an internal check by survey
weight_values[is.na(weight_values),] <- 0

# Load strata, fpc, and ids
if(!is.null(strata)){
strata_values <- data[strata]
Expand Down
1 change: 1 addition & 0 deletions clarite/modules/analyze/regression/r_survey_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,6 +198,7 @@ def run(self):
min_n=self.min_n,
weights=weights,
subset=self.survey_design_spec.subset_array,
drop_unweighted=self.survey_design_spec.drop_unweighted,
**kwargs)

result = ewasresult2py(result)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,7 @@ def run(self):
weight_name, missing_weight_mask, warning = self.survey_design_spec.check_missing_weights(data, rv)
if warning is not None:
self.warnings[rv].append(warning)
self.results[rv]["Weight"] = weight_name

# Get complete case mask
complete_case_mask = self.get_complete_case_mask(data, rv) # Complete cases
Expand Down

0 comments on commit fbfb016

Please sign in to comment.