# Create Covariates and BMI

This notebook combines the demographics and principal components (PCs) we created earlier, then adds BMI as a final covariate.

In [None]:
library(tidyverse)

## Load existing data

First we will load our PCs and Demographics files.

In [None]:
# Demographics table created earlier; read from the working directory.
demo <- read_csv(file = "demo.csv")

In [None]:
# Principal components table created earlier; read from the working directory.
pcs <- read_csv(file = "pcs.csv")

## Combine and save the data

In [None]:
# Keep only rows that appear in both tables. No `by` is supplied, so dplyr
# joins on every column name the two tables have in common.
cov_in <- demo %>% inner_join(pcs)

In [None]:
# Save the combined demographics + PCs table to the local working directory.
cov_in %>% write_csv("covariates.csv")

In [None]:
# Copy the local covariates file into the workspace bucket with gsutil.
# `$WORKSPACE_BUCKET` is not expanded by R; it relies on the shell that
# system2() runs the command through (on Unix-alikes) to substitute the
# environment variable.
# stdout/stderr are captured so the gsutil output is returned as the cell
# result.
system2(
  "gsutil",
  args = c("cp", "covariates.csv", "$WORKSPACE_BUCKET/"),
  stdout = TRUE, # spelled out: T/F are ordinary variables that can be reassigned
  stderr = TRUE
)

## Create BMI Data

Next, we will get BMI data from the Program Physical Measurements (PM). We'll configure our BigQuery (BQ) retrieval again, then select the measurement source concept ID that All of Us (AoU) uses for PM-calculated BMI.

In [None]:
library(bigrquery)  # BigQuery R client.

## BigQuery setup: all values come from the notebook's environment variables.
# Google project passed to BigQuery as the billing project for queries.
BILLING_PROJECT_ID <- Sys.getenv("GOOGLE_PROJECT")
# Curated dataset (CDR) for the current workspace context.
CDR <- Sys.getenv("WORKSPACE_CDR")
# Workspace bucket location.
WORKSPACE_BUCKET <- Sys.getenv("WORKSPACE_BUCKET")

# Run a SQL query in BigQuery and download the result.
#
# Args:
#   query: SQL text. Table names without a dataset qualifier are resolved
#          against `CDR`, which is supplied as the default dataset.
# Returns:
#   The query result as returned by bq_table_download().
#
# Uses the globals BILLING_PROJECT_ID (billing project) and CDR.
bq <- function(query) {
  result_table <- bq_project_query(
    BILLING_PROJECT_ID,
    query = query,
    default_dataset = CDR
  )
  # page_size is a paging option of bq_table_download(); it was previously
  # given to bq_project_query(), where it is not a paging argument and so
  # did not control how many rows are fetched per page.
  bq_table_download(result_table, page_size = 25000)
}

In [None]:
# Fetch one BMI value per participant from measurement rows whose source
# concept is 903124 (the All of Us calculated BMI). Null values are excluded;
# when a participant has several values, the smallest one is kept.
bmi_pm <- bq(query = "select person_id, min(value_as_number) BMI
from 
measurement join measurement_ext using (measurement_id)
where measurement_source_concept_id=903124 and value_as_number is not null
group by person_id
")

In [None]:
# Rows (participants with a BMI value) and columns returned by the query.
bmi_pm %>% dim()

In [None]:
# Preview the table with person_id dropped and each remaining column shuffled
# independently, so the rows shown do not correspond to real participants.
bmi_pm %>%
  select(-person_id) %>%
  mutate(across(everything(), sample)) %>%
  head()
# To view the real rows instead, uncomment:
# head(bmi_pm)

## Merge all of our covariates and save

In [None]:
# Attach BMI to the other covariates, keeping only rows found in both tables.
# No `by` is supplied, so dplyr joins on every column name the tables share.
cov_out <- bmi_pm %>% inner_join(cov_in)

In [None]:
# Preview the merged table with person_id dropped and each remaining column
# shuffled independently, so the rows shown do not correspond to real
# participants.
cov_out %>%
  select(-person_id) %>%
  mutate(across(everything(), sample)) %>%
  head()
# To view the real rows instead, uncomment:
# head(cov_out)

### Write the file and save in the workspace bucket

In [None]:
# Write the final covariate table (now including BMI) to covariates.csv,
# replacing any local file of that name.
cov_out %>% write_csv("covariates.csv")

In [None]:
# Copy the final covariates file into the workspace bucket with gsutil.
# `$WORKSPACE_BUCKET` is not expanded by R; it relies on the shell that
# system2() runs the command through (on Unix-alikes) to substitute the
# environment variable.
# stdout/stderr are captured so the gsutil output is returned as the cell
# result.
system2(
  "gsutil",
  args = c("cp", "covariates.csv", "$WORKSPACE_BUCKET/"),
  stdout = TRUE, # spelled out: T/F are ordinary variables that can be reassigned
  stderr = TRUE
)