In [2]:
#Qmak_BDCT_FDM_V1.5.ipynb
##last modified 20230309 

#Builds the BDCT SUS FDM
#Assumes that the target dataset (targetdb, set below) already exists
#TODO: add code to create the target dataset if it does not exist

#You will need to manually amend the target dataset (targetdb) below before running this script

library(bigrquery)
library(lubridate) 

# Project ID passed to every bq_project_query() call in this notebook
project_id <- "yhcr-prd-phm-bia-core"

# Fully qualified source and target datasets.
# targetdb has to be amended by hand when building a different FDM.
sourcedb <- "yhcr-prd-phm-bia-core.CB_STAGING_DATABASE_FDM_Format"
targetdb <- "yhcr-prd-phm-bia-core.CB_FDM_ICNARC"

# Strip any spaces left in the target dataset name
targetdb <- gsub(" ", "", targetdb, fixed = TRUE)

print(sourcedb)
print(targetdb)

[1] "yhcr-prd-phm-bia-core.CB_STAGING_DATABASE_FDM_Format"
[1] "yhcr-prd-phm-bia-core.CB_FDM_ICNARC"


In [3]:
# Build the list of user tables that the later loops iterate over.
#
# Queries INFORMATION_SCHEMA.COLUMNS of the source dataset for every table whose
# name matches 'tbl_icnarc%' and that has a person_id column, then downloads the
# result into the data frame "table" (single column: table_name).

sql3 <- paste0(
    "select distinct table_name  from ", sourcedb,
    ".INFORMATION_SCHEMA.COLUMNS where table_name like 'tbl_icnarc%' and column_name = 'person_id' order by table_name ;"
)
tb3 <- bq_project_query(project_id, sql3)

# The loops below read the table names from this data frame
table <- bq_table_download(tb3)
table

print("Done builder table")


table_name
<chr>
tbl_icnarc_data


[1] "Done builder table"


In [5]:
# Build <targetdb>.tmp_persons: the distinct person_ids found across every
# source table listed in the data frame "table".
# Only the first column of "table" (table[[i, 1]], the table name) is used.

# Fail early, before anything is dropped, if the table list is empty.
# (1:nrow(table) would otherwise iterate over c(1, 0) and fail with an
# unhelpful subscript error after tmp_persons had already been recreated.)
if (nrow(table) == 0) {
    stop("No source tables listed in 'table' - run the table list cell and check sourcedb")
}

# Start from an empty tmp_persons table
sql4 <- paste0('drop table if exists ', targetdb, '.tmp_persons;')
tb4 <- bq_project_query(project_id, sql4)

sql5 <- paste0('create table ', targetdb, '.tmp_persons (person_id int64);')
tb5 <- bq_project_query(project_id, sql5)

# Append the distinct person_ids of each source table in turn.
# bq_project_query() waits for the job to finish, so there is nothing to
# download for an INSERT statement.
for (i in seq_len(nrow(table))) {
    temp_sql_query <- paste0(
        'insert into ', targetdb, '.tmp_persons select distinct cast(person_id as int64)  from ',
        sourcedb, '.', table[[i, 1]], ';'
    )
    temp_queried_table <- bq_project_query(project_id, temp_sql_query)
}

# Progress message; the timestamp is the system clock plus a fixed one hour
fred <- paste0('tmp person table built ', Sys.time() + hours(1))
print(fred)

# Build <targetdb>.person: the rows of CB_FDM_MASTER.person whose person_id
# appears in <targetdb>.tmp_persons.

# Remove any previous copy of the person table
sql11 <- paste0('drop table if exists ', targetdb, '.person')
tb11 <- bq_project_query(project_id, sql11)

# Recreate it from the master person table, restricted to tmp_persons
sql12 <- paste0('create table ', targetdb, '.person 
as 
SELECT distinct a.person_id
, gender_concept_id
, year_of_birth
, month_of_birth
, day_of_birth
, birth_datetime
, death_datetime
, race_concept_id
, ethnicity_concept_id
, location_id
, provider_id
, care_site_id
, person_source_value
, gender_source_value
, gender_source_concept_id
, race_source_value
, race_source_concept_id
, ethnicity_source_value
, ethnicity_source_concept_id 
FROM yhcr-prd-phm-bia-core.CB_FDM_MASTER.person a
,', targetdb, '.tmp_persons b where a.person_id =  b.person_id')

tb12 <- bq_project_query(project_id, sql12)

# Progress message; the timestamp is the system clock plus a fixed one hour
fred <- paste0('person table built ', Sys.time() + hours(1))
print(fred)



# Before running this section make sure the extract date lookup is up to date.
# The valid-date queries later in this cell read extract_date from
# yhcr-prd-phm-bia-core.CB_LOOKUPS.tbl_Dataset_ExtractDateRef where DatasetName = 'ICNARC'.
# Example update kept from an earlier build (it targets CY_LOOKUPS / 'BDCT', adjust before use):
#sql14 <- "update `yhcr-prd-phm-bia-core.CY_LOOKUPS.tbl_Dataset_ExtractDateRef` set extract_date = '2022-08-23' where DatasetName = 'BDCT'"  
#tb14 <- bq_project_query(project_id, sql14)


# Qmak_BDCT_observation period_Part1
# The observation period is based on the persons table BUT adds in rules for this data extract.

# Clear out the two temporary event date tables from any earlier run
sql15 <- paste0('drop table if exists ', targetdb, '.tmp_Eventdates')
tb15 <- bq_project_query(project_id, sql15)

sql16 <- paste0('drop table if exists ', targetdb, '.tmp_EventdatesValid')
tb16 <- bq_project_query(project_id, sql16)

# Empty the observation_period table (the statement requires it to exist already)
sql17 <- paste0('truncate table ', targetdb, '.observation_period')
tb17 <- bq_project_query(project_id, sql17)

# Temp table holding one row per person_id / event date, filled by the loops below
sql18 <- paste0('create table ', targetdb, '.tmp_Eventdates 
(person_id int64
, EventDate Datetime) ')
tb18 <- bq_project_query(project_id, sql18)

# Progress message; the timestamp is the system clock plus a fixed one hour
fred <- paste0('temp event dates built ', Sys.time() + hours(1))
print(fred)


# Fill <targetdb>.tmp_Eventdates from every user table listed in "table".
# Each table contributes its <table_name>_start_date values first and, in a
# second pass, its <table_name>_end_date values; rows with a null person_id
# are skipped.
# seq_len() is used instead of 1:nrow() so an empty table list yields zero
# iterations rather than iterating over c(1, 0).

# Pass 1: start dates
for (i in seq_len(nrow(table))) {
    temp_sql_query <- paste0(
        'insert into ', targetdb, '.tmp_Eventdates select distinct cast(person_id as int64) as person_id, ',
        table[[i, 1]], '_start_date from ', sourcedb, '.', table[[i, 1]],
        ' where person_id is not null;'
    )
    temp_queried_table <- bq_project_query(project_id, temp_sql_query)
}
fred <- paste0('Done start dates ', Sys.time() + hours(1))
print(fred)


# Pass 2: end dates
for (i in seq_len(nrow(table))) {
    temp_sql_query <- paste0(
        'insert into ', targetdb, '.tmp_Eventdates select distinct cast(person_id as int64) as person_id, ',
        table[[i, 1]], '_end_date from ', sourcedb, '.', table[[i, 1]],
        ' where person_id is not null ;'
    )
    temp_queried_table <- bq_project_query(project_id, temp_sql_query)
}

fred <- paste0('Done end dates ', Sys.time() + hours(1))
print(fred)


# Build <targetdb>.tmp_EventdatesValid and load <targetdb>.observation_period.
# Each statement is run straight after it is built so that the progress
# messages are printed only once the work they describe has actually finished.

# Valid event dates for persons WITH a death_datetime: on or after birth,
# no later than 42 days after death, and no later than the ICNARC extract date.
sql24 <- paste0('create table ', targetdb, '.tmp_EventdatesValid
as
select distinct a.person_id
,cast(a.EventDate as date) as EventDate
from ', targetdb, '.tmp_Eventdates a ,'
, targetdb, '.person e 
where e.person_id = a.person_id 
and e.death_datetime is not null 
and a.EventDate >= e.birth_datetime
and a.EventDate <= date_add(e.death_datetime, INTERVAL 42 day) 
and a.EventDate <= (Select extract_date from yhcr-prd-phm-bia-core.CB_LOOKUPS.tbl_Dataset_ExtractDateRef where DatasetName = \'ICNARC\')')
tb24 <- bq_project_query(project_id, sql24)


#-- BUT this only includes persons with a death datetime
#-- So the statement below adds those with a null death_datetime:
#-- on or after birth and no later than the ICNARC extract date.
sql25 <- paste0('insert into ', targetdb, '.tmp_EventdatesValid
select distinct a.person_id
, cast(a.EventDate as date) as EventDate
from ', targetdb, '.tmp_Eventdates a
,', targetdb, '.person e 
where e.person_id = a.person_id 
and e.death_datetime is null 
and a.EventDate >= e.birth_datetime
and a.EventDate <= (Select extract_date from yhcr-prd-phm-bia-core.CB_LOOKUPS.tbl_Dataset_ExtractDateRef where DatasetName = \'ICNARC\')')
tb25 <- bq_project_query(project_id, sql25)

fred <- paste0('Temp valid dates built ', Sys.time() + hours(1))
print(fred)


# Final part: one observation_period row per person, spanning the earliest
# to the latest valid event date; period_type_concept_id is left null.
sql26 <- paste0('insert into ', targetdb, '.observation_period
select distinct
ROW_NUMBER() over (Order by person_id) as observation_period_id 
, person_id
, min(EventDate) as observation_period_start_date
, max(EventDate) as observation_period_end_date
,null as period_type_concept_id
from ', targetdb, '.tmp_EventdatesValid
group by person_id')
tb26 <- bq_project_query(project_id, sql26)

fred <- paste0('observation period built ', Sys.time() + hours(1))
print(fred)



# Remove from the person table everyone who has no observation period.
# This happens where there is patient data but no actual records,
# or there is patient data but no dob.
# The table path is built without the stray space that used to sit between
# the dataset name and ".observation_period", matching every other statement.

sql26a <- paste0(' delete from ', targetdb, '.person where person_id
not in (select person_id from ', targetdb, '.observation_period)')

tb26a <- bq_project_query(project_id, sql26a)

# Progress message; the timestamp is the system clock plus a fixed one hour
fred <- paste0('invalid people removed ', Sys.time() + hours(1))
print(fred)





# As the tables are all FDM ready they can simply be copied into the FDM space.
# Any existing copies in the target dataset are dropped first; the copies
# themselves are created by the next cell.
# seq_len() is used instead of 1:nrow() so an empty table list yields zero
# iterations rather than iterating over c(1, 0).

for (i in seq_len(nrow(table))) {
    temp_sql_query <- paste0('drop table if exists ', targetdb, '.', table[[i, 1]], ';')
    temp_queried_table <- bq_project_query(project_id, temp_sql_query)
}

# Progress message; the timestamp is the system clock plus a fixed one hour
fred <- paste0('Done - user tables removed ', Sys.time() + hours(1))
print(fred)

In [6]:
# Copy every user table listed in "table" from the source dataset into the
# target dataset, keeping only rows that fall inside the person's
# observation period.
#
# The <table_name>_start_date / <table_name>_end_date columns are cast to date
# before the comparison, as the visit builder queries later in this notebook
# already do. The observation period is built from event dates cast to date,
# so comparing an uncast datetime would drop records timed after midnight on
# the last day of the period.
# NOTE(review): assumes the start/end columns are DATE or DATETIME - confirm.
#
# seq_len() is used instead of 1:nrow() so an empty table list yields zero
# iterations rather than iterating over c(1, 0).
for (i in seq_len(nrow(table))) {
    temp_sql_query <- paste0('create table ', targetdb, '.', table[[i, 1]], ' as select src.*  from ', sourcedb, '.', table[[i, 1]], ' src 
    , ', targetdb, '.observation_period obs where cast(src.person_id as int64)  = obs.person_id 
    and cast(src.', table[[i, 1]], '_end_date as date) <= obs.observation_period_end_date  
    and cast(src.', table[[i, 1]], '_start_date as date) >= obs.observation_period_start_date')
    temp_queried_table <- bq_project_query(project_id, temp_sql_query)
}

# Progress message; the timestamp is the system clock plus a fixed one hour
fred <- paste0('Done - user tables built ', Sys.time() + hours(1))
print(fred)

[1] "Done - user tables built 2023-04-27 15:04:19"


In [64]:
print("starting visit builders")

# VISIT builders
# These are added-value scripts that build pseudo visit occurrence tables.
# Due to the variances of the source data it may not be possible to build
# generic visit occurrence builders.

# tmp_visit_builder is a temporary table that brings together all the visit
# information we may be interested in. Drop any copy left by an earlier run...
sql62 <- paste0(' drop table if exists ', targetdb, '.tmp_visit_builder')
tb62 <- bq_project_query(project_id, sql62)

# ...and create it again, empty
sql2 <- paste0('create table if not exists ', targetdb, '.tmp_visit_builder
( source_table string,
  source_table_identifier string,
  person_id int64,
  visit_description string,
 visitstartdate datetime,
 visitenddate datetime,
 visitlocation string,
 visitstartplace string,
 visitendplace string);')
tb2 <- bq_project_query(project_id, sql2)

# Progress message; the timestamp is the system clock plus a fixed one hour
eric <- paste0('temp visit builder table built ', Sys.time() + hours(1))
print(eric)



# Alternative to the drop/create of tmp_visit_builder (currently disabled):
#sql62 <-paste(' truncate table ' ,targetdb,'.tmp_visit_builder', sep = "")
#tb62 <- bq_project_query(project_id, sql62)


# Load tmp_visit_builder from <targetdb>.tbl_bdct_srcode, restricted to rows
# whose person exists in <targetdb>.person and whose start/end dates (cast to
# date) fall inside that person's observation period.
# The selected values are listed in the same order as the insert column list.
# visit_description is the 15-character prefix "idappointment  " followed by
# idappointment.
# NOTE(review): this reads tbl_bdct_srcode from the target dataset, but the
# table list built earlier in this notebook only matches 'tbl_icnarc%' tables -
# confirm tbl_bdct_srcode exists in targetdb before running this cell.
sql63 <-paste(' insert into ' ,targetdb,'.tmp_visit_builder
( source_table
, source_table_identifier
, person_id
, visit_description
, visitstartdate
, visitenddate
, visitlocation 
)
select distinct 
       \"tbl_srcode\" 
      ,a.rowidentifier 
      ,cast(a.person_id as int64)
     ,\"idappointment  \" || a.idappointment 
     ,tbl_bdct_srcode_start_date
     ,tbl_bdct_srcode_end_date
     ,a.idorganisationdoneat
      from ' ,targetdb,'.tbl_bdct_srcode a
  , ' ,targetdb,'.person c
  , ' ,targetdb,'.observation_period obs
  where cast(a.person_id as int64) = c.person_id
  and cast(c.person_id as int64) = obs.person_id
  and cast(a.tbl_bdct_srcode_end_date as date) <= obs.observation_period_end_date
  and cast(a.tbl_bdct_srcode_start_date as date) >= obs.observation_period_start_date', sep = "")

tb63 <- bq_project_query(project_id, sql63)

# Progress message; the timestamp is the system clock plus a fixed one hour
eric <- paste('tbl_srcode added to visit builder ' ,Sys.time() + hours(1), sep = "")
print (eric)

#Qapp_BDCT_FDM_Visit_Detail_srcVisits
#Last modified 20220831 - JDB
#This adds the visits to the visit_detail table

# Empty visit_detail before reloading it (the statement requires the table to exist)
sql67 <-paste(' truncate table ' ,targetdb,'.visit_detail', sep = "") 
tb67 <- bq_project_query(project_id, sql67)

# Load visit_detail from <targetdb>.tbl_bdct_srappointment joined to
# tbl_bdct_srpatient (on person_id), tmp_visit_builder and observation_period.
# - The tmp_visit_builder join matches the appointment rowidentifier against
#   the text taken from visit_description starting at character 16.
# - Only appointments whose start/end dates (cast to date) fall inside the
#   person's observation period are kept.
# - visit_detail_id is a row number ordered by source_table_identifier.
# - The selected values are listed in the same order as the insert column
#   list; the select aliases do not all match the column names.
# NOTE(review): tbl_bdct_srappointment and tbl_bdct_srpatient are not among
# the 'tbl_icnarc%' tables built earlier in this notebook - confirm they exist
# in targetdb before running this cell.
sql68 <-paste(' insert into ' ,targetdb,'.visit_detail 
(      visit_detail_id 
      ,person_id
      ,rowid      
      ,table
      ,clinicaldataid 
      ,visitstartdate
      ,visitstart
      ,visitenddate
      ,visitend
      ,visitstartplace
      ,visitendplace
      ,visitlocation
      ,visitprovider
    ,visitdesc
    ,visit_detail_concept_id
)
select distinct ROW_NUMBER() over (Order by c.source_table_identifier) as visit_detail_id
  , cast(b.person_id as int64) 
  , cast(c.source_table_identifier as string) as rowid  
  , \'bdct srcode appointments\' as table
  , cast(c.source_table_identifier as string) as clinicaldataid
  ,cast(a.tbl_bdct_srappointment_start_date as date) as visitstartdate
  ,a.tbl_bdct_srappointment_start_date as visitstartdatetime
  ,cast(a.tbl_bdct_srappointment_end_date as date)  as visitenddate
  ,a.tbl_bdct_srappointment_end_date as visitenddatetime
  ,a.IDOrganisation as startplace
  ,a.IDOrganisation as endplace
  ,a.IDOrganisation as visitlocation
  ,a.idprofileclinician as visitprovider
  ,\'srcode srvisit detail\' as visitdesc
  ,38004693 as visit_detail_concept_id
   from ' ,targetdb,'.tbl_bdct_srappointment  a 
  inner join ' ,targetdb,'.tbl_bdct_srpatient  b
  on a.person_id = b.person_id  
  inner join ' ,targetdb,'.tmp_visit_builder c
  on a.rowidentifier = rtrim(substr(c.visit_description, 16,20)) 
  inner join ' ,targetdb,'.observation_period obs
  on cast(a.person_id as int64)  = obs.person_id
    and cast(a.tbl_bdct_srappointment_end_date as date) <= obs.observation_period_end_date
  and cast(a.tbl_bdct_srappointment_start_date as date) >= obs.observation_period_start_date', sep = "")

tb68 <- bq_project_query(project_id, sql68)

# Progress message; the timestamp is the system clock plus a fixed one hour
eric <- paste('tbl_srcode appointment added to visit details ' ,Sys.time() + hours(1), sep = "")
print (eric)

#Qapp_BDCT_FDM_Visit_Occurrences_ALL
#Last modified - 20220831 - JDB

#Amended to insert everything into the occurrences table

# Empty visit_occurrence before reloading it (the statement requires the table to exist)
sql71 <-paste(' truncate table ' ,targetdb,'.visit_occurrence ', sep = "")
tb71 <- bq_project_query(project_id, sql71)

# Load visit_occurrence from <targetdb>.tbl_bdct_srcode joined to person (on
# person_id) and to tmp_visit_builder (RowIdentifier = source_table_identifier).
# - visit_occurrence_id is source_table_identifier cast to int64.
# - visitduration is loaded with the constant '0'.
# - The lines starting with # inside the SQL text are part of the string sent
#   to BigQuery, not R comments; they keep the care_site_id / provider_id
#   columns disabled (presumably read by BigQuery as SQL comments - confirm).
# NOTE(review): tbl_bdct_srcode is not among the 'tbl_icnarc%' tables built
# earlier in this notebook - confirm it exists in targetdb before running.
sql72 <-paste(' insert into ' ,targetdb,'.visit_occurrence 
(      person_id
      ,visit_occurrence_id
      ,rowid      -- the new visit table rowidentifier
      ,table
      ,clinicaldataid 
      ,visitstartdate
      ,visitstartdatetime
      ,visitenddate
      ,visitenddatetime
      ,visitduration
      ,visitstartplace
      ,visitendplace
      ,visitlocation
#     ,care_site_id
      ,visitprovider
#       --,provider_id
      ,visitdesc
      ,visit_source_concept_id
)
select distinct a.person_id
      ,cast(c.source_table_identifier as int64) as visit_occurrence_id 
      ,cast(c.source_table_identifier as string) as rowid  
        , \'bdct appointments\' as table
        ,cast(a.rowidentifier as string)  as clinicaldataid 
      ,cast(a.tbl_bdct_srcode_start_date as Date) as visitstartdate
        ,a.tbl_bdct_srcode_start_date  as visitstartdatetime
      ,cast(a.tbl_bdct_srcode_end_date as Date) as visitenddate
        ,a.tbl_bdct_srcode_end_date as visitenddatetime
      ,\'0\' as duration
        ,a.IDOrganisationDoneAt as visitstartplace
        ,a.IDOrganisationDoneAt as visitendplace
        ,a.IDOrganisationDoneAt as visitlocation
 #       ,a.care_site_id  as care_site_id
        ,a.IDDoneBy as visitprovider
       # --,e.provider_id as provider_id
        ,a.ctv3text as visitdesc
        ,38004693 as visit_source_concept_id
   from ' ,targetdb,'.tbl_bdct_srcode  a 
  inner join ' ,targetdb,'.person b
  on a.person_id = b.person_id 
  inner join ' ,targetdb,'.tmp_visit_builder c
  on a.RowIdentifier = c.source_table_identifier', sep = "")

tb72 <- bq_project_query(project_id, sql72)

# Progress message; the timestamp is the system clock plus a fixed one hour
eric <- paste("visit occurrence built "  ,Sys.time() + hours(1) ,sep = "")
print(eric)


# Tidy up: drop the temporary tables created while building the FDM.
# NOTE(review): tmp_visit_builder is not dropped here - confirm that is intended.

sql90 <- sprintf('drop table if exists %s.tmp_persons;', targetdb)
tb90 <- bq_project_query(project_id, sql90)

sql91 <- sprintf('drop table if exists %s.tmp_Eventdates;', targetdb)
tb91 <- bq_project_query(project_id, sql91)

sql92 <- sprintf('drop table if exists %s.tmp_EventdatesValid;', targetdb)
tb92 <- bq_project_query(project_id, sql92)

print("temp tables dropped")


# Final message naming the dataset that was built
fred <- sprintf('FDM Build of %s finished', targetdb)
print(fred)