# ============================
# = Glazsiou study | 0 years since diagnosis | Insulin sensitivity check  =
# ============================

The purpose of this notebook is to check the effect of different degrees of excluding people who are treated with insulin:
1. Excluding a person's entire record if they were ever prescribed insulin.
2. Excluding the portion of a person's record after their first prescription of insulin.
3. Including a clause in the definition of the HMA state that ignores insulin prescriptions.
The motivation behind the sensitivity analyses is that there were instances of inter-test intervals being labelled as 'Adjust' because the interval between insulin prescriptions was larger than the threshold. The interval is not as relevant for insulin prescriptions, so these inter-test intervals were mislabelled.



# Get requisite packages.

In [2]:
# Get requisite packages.
# 'pacman' is bootstrapped on first use. `pacman::p_load()` then installs any
# missing package listed below and attaches all of them.
# `requireNamespace()` is used for the check because it asks directly whether
# the package can be loaded, instead of scanning the whole matrix returned by
# `installed.packages()`.
if( !requireNamespace( "pacman", quietly = TRUE ) )
{
  install.packages( "pacman" )
  library( pacman )
}
pacman::p_load(
    bigrquery # Version ‘1.5.1’
    ,data.table # Version ‘1.16.0’
    ,GGally # Version ‘2.2.1’
    ,gtable # Version ‘0.3.6’
    ,grid # Version ‘4.4.1’
    ,gridExtra # Version ‘2.3’
    ,IRdisplay
    ,kableExtra
    ,paletteer # Version ‘1.6.0’
    ,readr # Version ‘2.1.5’
    ,tidytext # Version ‘0.4.2’
    ,tidyverse # Version ‘2.0.0’
    ,TraMineR # Version ‘2.2.10’
    ,TraMineRextras # Version ‘0.6.8’
)
# 'ggsankey' is installed from the 'ciaranmci' fork rather than the upstream
# 'davidsjoberg' repository. `force = TRUE` re-installs it on every run, so the
# fork replaces any copy that is already installed.
# NOTE(review): 'devtools' is not in the `p_load()` list, so it must already be
# installed for this call to work.
#devtools::install_github("davidsjoberg/ggsankey")
#remove.packages("ggsankey")
devtools::install_github("ciaranmci/ggsankey", force = TRUE )

Downloading GitHub repo ciaranmci/ggsankey@HEAD




[36m──[39m [36mR CMD build[39m [36m─────────────────────────────────────────────────────────────────[39m
* checking for file ‘/var/tmp/Rtmpwak5Q8/remotes2d754552b520/ciaranmci-ggsankey-821b0e3/DESCRIPTION’ ... OK
* preparing ‘ggsankey’:
* checking DESCRIPTION meta-information ... OK
* checking for LF line-endings in source and make files and shell scripts
* checking for empty or unneeded directories
* building ‘ggsankey_0.0.99999.tar.gz’



Installing package into ‘/home/jupyter/.R/library’
(as ‘lib’ is unspecified)



# Set cohort parameters

In [3]:
# ---- Study dates ----
# A patient's diagnosis must pre-date this threshold.
date_diagnosis_threshold <- lubridate::ymd( "2000-01-01" )
# Test and intervention records are studied from the follow-up start date,
# which is the diagnosis threshold plus a delay expressed in years.
followup_delay_in_years <- 0
date_followup_start <-
    date_diagnosis_threshold + lubridate::years( followup_delay_in_years )
# Test and intervention records are studied up to the follow-up end date,
# which is the follow-up start plus a duration expressed in years.
followup_duration_in_years <- 10
date_followup_end <-
    date_followup_start + lubridate::years( followup_duration_in_years )

# ---- HMA look-back window ----
# How far back in time prescriptions are reviewed when identifying the HMA
# status.
HMA_adjust_lookBack_window <- lubridate::weeks( 16 )

# ---- Acceptable test values ----
# Lower and upper thresholds for acceptable values of the test.
test_value_cutoff_lower <- 20
test_value_cutoff_upper <- 200

# ---- Expected testing interval ----
# Lower and upper bounds, in months, for the expected interval between
# subsequent tests.
val_testing_interval_LB <- 2
val_testing_interval_UB <- 5

# ---- Meaningful changes in test value ----
# Changes in the value of the test that count as meaningful.
val_meaningful_test_improvement <- -10
val_meaningful_test_disimprovement <- 10

# ---- Repeated prescriptions ----
# Window, in months, within which to search for repeated (but not repeat)
# prescriptions.
window_repeated_prescription_months <- 3

# ---- Tracking depth ----
# Number of tests, treatments, or iterations after diagnosis to track.
n_iterations <- 2 * followup_duration_in_years

# ---- Multimorbidity windows ----
# Window, in months, within which the multimorbidity diagnoses and the index
# diagnosis must fit.
multimorb_inclusion_window_months <- 60

# Window, in months, outwith which at least two multimorbidity diagnoses must
# be of each other.
multimorb_gap_window_months <- 1

# Generate the cohort.

In [4]:
# Run the external cohort-generation script. `source()` evaluates it in the
# calling environment, so any objects it creates become available to later cells.
# NOTE(review): presumably the script reads the cohort parameters defined in the
# previous cell - confirm against 'RESHAPE_cohort_generator.r'.
source('RESHAPE_cohort_generator.r')

“[1m[22mThe `check_from` argument of `tbl_sql()` is deprecated as of dbplyr 2.5.0.
[36mℹ[39m The deprecated feature was likely used in the [34mdbplyr[39m package.
  Please report the issue at [3m[34m<https://github.com/tidyverse/dbplyr/issues>[39m[23m.”
“[1m[22mMissing values are always removed in SQL aggregation functions.


# Format the data.

In [5]:
# Run the external data-formatting script in the calling environment.
# NOTE(review): presumably this is where
# 'df_log_PandT_longFormat_simplified_StrataLabels' (used by the cells below)
# is created - confirm against 'RESHAPE_format_the_data.r'.
source('RESHAPE_format_the_data.r')

[1m[22m`summarise()` has grouped output by 'person_id'. You can override using the
`.groups` argument.


# 1. Excluding a person's entire record if they were ever prescribed insulin.

In [66]:
# Sensitivity check 1: compare the distribution of HMA states across three
# cohorts: everyone, people who were ever prescribed insulin, and people who
# were never prescribed insulin.

# ## Helpers.
# Count the distinct, observed inter-test intervals in each HMA state.
#   df : a data frame with columns `person_id`, `idx_test_interval` and `HMA`.
# Returns a tibble with one row per HMA state and the count in column `n`.
# Intervals labelled "Unobserved" or with a missing HMA state are not counted.
count_intervals_per_HMA <- function( df )
{
    df %>%
    dplyr::distinct( person_id, idx_test_interval, HMA ) %>%
    dplyr::filter( HMA != "Unobserved", !is.na( HMA ) ) %>%
    # Drop any grouping inherited from `df` so the count is per HMA state only.
    dplyr::ungroup() %>%
    dplyr::count( HMA, name = "n" )
}
# Count the distinct people in a data frame.
#   df : a data frame with a `person_id` column.
# Returns a single integer.
count_people <- function( df )
{
    df %>%
    dplyr::distinct( person_id ) %>%
    nrow()
}

# ## Table of HMA states before removing insulin people.
tbl_intervals_per_HMA__all_people <-
    count_intervals_per_HMA( df_log_PandT_longFormat_simplified_StrataLabels )
n_people__all_people <-
    count_people( df_log_PandT_longFormat_simplified_StrataLabels )
n_intervals__all_people <- sum( tbl_intervals_per_HMA__all_people$n )

# ## Table of HMA states of insulin people and non insulin people.
# A person is flagged as an insulin user if any of their events is "Insulin".
# `na.rm = TRUE` makes the flag FALSE (rather than NA) for a person whose only
# non-matching events have a missing `event_value`; an NA flag would silently
# drop that person from both the insulin and the non-insulin cohort.
tbl_indicating_insulin_or_not <-
    df_log_PandT_longFormat_simplified_StrataLabels %>%
    dplyr::group_by( person_id ) %>%
    dplyr::mutate( used_insulin = any( event_value == "Insulin", na.rm = TRUE ) ) %>%
    dplyr::ungroup() %>%
    dplyr::select( person_id, idx_test_interval, HMA, used_insulin )

tbl_intervals_per_HMA__insulin_users_only <-
    tbl_indicating_insulin_or_not %>%
    dplyr::filter( used_insulin ) %>%
    count_intervals_per_HMA()
n_people__insulin_users_only <-
    tbl_indicating_insulin_or_not %>%
    dplyr::filter( used_insulin ) %>%
    count_people()
n_intervals__insulin_users_only <- sum( tbl_intervals_per_HMA__insulin_users_only$n )

tbl_intervals_per_HMA__non_insulin_users_only <-
    tbl_indicating_insulin_or_not %>%
    dplyr::filter( !used_insulin ) %>%
    count_intervals_per_HMA()
n_people__non_insulin_users_only <-
    tbl_indicating_insulin_or_not %>%
    dplyr::filter( !used_insulin ) %>%
    count_people()
n_intervals__non_insulin_users_only <- sum( tbl_intervals_per_HMA__non_insulin_users_only$n )

# Presentation.
# ## Totals.
print( paste0("Total number of people = ", n_people__all_people ) )
print( paste0("Total number of insulin users = ", n_people__insulin_users_only ) )
print( paste0("Total number of non-insulin users = ", n_people__non_insulin_users_only ) )

print( paste0("Total number of inter-test intervals for all people = ", n_intervals__all_people) )
print( paste0("Total number of inter-test intervals for insulin users people = ", n_intervals__insulin_users_only) )
print( paste0("Total number of inter-test intervals for non-insulin users people = ", n_intervals__non_insulin_users_only) )

# ## Breakdown by HMA state.
# The count columns are renamed by name before joining, so the labels cannot
# drift if the join order changes.
print( "The breakdown of inter-test intervals by HMA state for each cohort:" )
tbl_intervals_per_HMA__all_people %>%
dplyr::rename( `All people` = n ) %>%
dplyr::left_join(
    dplyr::rename( tbl_intervals_per_HMA__insulin_users_only, `Insulin users` = n )
    ,by = dplyr::join_by( HMA )
    ) %>%
dplyr::left_join(
    dplyr::rename( tbl_intervals_per_HMA__non_insulin_users_only, `Non-insulin users` = n )
    ,by = dplyr::join_by( HMA )
    ) %>%
# An HMA state that is absent from a sub-cohort joins as NA; treat it as a
# count of zero so that the `sum()`-based proportions below are not all NA.
dplyr::mutate(
    dplyr::across(
        c( `Insulin users`, `Non-insulin users` )
        ,~ dplyr::coalesce( .x, 0L )
    )
) %>%
# Each relative proportion is the share of that cohort's intervals in each state.
dplyr::mutate(
    `Relative proportion` = round( `All people` / sum( `All people` ), 2 )
    ,`Relative proportion insulin users` = round( `Insulin users` / sum( `Insulin users` ), 2 )
    ,`Relative proportion non-insulin users` = round( `Non-insulin users` / sum( `Non-insulin users` ), 2 )
) %>%
dplyr::select(
    `HMA`, `All people`, `Relative proportion`
    ,`Non-insulin users`, `Relative proportion non-insulin users`
    ,`Insulin users`, `Relative proportion insulin users`
)

print( "The relative proportion of HMA states in the cohort with an without the insulin users is the same." )
print( "This is why there is no discernable difference between the plots with and without the insulin users." )

### There appears to be no difference when I remove the 1032 people who used insulin.

[1] "Total number of people = 7808"
[1] "Total number of insulin users = 1032"
[1] "Total number of non-insulin users = 6776"
[1] "Total number of inter-test intervals for all people = 37707"
[1] "Total number of inter-test intervals for insulin users people = 7120"
[1] "Total number of inter-test intervals for non-insulin users people = 30587"
[1] "The breakdown of inter-test intervals by HMA state for each cohort:"


HMA,All people,Relative proportion,Non-insulin users,Relative proportion non-insulin users,Insulin users,Relative proportion insulin users
<fct>,<int>,<dbl>,<int>,<dbl>,<int>,<dbl>
Hold,18236,0.48,15390,0.5,2846,0.4
Monitor,14113,0.37,11348,0.37,2765,0.39
Adjust,5358,0.14,3849,0.13,1509,0.21


[1] "The relative proportion of HMA states in the cohort with an without the insulin users is the same."
[1] "This is why there is no discernable difference between the plots with and without the insulin users."


# 2. Excluding the portion of a person's record after their first prescription of insulin.

# 3. Including a clause in the definition of the HMA state that ignores insulin prescriptions.

This is the most problematic adjustment to make because I must answer _What label do I use if the prescription __is__ for insulin?_

The original problem was that insulin was being prescribed many weeks after the previous prescription of insulin, which is considered to be an adjustment to treatment. I suppose the appropriate label is 'Hold' because the inter-test interval is definitely not shorter than expected and there is no consideration for the change in prescription.

But what happens if something else is prescribed as well as insulin? I can't simply ignore the inter-test interval because insulin is present.

In [74]:
# Show the first six people who are flagged as having used insulin.
tbl_indicating_insulin_or_not %>%
    dplyr::filter( used_insulin ) %>%
    dplyr::distinct( person_id, used_insulin ) %>%
    utils::head( n = 6 )

# Notes from inspecting these six people:
# - '00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C' is a
#   legitimate 'Adjust'.
# - Everyone else in the top six is only using insulin but is labelled as
#   'Adjust'.



person_id,used_insulin
<chr>,<lgl>
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,True
008A3F7E83EA3ADD8762F303E14BA83422EFE88874E57A7793A996767B589EA1,True
00E929ADBFDDFD0B9036CA33AA404D5A25D55E8C1331182997930CEA4438820F,True
01CEA86C47DDDE2235C1950159E95103156CA5EECE090515FD017B0FBC5E3A90,True
0233DE7D0E78513AE949A443C27CD8B22F02FE76B120266F5A6716A483988B22,True
024DC536C1D667E7432B65A5A6EFF6B23E94CF8A7F7CB5E33591F7B69EB30F8B,True


In [100]:
# For every inter-test interval that contains an insulin prescription, collect
# the events in that interval that are not insulin (and are neither tests nor
# 'Unobserved'). What remains are the intervals with more than just insulin.
df_people_using_more_than_just_insulin <-
    df_log_PandT_longFormat_simplified_StrataLabels %>%
    dplyr::distinct( person_id, idx_test_interval, event_value ) %>%
    # Keep only rows that mention neither a test nor 'Unobserved'.
    dplyr::filter(
        stringr::str_detect( event_value, pattern = "Test", negate = TRUE )
        ,stringr::str_detect( event_value, pattern = "Unobserved", negate = TRUE )
    ) %>%
    # Keep only the intervals in which insulin was prescribed.
    dplyr::group_by( person_id, idx_test_interval ) %>%
    dplyr::filter( "Insulin" %in% event_value ) %>%
    dplyr::ungroup() %>%
    # Discard the insulin rows themselves, leaving the other events.
    dplyr::filter( event_value != "Insulin" )

# The next step is to compare these prescriptions with the prior window to see
# whether they indicated an 'Adjust' state or not.
# First, extract these people's full records from
# 'df_log_PandT_longFormat_simplified_StrataLabels' (minus the insulin rows) so
# that only they are being dealt with.
dplyr::inner_join(
    x = dplyr::distinct( df_people_using_more_than_just_insulin, person_id )
    ,y = df_log_PandT_longFormat_simplified_StrataLabels
    ,by = dplyr::join_by( person_id )
) %>%
dplyr::filter( event_value != "Insulin" )

# Still to do: apply the window method to define the 'Adjust' state, but
# ignoring insulin.
#...

person_id,idx_test_interval,HMA,start_dttm,event_name,event_value,end_dttm,HbA1c,testType,DOB,Age,date_diagnosis,inter_test_duration_cont,inter_test_duration_discr,test_status_rollover,HMAandTestStatus,n_meds_per_test_interval,TandI,MultiMorb,TandMultiMorb
<chr>,<dbl>,<fct>,<dttm>,<chr>,<fct>,<dttm>,<dbl>,<chr>,<dttm>,<dbl>,<dttm>,<dbl>,<chr>,<chr>,<fct>,<int>,<fct>,<lgl>,<fct>
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,1,Monitor,2001-01-29 09:38:59,test,Test Status = Amber,2001-04-26 15:02:48,58.475,Median 58,1938-08-01,62.49699,1983-08-01 15:28:20,2.910480,Shorter than expected,Amber,Monitor Amber,,Amber Zero Rx,TRUE,Amber Multimorbid
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,2,,2001-04-26 15:02:48,test,Test Status = Yellow,2001-05-09 09:06:50,55.196,Median 58,1938-08-01,62.73596,1983-08-01 15:28:20,,Not applicable,Yellow,,,Yellow Zero Rx,TRUE,Yellow Multimorbid
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,3,Hold,2001-05-09 09:06:50,test,Test Status = Amber,2002-01-28 09:00:53,58.475,Median 58,1938-08-01,62.77090,1983-08-01 15:28:20,8.612770,As expected,Amber,Hold Amber,,Amber Zero Rx,TRUE,Amber Multimorbid
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,4,,2002-01-28 09:00:53,test,Test Status = Amber,2002-02-06 15:23:04,68.312,Median 58,1938-08-01,63.49418,1983-08-01 15:28:20,,Not applicable,Amber,,,Amber Zero Rx,TRUE,Amber Multimorbid
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,5,Monitor,2002-02-06 15:23:04,test,Test Status = Amber,2002-06-02 16:15:15,69.405,Median 58,1938-08-01,63.51956,1983-08-01 15:28:20,3.872137,Shorter than expected,Amber,Monitor Amber,,Amber Zero Rx,TRUE,Amber Multimorbid
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,6,Hold,2002-06-02 16:15:15,test,Test Status = Yellow,2003-01-28 00:00:00,50.824,Median 58,1938-08-01,63.83747,1983-08-01 15:28:20,7.816863,As expected,Yellow,Hold Yellow,,Yellow Zero Rx,TRUE,Yellow Multimorbid
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,7,Monitor,2003-01-28 00:00:00,test,Test Status = Amber,2003-05-01 09:49:43,58.475,Median 58,1938-08-01,64.49315,1983-08-01 15:28:20,3.113651,Shorter than expected,Amber,Monitor Amber,,Amber Zero Rx,TRUE,Amber Multimorbid
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,8,Adjust,2003-05-01 09:49:43,test,Test Status = Yellow,2005-10-19 09:33:21,49.731,Median 58,1938-08-01,64.74907,1983-08-01 15:28:20,,Not applicable,Yellow,Adjust Yellow,2,Yellow Two Rx,TRUE,Yellow Multimorbid
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,8,Adjust,2005-10-19 09:33:21,prescription,Metformin hydrochloride,2005-10-19 09:33:21,,,1938-08-01,67.21753,1983-08-01 15:28:20,,Not applicable,Yellow,Adjust Yellow,2,Yellow Two Rx,TRUE,Yellow Multimorbid
00707AC71D614EA5F8E8FEE7145B088F2D11C84CDF11007E7F3F6AF3AE9E2E7C,8,Adjust,2005-11-09 09:12:53,prescription,Metformin hydrochloride,2005-12-09 09:31:03,,,1938-08-01,67.27502,1983-08-01 15:28:20,,Not applicable,Yellow,Adjust Yellow,2,Yellow Two Rx,TRUE,Yellow Multimorbid
