# Data

In [1]:
# R PACKAGES
if(!require("pacman")) install.packages("pacman")
pacman::p_load(arrow, tidyverse, janitor, readxl, stringdist)

Loading required package: pacman


We used three ICD code lists to identify MSK conditions:
* All of the ICD10 M codes from the 2019 [CMS](https://www.cms.gov/medicare/coding-billing/icd-10-codes/icd-10-cm-icd-10-pcs-gem-archive#:~:text=2019%20ICD%2D10%20CM%20%26%20PCS%20files) ICD-10 code list
* A list of codes ('power') used to identify MSK conditions in Canada from [Power et al](https://pubmed.ncbi.nlm.nih.gov/35365584/). 
* A list of codes ('hwa') used to identify “less-urgent” MSK conditions in Australia [Thompson et al](https://ro.uow.edu.au/ahsri/375/). 

The power codes are ICD-10CA codes in short (3-4 character) format. To get these codes to match the other code lists, they were mapped into ICD-10 WHO codes. the WHO ICD-10 codes were extracted from the Python package [simple_icd_10](https://github.com/StefanoTrv/simple_icd_10/releases), which is the extedned code list from th WHO [page](https://icd.who.int/browse10/2019/en#) for the 2019 codes and from there mapped onto the 2019 CMS ICD-10CM codes

The hwa codes were in ICD-10-AM format. Unlike ICD-10-CA, Australia actually provide [maps](https://www.ihacpa.gov.au/resources/icd-10-am-and-achi-mapping-tables) (11th edition) to ICD-10-WHO, and these WHO codes were mapped into the CMS codes


In [2]:
who_2019 <- read_delim(
  "icd102019syst_codes.txt", 
  delim = ";", 
  col_names=F, 
  show_col_types=F
) |> 
  select(
    who_code = X8, 
    who_desc = X9
  )

#CMS ICD10 CM code list
cms <- read_lines(
  "icd10cm_order_2019.txt") |> 
  as_tibble() |> 
  separate_wider_regex(
    value,
    patterns = c(
    id = "[^\\s]+",         # First non-whitespace block
    "\\s+",                 # One or more spaces
    rest = ".*"             # Everything else
      )
  ) |> 
  separate_wider_regex(
    rest,
    patterns = c(
      cms_code = "[^\\s]+",         # First non-whitespace block
      "\\s+",                 # One or more spaces
      rest_again = ".*"             # Everything else
    )
  ) |> 
  separate_wider_regex(
    rest_again,
    patterns = c(
      level = "[^\\s]+",         # First non-whitespace block
      "\\s+",                 # One or more spaces
      rest_desc = ".*"             # Everything else
    )
  ) |> 
  mutate(
    rest_desc = str_replace_all(rest_desc, "\\s{2,}", "!!!")
  ) |> 
    separate_wider_delim(
      rest_desc,
      delim = "!!!",
      too_few = "align_start",
      too_many = 'merge',
      names = c("cms_desc", "cms_desc_long")
    ) |> 
  select(cms_code, cms_desc) |> 
  distinct(cms_code, .keep_all = T)

[1m[22mOne or more parsing issues, call `problems()` on your data frame for details, e.g.:
  dat <- vroom(...)
  problems(dat) 


In [3]:
# HWA (thompson) codes
# getting the codes from the text extracted from the pdf to a tibble is  long and is shown in the hwa.r file
source("hwa.R")
hwa<- hwa_f() |> 
  mutate(
    code_length = str_count(hwa_code),
    code_letter = str_sub(hwa_code,1,1)
  ) |> 
  distinct(hwa_code, .keep_all = T) 
  # see whic codes have children/parents
  hwa <- hwa |> 
    mutate(
      has_child = map_lgl(hwa_code, function(code) {
      any(str_starts(hwa$hwa_code, code) & nchar(hwa$hwa_code) > nchar(code))
    }) ,
    is_child = map_lgl(hwa_code, function(code) {
      # Generate all prefixes of the code excluding the full code itself
      prefixes <- str_sub(code, 1, seq_len(str_length(code) - 1))
      # Check if any prefix exists in the list of codes
      any(prefixes %in% hwa$hwa_code)
    })
  ) |> 
    # I dont need the Z codes, these are not diagnoses
  filter(
    code_letter != "Z"
  )

write_csv (hwa, "hwa_codes.csv")

[1m[22mNew names:
[36m•[39m `` -> `...1`


# Functions

In [4]:
describe_function <- function(df, col_name){

  print("N codes in list")
  df |> 
    count() |> 
    print()

  cat("\nCode lengths in list")
  df |> 
    group_by(code_length) |> 
    count() |> 
    print()

  cat("\nCode letters in list")
  df |> 
    group_by(code_letter) |>
    count() |>
    mutate(
      `%` = round(n/nrow(df)*100,1)
    ) |>
    print(n=120)

  cat("\nCode lengths and letters in list")
  df |> 
    group_by(code_length, code_letter) |>
    count() |>
    mutate(
      `%` = round(n/nrow(df)*100,1)
    ) |>
    ungroup() |> 
    arrange(code_letter, code_length) |> 
    print(n=120)
}

# Describe HWA code list

In [5]:
describe_function(hwa, hwa_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m   496

Code lengths in list[38;5;246m# A tibble: 3 × 2[39m
[38;5;246m# Groups:   code_length [3][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3     6
[38;5;250m2[39m           4   341
[38;5;250m3[39m           5   149

Code letters in list[38;5;246m# A tibble: 8 × 3[39m
[38;5;246m# Groups:   code_letter [8][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m D               1   0.2
[38;5;250m2[39m G               5   1  
[38;5;250m3[39m I               2   0.4
[38;5;250m4[39m M             150  30.2
[38;5;250m5[39m Q               1   0.2
[38;5;250m6[39m R              13   2.6
[38;5;250m7[39m S             297  59.9
[38;5;250m8[39m T              27   5.4

Code lengths and le

## Parent and child codes

#### Describe the parent codes

In [6]:
describe_function(hwa |> 
  filter(has_child), hwa_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m    56

Code lengths in list[38;5;246m# A tibble: 2 × 2[39m
[38;5;246m# Groups:   code_length [2][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3     2
[38;5;250m2[39m           4    54

Code letters in list[38;5;246m# A tibble: 2 × 3[39m
[38;5;246m# Groups:   code_letter [2][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m M              17  30.4
[38;5;250m2[39m S              39  69.6

Code lengths and letters in list[38;5;246m# A tibble: 4 × 4[39m
  code_length code_letter     n   `%`
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m           3 M               1   1.8
[38;5;250m

In [7]:
hwa |> 
  filter(has_child) |>
  print(n=120)

[38;5;246m# A tibble: 56 × 6[39m
   hwa_code hwa_desc                                code_length code_letter has_child is_child
   [3m[38;5;246m<chr>[39m[23m    [3m[38;5;246m<chr>[39m[23m                                         [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<lgl>[39m[23m     [3m[38;5;246m<lgl>[39m[23m   
[38;5;250m 1[39m M069     Rheumatoid arthritis unspecified                  4 M           TRUE      FALSE   
[38;5;250m 2[39m M10      Gout                                              3 M           TRUE      FALSE   
[38;5;250m 3[39m M109     Gout unspecified                                  4 M           TRUE      TRUE    
[38;5;250m 4[39m M139     Arthritis unspecified                             4 M           TRUE      FALSE   
[38;5;250m 5[39m S508     Other superficial injuries of forearm             4 S           TRUE      FALSE   
[38;5;250m 6[39m S520     Fracture of upper end of ulna             

see which codes are children

In [8]:
hwa |> 
  filter(is_child) |>
  print(n=120)

[38;5;246m# A tibble: 106 × 6[39m
    hwa_code hwa_desc                                 code_length code_letter has_child is_child
    [3m[38;5;246m<chr>[39m[23m    [3m[38;5;246m<chr>[39m[23m                                          [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<lgl>[39m[23m     [3m[38;5;246m<lgl>[39m[23m   
[38;5;250m  1[39m S4300    Dislocation of shoulder unspecified                5 S           FALSE     TRUE    
[38;5;250m  2[39m S4301    Anterior dislocation of humerus                    5 S           FALSE     TRUE    
[38;5;250m  3[39m S431     Dislocation of acromioclavicular joint             4 S           FALSE     TRUE    
[38;5;250m  4[39m S433     Disloc oth unsp parts shoulder girdle              4 S           FALSE     TRUE    
[38;5;250m  5[39m S434     Sprain and strain of shoulder joint                4 S           FALSE     TRUE    
[38;5;250m  6[39m S435     Sprain strain acromioclavi

See which codes have 1-2-1 mapping in the CMS code list

In [9]:
hwa <- hwa |> 
  mutate(
    has_cms = map_lgl(hwa_code,  ~ .x %in% cms$cms_code)
  )

In [10]:
print("N codes with 1-2-1 mapping")
hwa |>
  filter(has_cms) |>
  count() |>
  mutate(
    `%` = n/nrow(hwa)*100
  ) |>
  print()

[1] "N codes with 1-2-1 mapping"
[38;5;246m# A tibble: 1 × 2[39m
      n   `%`
  [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m   387  78.0


# Mapping

## Parent Codes

I will only need to map the parent codes and the single codes.  
First, map the parent codes

In [11]:
print("parents with 1-2-1 mapping")
hwa |>
  filter(has_cms) |>
  filter(has_child) |>
  count() |>
  mutate(
    `%` = n/nrow(hwa |> filter(has_child))*100
  ) |>
  print()


[1] "parents with 1-2-1 mapping"
[38;5;246m# A tibble: 1 × 2[39m
      n   `%`
  [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m    54  96.4


Most parent codes have 1-2-1 maps. Map these and check the mapping

In [12]:
print("which parents have 1-2-1 mapping?")
parent_cms <- hwa |>
  filter(has_cms) |>
  filter(has_child) |>
  crossing(cms) |>
  filter(
    hwa_code == cms_code
  )
options(width=200)
parent_cms |> 
  select(hwa_code, hwa_desc, cms_desc, cms_code) |>
  print(n=120)

[1] "which parents have 1-2-1 mapping?"
[38;5;246m# A tibble: 54 × 4[39m
   hwa_code hwa_desc                                cms_desc                                                                                                                       cms_code
   [3m[38;5;246m<chr>[39m[23m    [3m[38;5;246m<chr>[39m[23m                                   [3m[38;5;246m<chr>[39m[23m                                                                                                                          [3m[38;5;246m<chr>[39m[23m   
[38;5;250m 1[39m M069     Rheumatoid arthritis unspecified        Rheumatoid arthritis, unspecified                                                                                              M069    
[38;5;250m 2[39m M10      Gout                                    Gout                                                                                                                           M10     
[38;5;250m 3[39m M109     Gout unspecif

In [13]:
describe_function(parent_cms, cms_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m    54

Code lengths in list[38;5;246m# A tibble: 2 × 2[39m
[38;5;246m# Groups:   code_length [2][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3     2
[38;5;250m2[39m           4    52

Code letters in list[38;5;246m# A tibble: 2 × 3[39m
[38;5;246m# Groups:   code_letter [2][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m M              15  27.8
[38;5;250m2[39m S              39  72.2

Code lengths and letters in list[38;5;246m# A tibble: 4 × 4[39m
  code_length code_letter     n   `%`
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m           3 M               1   1.9
[38;5;250m

These look fine, the mappings look correct, and the sub-codes for these can be extracted becuase they are all going to be MSK (accoring to the HWA definition)

In [14]:
parent_cms_map <- parent_cms |>
  select(-cms_desc, -cms_code) |> 
  crossing(cms) |> 
  filter(
    startsWith(cms_code, hwa_code)
  )  

In [15]:
describe_function(parent_cms_map, cms_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m [4m1[24m[4m1[24m711

Code lengths in list[38;5;246m# A tibble: 2 × 2[39m
[38;5;246m# Groups:   code_length [2][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3   638
[38;5;250m2[39m           4 [4m1[24m[4m1[24m073

Code letters in list[38;5;246m# A tibble: 2 × 3[39m
[38;5;246m# Groups:   code_letter [2][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m M             426   3.6
[38;5;250m2[39m S           [4m1[24m[4m1[24m285  96.4

Code lengths and letters in list[38;5;246m# A tibble: 4 × 4[39m
  code_length code_letter     n   `%`
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m

Now check out the parent codes that do not have 1-2-1 mapping

In [16]:
hwa |>
  filter(!has_cms,has_child) |>
  print(n=120)

[38;5;246m# A tibble: 2 × 7[39m
  hwa_code hwa_desc                         code_length code_letter has_child is_child has_cms
  [3m[38;5;246m<chr>[39m[23m    [3m[38;5;246m<chr>[39m[23m                                  [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<lgl>[39m[23m     [3m[38;5;246m<lgl>[39m[23m    [3m[38;5;246m<lgl>[39m[23m  
[38;5;250m1[39m M139     Arthritis unspecified                      4 M           TRUE      FALSE    FALSE  
[38;5;250m2[39m M665     Spont rupture unspecified tendon           4 M           TRUE      FALSE    FALSE  


I use the [UMLS](https://uts.nlm.nih.gov/uts/umls/home) browser to find realted terms for these codes using the code description   
For M139 Arthritis unspecified the realted term is "Arthritis", which the UMLS maps down to ICD-10-CM M19.90 Arthritis Not Otherwise Specified.   
for M665 Spontaneous rupture unspecified tendon, the realted term is Non-traumatic tendon rupture, which UMLS maps to ICD-10-CM M66.9 Spontaneous rupture of unspecified tendon.   
So for these two codes, they can be mapped to the related terms

In [17]:
parent_cms_umls <- hwa |> 
  filter(!has_cms, has_child) |>
  mutate(
    code_to_map = case_when(
      hwa_code == "M139" ~ "M1990",
      hwa_code == "M665" ~ "M669"
    )
  ) |>
  crossing(cms) |>
  filter(
    startsWith(cms_code, code_to_map)
  ) |> 
  select(-code_to_map) 

parent_cms_join <- bind_rows(parent_cms_map, parent_cms_umls)

The child codes will not need to be mapped, as they've been mapped via their parent code

### Describe the parent/child code list

In [18]:
describe_function(parent_cms_join, cms_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m [4m1[24m[4m1[24m713

Code lengths in list[38;5;246m# A tibble: 2 × 2[39m
[38;5;246m# Groups:   code_length [2][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3   638
[38;5;250m2[39m           4 [4m1[24m[4m1[24m075

Code letters in list[38;5;246m# A tibble: 2 × 3[39m
[38;5;246m# Groups:   code_letter [2][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m M             428   3.7
[38;5;250m2[39m S           [4m1[24m[4m1[24m285  96.3

Code lengths and letters in list[38;5;246m# A tibble: 4 × 4[39m
  code_length code_letter     n   `%`
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m

## Single codes

### Describe the single codes

In [19]:
single_codes <- hwa |> 
  filter(!has_child & !is_child) 

describe_function(single_codes, hwa_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m   336

Code lengths in list[38;5;246m# A tibble: 3 × 2[39m
[38;5;246m# Groups:   code_length [3][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3     4
[38;5;250m2[39m           4   281
[38;5;250m3[39m           5    51

Code letters in list[38;5;246m# A tibble: 8 × 3[39m
[38;5;246m# Groups:   code_letter [8][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m D               1   0.3
[38;5;250m2[39m G               5   1.5
[38;5;250m3[39m I               2   0.6
[38;5;250m4[39m M             102  30.4
[38;5;250m5[39m Q               1   0.3
[38;5;250m6[39m R              13   3.9
[38;5;250m7[39m S             185  55.1
[38;5;250m8[39m T              27   8  

Code lengths and le

Now map the single codes and keep those with 1-2-1 mapping. Print these and manually check the maps

In [20]:
single_cms <- single_codes |> 
  filter(has_cms) |> 
  crossing(cms) |>
  filter(
    hwa_code == cms_code
  ) 
options(width=200)
single_cms |> 
  select(hwa_code, hwa_desc, cms_desc, cms_code) |>
  print(n=400)

[38;5;246m# A tibble: 253 × 4[39m
    hwa_code hwa_desc                                 cms_desc                                                                                                                                  cms_code
    [3m[38;5;246m<chr>[39m[23m    [3m[38;5;246m<chr>[39m[23m                                    [3m[38;5;246m<chr>[39m[23m                                                                                                                                     [3m[38;5;246m<chr>[39m[23m   
[38;5;250m  1[39m D480     Neoplm unc unk beh bone articular cart   Neoplasm of uncertain behavior of bone/artic cartl                                                                                        D480    
[38;5;250m  2[39m G439     Migraine unspecified                     Migraine, unspecified                                                                                                                     G439    
[38;5;250m  3[39m G442    

Describe the single codes with 1-2-1 maps

In [21]:
describe_function(single_cms, hwa_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m   253

Code lengths in list[38;5;246m# A tibble: 3 × 2[39m
[38;5;246m# Groups:   code_length [3][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3     4
[38;5;250m2[39m           4   223
[38;5;250m3[39m           5    26

Code letters in list[38;5;246m# A tibble: 8 × 3[39m
[38;5;246m# Groups:   code_letter [8][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m D               1   0.4
[38;5;250m2[39m G               5   2  
[38;5;250m3[39m I               2   0.8
[38;5;250m4[39m M              81  32  
[38;5;250m5[39m Q               1   0.4
[38;5;250m6[39m R               9   3.6
[38;5;250m7[39m S             152  60.1
[38;5;250m8[39m T               2   0.8

Code lengths and le

All of the single codes with 1-2-1 maps look OK, except T07. In ICD-10-CM, T07 has sub codes for Suicide attempt, and thus should not be included. This code will be removed. All other can be safely mapped, and thier sub-codes can be safely extracted. Map these and describe the dataset

In [22]:
single_cms_mapped <- single_cms |> 
  filter(hwa_code != "T07") |> 
  select(-cms_desc, -cms_code) |>
  crossing(cms) |>
  filter(
    startsWith(cms_code, hwa_code)
  ) |> 
  # I dont need the codes I have already mapped using the parent codes
  anti_join(
    parent_cms_join,
    join_by(cms_code)
  )  
  
describe_function(single_cms_mapped, cms_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m  [4m9[24m214

Code lengths in list[38;5;246m# A tibble: 3 × 2[39m
[38;5;246m# Groups:   code_length [3][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3    31
[38;5;250m2[39m           4  [4m8[24m914
[38;5;250m3[39m           5   269

Code letters in list[38;5;246m# A tibble: 8 × 3[39m
[38;5;246m# Groups:   code_letter [8][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m D               1   0  
[38;5;250m2[39m G              32   0.3
[38;5;250m3[39m I              27   0.3
[38;5;250m4[39m M             653   7.1
[38;5;250m5[39m Q               1   0  
[38;5;250m6[39m R              11   0.1
[38;5;250m7[39m S            [4m8[24m485  92.1
[38;5;250m8[39m T               4

In [23]:
cms_join <- bind_rows(parent_cms_join, single_cms_mapped)

In [24]:
describe_function(cms_join, cms_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m [4m2[24m[4m0[24m927

Code lengths in list[38;5;246m# A tibble: 3 × 2[39m
[38;5;246m# Groups:   code_length [3][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3   669
[38;5;250m2[39m           4 [4m1[24m[4m9[24m989
[38;5;250m3[39m           5   269

Code letters in list[38;5;246m# A tibble: 8 × 3[39m
[38;5;246m# Groups:   code_letter [8][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m D               1   0  
[38;5;250m2[39m G              32   0.2
[38;5;250m3[39m I              27   0.1
[38;5;250m4[39m M            [4m1[24m081   5.2
[38;5;250m5[39m Q               1   0  
[38;5;250m6[39m R              11   0.1
[38;5;250m7[39m S           [4m1[24m[4m9[24m770  94.5

Now examine the single codes without 1-2-1 mapping

In [25]:
single_nocms <- hwa |> 
  filter(!has_cms, !has_child & !is_child)

describe_function(single_nocms, hwa_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m    83

Code lengths in list[38;5;246m# A tibble: 2 × 2[39m
[38;5;246m# Groups:   code_length [2][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           4    58
[38;5;250m2[39m           5    25

Code letters in list[38;5;246m# A tibble: 4 × 3[39m
[38;5;246m# Groups:   code_letter [4][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m M              21  25.3
[38;5;250m2[39m R               4   4.8
[38;5;250m3[39m S              33  39.8
[38;5;250m4[39m T              25  30.1

Code lengths and letters in list[38;5;246m# A tibble: 7 × 4[39m
  code_length code_letter     n   `%`
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;2

In [26]:
single_nocms |>
  print(n=120)

[38;5;246m# A tibble: 83 × 7[39m
   hwa_code hwa_desc                                 code_length code_letter has_child is_child has_cms
   [3m[38;5;246m<chr>[39m[23m    [3m[38;5;246m<chr>[39m[23m                                          [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<lgl>[39m[23m     [3m[38;5;246m<lgl>[39m[23m    [3m[38;5;246m<lgl>[39m[23m  
[38;5;250m 1[39m M074     Arthropathy in Crohn s disease K50 +               4 M           FALSE     FALSE    FALSE  
[38;5;250m 2[39m M1199    Crystal arthropathy unsp site unsp                 5 M           FALSE     FALSE    FALSE  
[38;5;250m 3[39m S497     Multiple injuries shoulder upper arm               4 S           FALSE     FALSE    FALSE  
[38;5;250m 4[39m M1981    Other specified arthrosis shoulder                 5 M           FALSE     FALSE    FALSE  
[38;5;250m 5[39m M1987    Other specified arthrosis ankle foot               5 M           FALSE    

It is posisble that these codes could be mapped using their chapter, i.e. by taking the code 1 level up and seeing if   
- Step 1) those codes are suitable (MSK)    
and    
- Step 2) whether they have 1-2-1 CMS maps   

Step 1:

In [27]:
single_nocms <- single_nocms |> 
  mutate(
    short_code = case_when(
      str_count(hwa_code) > 3 ~ str_sub(hwa_code, 1, -2),
      T ~ hwa_code
    )
  ) |> 
  left_join(
    who_2019,
    join_by(short_code == who_code)
  ) 

options(width=200)
single_nocms |>
  select(hwa_code,short_code, hwa_desc, who_desc) |>
  print(n=120)

[38;5;246m# A tibble: 83 × 4[39m
   hwa_code short_code hwa_desc                                 who_desc                                                                         
   [3m[38;5;246m<chr>[39m[23m    [3m[38;5;246m<chr>[39m[23m      [3m[38;5;246m<chr>[39m[23m                                    [3m[38;5;246m<chr>[39m[23m                                                                            
[38;5;250m 1[39m M074     M07        Arthropathy in Crohn s disease K50 +     Psoriatic and enteropathic arthropathies                                         
[38;5;250m 2[39m M1199    M119       Crystal arthropathy unsp site unsp       Crystal arthropathy, unspecified                                                 
[38;5;250m 3[39m S497     S49        Multiple injuries shoulder upper arm     Other and unspecified injuries of shoulder and upper arm                         
[38;5;250m 4[39m M1981    M198       Other specified arthrosis shoulder       Other s

THey all look fine. See how many have 1-2-1 mapping in the CMS list

In [28]:
single_nocms_map <- single_nocms |> 
  select(hwa_code, hwa_desc, short_code, code_length, code_letter) |>
  crossing(cms) |>
  filter(
   cms_code == short_code
  )
options(width=200)
single_nocms_map |>
  select(hwa_code, hwa_desc, cms_desc, short_code,cms_code) |>
  print(n=120)

[38;5;246m# A tibble: 58 × 5[39m
   hwa_code hwa_desc                                 cms_desc                                                                                                                        short_code cms_code
   [3m[38;5;246m<chr>[39m[23m    [3m[38;5;246m<chr>[39m[23m                                    [3m[38;5;246m<chr>[39m[23m                                                                                                                           [3m[38;5;246m<chr>[39m[23m      [3m[38;5;246m<chr>[39m[23m   
[38;5;250m 1[39m M074     Arthropathy in Crohn s disease K50 +     Enteropathic arthropathies                                                                                                      M07        M07     
[38;5;250m 2[39m M1199    Crystal arthropathy unsp site unsp       Crystal arthropathy, unspecified                                                                                                M119       M119    
[38

Of these, the following codes will need to be removed:
Z094 - not an MSK FU   
T141, T143,T144, T146: the mapping is too vague in ICD-10-CM, T14 contains subcodes for suicide   

In [29]:
single_nocms_map <- single_nocms_map |>
  filter(
    !short_code %in% c("Z094", "T141", "T143", "T144", "T146")
  ) |> 
  select(hwa_code, hwa_desc, short_code, code_length, code_letter) |> 
  crossing(cms) |>
  filter(
    startsWith(cms_code, short_code)
  ) |> 
  select(-short_code) |> 
  # I dont need the codes I have already mapped using the parent  and single codes with 1-2-1 maps
  anti_join(
    cms_join,
    join_by(cms_code)
  ) 

describe_function(single_nocms_map, cms_code)
  

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m  [4m5[24m992

Code lengths in list[38;5;246m# A tibble: 2 × 2[39m
[38;5;246m# Groups:   code_length [2][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           4  [4m4[24m879
[38;5;250m2[39m           5  [4m1[24m113

Code letters in list[38;5;246m# A tibble: 4 × 3[39m
[38;5;246m# Groups:   code_letter [4][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m M            [4m1[24m454  24.3
[38;5;250m2[39m R              38   0.6
[38;5;250m3[39m S            [4m4[24m460  74.4
[38;5;250m4[39m T              40   0.7

Code lengths and letters in list[38;5;246m# A tibble: 6 × 4[39m
  code_length code_letter     n   `%`
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<chr>[39m[23m     

Create the final dataset. Use the allow combinations tribble to keep codes of the correct length for each code letter

In [30]:
cms_final <- bind_rows(cms_join, single_nocms_map) 

# Describe the final codes

In [31]:
describe_function(cms_final, cms_code)

[1] "N codes in list"
[38;5;246m# A tibble: 1 × 1[39m
      n
  [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m [4m2[24m[4m6[24m919

Code lengths in list[38;5;246m# A tibble: 3 × 2[39m
[38;5;246m# Groups:   code_length [3][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3   669
[38;5;250m2[39m           4 [4m2[24m[4m4[24m868
[38;5;250m3[39m           5  [4m1[24m382

Code letters in list[38;5;246m# A tibble: 8 × 3[39m
[38;5;246m# Groups:   code_letter [8][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m D               1   0  
[38;5;250m2[39m G              32   0.1
[38;5;250m3[39m I              27   0.1
[38;5;250m4[39m M            [4m2[24m535   9.4
[38;5;250m5[39m Q               1   0  
[38;5;250m6[39m R              49   0.2
[38;5;250m7[39m S           [4m2[24m[4m4[24m

In [39]:
hwa_figure_a <- hwa |> 
  mutate(
    letter = str_sub(hwa_code, 1, 1)
  )  |> 
  group_by(letter) |>
  mutate(
    letter = factor(letter, 
    # levels = c("Not Mapped","M","S","T","D", "G","I","Q","R")
    ),
    n = round(n()/nrow(hwa)*100,1)
  ) |>
  ungroup() |> 
  distinct(letter, n) |> 
  mutate(
    code = "hwa"
  )

hwa_figure_b <- hwa |>
  left_join(
    cms_final,
      join_by(hwa_code)
  ) |> 
  filter(
    !is.na(cms_code)
  ) |>
  mutate(
    letter = ifelse(is.na(cms_code), "Not Mapped", str_sub(cms_code, 1, 1))
  ) |>
  group_by(letter) |>
  mutate(
    n = round(n()/nrow(cms_final)*100,1)
  ) |>
  ungroup() |>
  distinct(letter, n) |>
  mutate(
    letter = factor(
      letter, 
      # levels = c("Not Mapped","M","S","T","D", "G","I","Q","R")
    ),
    code = "CMS"
  )

hwa_figure <- hwa_figure_a |>
  bind_rows(hwa_figure_b) |> 
  mutate(
    code = ifelse(code == "hwa", "ICD-10-AM", "ICD-10-CM"),
    code = factor(
      code, 
      levels = c("ICD-10-AM", "ICD-10-CM")
    )
  )

# Save

In [40]:
write.csv(cms_join, "hwa_joined.csv")
write.csv(hwa_figure, "hwa_figure.csv")