# Data

In [1]:
# R PACKAGES
if(!require("pacman")) install.packages("pacman")
pacman::p_load(arrow, tidyverse, janitor, readxl, stringdist, ggalluvial)

Loading required package: pacman


In [2]:
#CMS ICD10 CM code list
cms <- read_lines(
  "icd10cm_order_2019.txt") |> 
  as_tibble() |> 
  separate_wider_regex(
    value,
    patterns = c(
    id = "[^\\s]+",         # First non-whitespace block
    "\\s+",                 # One or more spaces
    rest = ".*"             # Everything else
      )
  ) |> 
  separate_wider_regex(
    rest,
    patterns = c(
      cms_code = "[^\\s]+",         # First non-whitespace block
      "\\s+",                 # One or more spaces
      rest_again = ".*"             # Everything else
    )
  ) |> 
  separate_wider_regex(
    rest_again,
    patterns = c(
      level = "[^\\s]+",         # First non-whitespace block
      "\\s+",                 # One or more spaces
      rest_desc = ".*"             # Everything else
    )
  ) |> 
  mutate(
    rest_desc = str_replace_all(rest_desc, "\\s{2,}", "!!!")
  ) |> 
    separate_wider_delim(
      rest_desc,
      delim = "!!!",
      too_few = "align_start",
      too_many = 'merge',
      names = c("cms_desc", "cms_desc_long")
    ) |> 
  select(cms_code, cms_desc) |> 
  distinct(cms_code, .keep_all = T)

who_2019 <- read_delim(
  "icd102019syst_codes.txt", 
  delim = ";", 
  col_names=F, 
  show_col_types=F
) |> 
  select(
    who_code = X8, 
    who_desc = X9
  ) 
power_codes <- read_csv('power_10.csv', show_col_types = FALSE) |> 
  clean_names() |> 
  select(-1) |> 
  mutate(
      power_code = str_remove_all(exp_codes, "[:punct:]")
  ) |> 
  select(-exp_codes) |> 
  left_join(
      who_2019,
      join_by(power_code == who_code)
  ) |> 
  distinct(power_code, .keep_all = T)

write_csv(power_codes, "power_codes.csv")

[1m[22mOne or more parsing issues, call `problems()` on your data frame for details, e.g.:
  dat <- vroom(...)
  problems(dat) 


Check if any of the power codea are not official WHO ICd-10 codes

In [32]:
power_codes |> 
  filter(is.na(who_desc)) |> 
  print()

[38;5;246m# A tibble: 2 × 3[39m
  power_label      power_code who_desc
  [3m[38;5;246m<chr>[39m[23m            [3m[38;5;246m<chr>[39m[23m      [3m[38;5;246m<chr>[39m[23m   
[38;5;250m1[39m Non-trauma spine M52        [31mNA[39m      
[38;5;250m2[39m Synovitis        M69        [31mNA[39m      


These are fine, they are codes within ranges provded by power, and thus may not have been meant to be included and can be removed

In [33]:
power_codes <- power_codes |>
  filter(! power_code %in% c("M52", "M69"))

Describe the orginal code set

In [34]:
print(
  paste(
    "There are",
    nrow(power_codes),
    "codes in the power set"
  )
)

[1] "There are 164 codes in the power set"


In [35]:
print("Number of codes by letter:")
power_codes <- power_codes |> 
  mutate(
    code_letter = str_sub(power_code, 1, 1)
  )
power_codes|> 
  group_by(code_letter) |>
  count() |>
  print()

[1] "Number of codes by letter:"
[38;5;246m# A tibble: 3 × 2[39m
[38;5;246m# Groups:   code_letter [3][39m
  code_letter     n
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m M              85
[38;5;250m2[39m S              63
[38;5;250m3[39m T              16


In [36]:
print("Number of codes by code length:")
power_codes <- power_codes |> 
  mutate(
    code_length = str_count(power_code)
  ) 
power_codes |>
  group_by(code_length) |>
  count() |>
  print()

[1] "Number of codes by code length:"
[38;5;246m# A tibble: 2 × 2[39m
[38;5;246m# Groups:   code_length [2][39m
  code_length     n
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m           3    88
[38;5;250m2[39m           4    76


In [37]:
power_codes |> 
  group_by(code_letter, code_length) |> 
  count() |>
  print(
  )


[38;5;246m# A tibble: 6 × 3[39m
[38;5;246m# Groups:   code_letter, code_length [6][39m
  code_letter code_length     n
  [3m[38;5;246m<chr>[39m[23m             [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m
[38;5;250m1[39m M                     3    77
[38;5;250m2[39m M                     4     8
[38;5;250m3[39m S                     3     8
[38;5;250m4[39m S                     4    55
[38;5;250m5[39m T                     3     3
[38;5;250m6[39m T                     4    13


Try and map the code 1-2-1 with the CMS codes

In [38]:
power_121 <- power_codes |>
  crossing(cms) |>
  filter(cms_code==power_code)

print(
  paste(
    "There are",
    nrow(power_121),
    "codes in the power set that map to CMS codes"
  )
)

[1] "There are 136 codes in the power set that map to CMS codes"


In [39]:
options(width = 200)
power_121 |> 
  select(power_code, who_desc, cms_desc, cms_code,) |>
  print(n=140)

[38;5;246m# A tibble: 136 × 4[39m
    power_code who_desc                                                                                    cms_desc                                                                             cms_code
    [3m[38;5;246m<chr>[39m[23m      [3m[38;5;246m<chr>[39m[23m                                                                                       [3m[38;5;246m<chr>[39m[23m                                                                                [3m[38;5;246m<chr>[39m[23m   
[38;5;250m  1[39m M45        Ankylosing spondylitis                                                                      Ankylosing spondylitis                                                               M45     
[38;5;250m  2[39m M46        Other inflammatory spondylopathies                                                          Other inflammatory spondylopathies                                                   M46     
[38;5;250m  3[39m M20     

These look like good 1-2-1 maps, and that the sub-codes could be etracted from CMS

In [40]:
power_121 <- power_121 |> 
  select(power_code, who_desc) |>
  crossing(cms) |> 
  filter(
    startsWith(cms_code, power_code)
  )

In [41]:
power_121 |> 
  glimpse()

Rows: 27,474
Columns: 4
$ power_code [3m[38;5;246m<chr>[39m[23m "M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00"[38;5;246m, [39m"M00…
$ who_desc   [3m[38;5;246m<chr>[39m[23m "Pyogenic arthritis"[38;5;246m, [39m"Pyogenic arthritis"[38;5;246m, [39m"Pyogenic arthritis"[38;5;246m, [39m"Pyogenic arthritis"[38;5;246m, [39m"Pyogenic arthritis"[38;5;246m, [39m"Pyogenic arthritis"[38;5;246m, [39m"Pyogenic arthritis"[38;5;246m, [39m"Pyogenic arthritis"[38;5;246m, [39m"Py

Examine the codes that did not have a 1-2-1 map

In [42]:
power_codes |> 
  anti_join(power_121) |>
  select(power_code, who_desc) |>
  print(n=140)

[1m[22mJoining with `by = join_by(power_code, who_desc)`
[38;5;246m# A tibble: 28 × 2[39m
   power_code who_desc                                                                                         
   [3m[38;5;246m<chr>[39m[23m      [3m[38;5;246m<chr>[39m[23m                                                                                            
[38;5;250m 1[39m M03        Postinfective and reactive arthropathies in diseases classified elsewhere                        
[38;5;250m 2[39m M09        Juvenile arthritis in diseases classified elsewhere                                              
[38;5;250m 3[39m M68        Disorders of synovium and tendon in diseases classified elsewhere                                
[38;5;250m 4[39m M73        Soft tissue disorders in diseases classified elsewhere                                           
[38;5;250m 5[39m M82        Osteoporosis in diseases classified elsewhere                                             

It is posisble that the 4-character codes could be mapped via thier chapter/3 character codes, but first these need to be checked to snure thy are MSK

In [43]:
power_codes_not <- power_codes |> 
  anti_join(
    power_121,
    join_by(power_code)
  ) |>
  mutate(
    code_length = str_count(power_code)
  ) |>
  filter(
    code_length >3
  ) |>
  mutate(
    short_code = str_sub(power_code,1,3)
  ) |> 
  select(
    power_code, 
    short_code,
    who_desc
  ) |> 
  crossing(cms) |> 
  filter(
    short_code == cms_code
  )

options(width = 200)
  power_codes_not |>
    print(n=30)

[38;5;246m# A tibble: 7 × 5[39m
  power_code short_code who_desc                                                                    cms_code cms_desc                                                                                   
  [3m[38;5;246m<chr>[39m[23m      [3m[38;5;246m<chr>[39m[23m      [3m[38;5;246m<chr>[39m[23m                                                                       [3m[38;5;246m<chr>[39m[23m    [3m[38;5;246m<chr>[39m[23m                                                                                      
[38;5;250m1[39m S133       S13        Multiple dislocations of neck                                               S13      Dislocation and sprain of joints and ligaments at neck level Dislocation and sprain of joi…
[38;5;250m2[39m S221       S22        Multiple fractures of thoracic spine                                        S22      Fracture of rib(s), sternum and thoracic spine                                             
[38;

Those look ok to map

In [44]:
codes_not_map <- power_codes_not |> 
  select(
    power_code, 
    short_code,
    who_desc
  ) |> 
  crossing(cms) |>
  filter(
    startsWith(cms_code, short_code)
  ) |> 
  select(-short_code)

In [45]:
power_join <- power_121 |> 
  bind_rows(codes_not_map)

Describe the final code list

In [46]:
print(
  paste(
    "There are",
    nrow(power_join),
    "codes in the power set"
  )
)

[1] "There are 30835 codes in the power set"


In [47]:
print("Number of codes by letter:")
power_join |> 
  mutate(
    code_letter = str_sub(cms_code, 1, 1)
  ) |> 
  group_by(code_letter) |>
  count() |>
  mutate(
    `%` = n/nrow(power_join)*100
  ) |>
  print()

[1] "Number of codes by letter:"
[38;5;246m# A tibble: 2 × 3[39m
[38;5;246m# Groups:   code_letter [2][39m
  code_letter     n   `%`
  [3m[38;5;246m<chr>[39m[23m       [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m M            [4m7[24m878  25.5
[38;5;250m2[39m S           [4m2[24m[4m2[24m957  74.5


None of the T codes mapped over

In [48]:
power_join |> 
  mutate(
    code_length = str_count(cms_code)
  ) |> 
  group_by(code_length) |>
  count() |>
  mutate(
    `%` = n/nrow(power_join)*100
  ) |>
  print()

[38;5;246m# A tibble: 5 × 3[39m
[38;5;246m# Groups:   code_length [5][39m
  code_length     n    `%`
        [3m[38;5;246m<int>[39m[23m [3m[38;5;246m<int>[39m[23m  [3m[38;5;246m<dbl>[39m[23m
[38;5;250m1[39m           3    87  0.282
[38;5;250m2[39m           4   652  2.11 
[38;5;250m3[39m           5  [4m2[24m921  9.47 
[38;5;250m4[39m           6  [4m6[24m706 21.7  
[38;5;250m5[39m           7 [4m2[24m[4m0[24m469 66.4  


In [252]:
power_figure_a <- power_codes |>
  mutate(
    letter = str_sub(power_code, 1, 1)
  )  |> 
  group_by(letter) |>
  mutate(
    letter = factor(letter, levels = c("Not Mapped","M", "S", "T")),
    n = round(n()/nrow(power_codes)*100,1)
  ) |>
  ungroup() |> 
  distinct(letter, n) |> 
  mutate(
    code = "Power"
  )
power_figure_b <- power_codes |>
  left_join(
      power_join,
      join_by(power_code)
  ) |> 
  filter(
    !is.na(cms_code)
  ) |>
  mutate(
    letter = ifelse(is.na(cms_code), "Not Mapped", str_sub(cms_code, 1, 1))
  ) |>
  group_by(letter) |>
  mutate(
    n = round(n()/nrow(power_join)*100,1)
  ) |>
  ungroup() |>
  distinct(letter, n) |>
  mutate(
    letter = factor(
      letter, 
      levels = c("Not Mapped","M", "S", "T")
    ),
    code = "CMS"
  )

power_figure <- power_figure_a |>
  bind_rows(power_figure_b) |> 
  mutate(
    code = ifelse(code == "Power", "ICD-10-CA", "ICD-10-CM"),
    code = factor(
      code, 
      levels = c("ICD-10-CA", "ICD-10-CM")
    )
  )



In [253]:
power_figure

[38;5;246m# A tibble: 5 × 3[39m
  letter     n code     
  [3m[38;5;246m<fct>[39m[23m  [3m[38;5;246m<dbl>[39m[23m [3m[38;5;246m<fct>[39m[23m    
[38;5;250m1[39m M       51.8 ICD-10-CA
[38;5;250m2[39m S       38.4 ICD-10-CA
[38;5;250m3[39m T        9.8 ICD-10-CA
[38;5;250m4[39m M       25.5 ICD-10-CM
[38;5;250m5[39m S       74.5 ICD-10-CM

In [254]:
write_csv(power_join, "power_cms.csv")
write.csv(power_figure, "power_figure.csv")