# Data

In [None]:
# R PACKAGES
if(!require("pacman")) install.packages("pacman")
pacman::p_load(arrow, tidyverse, janitor, readxl, stringdist, ggalluvial)

In [None]:
# CMS ICD-10-CM code list
# Each line of the order file is split on whitespace into three leading
# blocks (named id, cms_code and level here) followed by the description
# text. Only the code and the first description segment are kept.
cms <- read_lines("icd10cm_order_2019.txt") |>
  as_tibble() |>
  separate_wider_regex(
    value,
    patterns = c(
      id = "[^\\s]+",        # first non-whitespace block
      "\\s+",                # one or more spaces
      cms_code = "[^\\s]+",  # second non-whitespace block
      "\\s+",
      level = "[^\\s]+",     # third non-whitespace block
      "\\s+",
      rest_desc = ".*"       # everything else
    )
  ) |>
  mutate(
    # Runs of two or more whitespace characters split the description text
    # into segments; mark them with a sentinel so they can be split on below.
    rest_desc = str_replace_all(rest_desc, "\\s{2,}", "!!!")
  ) |>
  separate_wider_delim(
    rest_desc,
    delim = "!!!",
    too_few = "align_start",  # lines with a single segment get NA for the rest
    too_many = "merge",       # any extra segments stay in cms_desc_long
    names = c("cms_desc", "cms_desc_long")
  ) |>
  select(cms_code, cms_desc) |>
  # Keep the first row seen for each code
  distinct(cms_code, .keep_all = TRUE)

# WHO ICD-10 2019 code list: semicolon-delimited with no header row, so readr
# assigns the default names X1, X2, ...; columns 8 and 9 are kept as the code
# and its description.
who_2019 <- read_delim(
  "icd102019syst_codes.txt",
  delim = ";",
  col_names = FALSE,
  show_col_types = FALSE
) |>
  select(who_code = X8, who_desc = X9)
# Power code list: drop the first column of the CSV, strip every punctuation
# character from exp_codes to get power_code, and attach the WHO description
# for codes that also appear in who_2019 (who_desc is NA otherwise).
power_codes <- read_csv("power_10.csv", show_col_types = FALSE) |>
  clean_names() |>
  select(-1) |>
  mutate(power_code = str_remove_all(exp_codes, "[:punct:]")) |>
  select(-exp_codes) |>
  left_join(who_2019, join_by(power_code == who_code)) |>
  # One row per code: keep the first occurrence
  distinct(power_code, .keep_all = TRUE)

write_csv(power_codes, "power_codes.csv")

Check if any of the power codes are not official WHO ICD-10 codes

In [None]:
# Codes with no WHO description did not match any who_code in the join
print(filter(power_codes, is.na(who_desc)))

These are fine: they are codes within ranges provided by power, so they may not have been meant to be included and can be removed.

In [33]:
# Remove M52 and M69 from the power set
power_codes <- filter(power_codes, !(power_code %in% c("M52", "M69")))

Describe the original code set

In [None]:
# Report how many codes are in the power set
print(paste("There are", nrow(power_codes), "codes in the power set"))

In [None]:
print("Number of codes by letter:")
# code_letter is the first character of each code; it is stored on
# power_codes so later cells can reuse it
power_codes <- mutate(
  power_codes,
  code_letter = str_sub(power_code, start = 1, end = 1)
)
power_codes |>
  group_by(code_letter) |>
  count() |>
  print()

In [None]:
print("Number of codes by code length:")
# code_length is the number of characters in each code; str_length() states
# that directly instead of relying on str_count()'s default pattern
power_codes <- power_codes |>
  mutate(code_length = str_length(power_code))
power_codes |>
  group_by(code_length) |>
  count() |>
  print()

In [None]:
# Count codes for every combination of first letter and code length
# (uses the code_letter and code_length columns added to power_codes)
power_codes |>
  group_by(code_letter, code_length) |>
  count() |>
  print()


Try to map the codes 1-2-1 (one-to-one) to the CMS codes

In [None]:
# Power codes that appear verbatim in the CMS list.
# An equality join finds the same matches as crossing() + filter() without
# first building every power-code x CMS-code row pair. keep = TRUE retains
# cms_code alongside power_code because later cells select it.
power_121 <- power_codes |>
  inner_join(
    cms,
    join_by(power_code == cms_code),
    keep = TRUE,
    # both sides were de-duplicated on their code column when they were built
    relationship = "one-to-one"
  ) |>
  # keep the printout ordered by code
  arrange(power_code)

print(
  paste(
    "There are",
    nrow(power_121),
    "codes in the power set that map to CMS codes"
  )
)

In [None]:
# Widen the console so both description columns fit on one line
options(width = 200)
# Show the power and CMS descriptions side by side for a visual check
print(
  select(power_121, power_code, who_desc, cms_desc, cms_code),
  n = 140
)

These look like good 1-2-1 maps, and it looks like the sub-codes could be extracted from CMS

In [40]:
# Expand each exactly-matched power code to every CMS code that begins with
# it (the code itself and its sub-codes)
power_121 <- crossing(
  select(power_121, power_code, who_desc),
  cms
) |>
  filter(startsWith(cms_code, power_code))

In [None]:
# Quick look at the columns and first values of the expanded map
glimpse(power_121)

Examine the codes that did not have a 1-2-1 map

In [None]:
# Power codes with no row in power_121. The join key is given explicitly so
# the match is on the code alone rather than on every shared column.
power_codes |>
  anti_join(power_121, join_by(power_code)) |>
  select(power_code, who_desc) |>
  print(n = 140)

It is possible that the 4-character codes could be mapped via their chapter/3-character codes, but first these need to be checked to ensure they are MSK

In [None]:
# Unmapped power codes longer than three characters, paired with the CMS
# entry for their three-character prefix (short_code) so the parent
# descriptions can be checked by eye.
power_codes_not <- power_codes |>
  anti_join(power_121, join_by(power_code)) |>
  filter(str_length(power_code) > 3) |>
  mutate(short_code = str_sub(power_code, 1, 3)) |>
  select(power_code, short_code, who_desc) |>
  # Equality join instead of crossing() + filter(): same matches without the
  # full cross product. keep = TRUE retains cms_code next to short_code.
  inner_join(
    cms,
    join_by(short_code == cms_code),
    keep = TRUE,
    # several power codes may share one prefix; cms has one row per code
    relationship = "many-to-one"
  ) |>
  # keep the printout ordered by code
  arrange(power_code)

options(width = 200)
power_codes_not |>
  print(n = 30)

Those look ok to map

In [44]:
# Map each remaining power code to every CMS code that begins with its
# three-character prefix, then drop the helper prefix column
codes_not_map <- crossing(
  select(power_codes_not, power_code, short_code, who_desc),
  cms
) |>
  filter(startsWith(cms_code, short_code)) |>
  select(-short_code)

In [45]:
# Final map: direct matches followed by the prefix-based matches
power_join <- bind_rows(power_121, codes_not_map)

Describe the final code list

In [None]:
# Report the number of rows in the final mapped list
print(paste("There are", nrow(power_join), "codes in the power set"))

In [None]:
print("Number of codes by letter:")
# Count mapped CMS codes by their first character and express each count as
# a percentage of all rows in power_join
power_join |>
  mutate(code_letter = str_sub(cms_code, start = 1, end = 1)) |>
  group_by(code_letter) |>
  count() |>
  mutate(`%` = n / nrow(power_join) * 100) |>
  print()

None of the T codes mapped over

In [None]:
# Count mapped CMS codes by number of characters and express each count as
# a percentage of all rows in power_join. str_length() states the intent
# directly instead of relying on str_count()'s default pattern.
power_join |>
  mutate(code_length = str_length(cms_code)) |>
  group_by(code_length) |>
  count() |>
  mutate(`%` = n / nrow(power_join) * 100) |>
  print()

In [252]:
# Figure data: percentage of codes by first letter, for the original power
# set and for the mapped CMS codes.

# Shared factor levels for the letter column of both halves
letter_levels <- c("Not Mapped", "M", "S", "T")

# Original power set: share of power codes starting with each letter
power_figure_a <- power_codes |>
  mutate(letter = str_sub(power_code, 1, 1)) |>
  # .by groups for this step only, so the grouping column is never rewritten
  # while it is being grouped on and no ungroup() is needed
  mutate(
    n = round(n() / nrow(power_codes) * 100, 1),
    .by = letter
  ) |>
  # letters outside letter_levels become NA
  mutate(letter = factor(letter, levels = letter_levels)) |>
  distinct(letter, n) |>
  mutate(code = "Power")

# Mapped CMS codes: share of power_join rows starting with each letter.
# The inner join keeps only power codes that have a row in power_join, as the
# original left join + filter(!is.na(cms_code)) did.
# NOTE(review): because unmapped codes are excluded, "Not Mapped" is kept as
# a factor level but never occurs as a value - confirm that is intended.
power_figure_b <- power_codes |>
  inner_join(power_join, join_by(power_code)) |>
  mutate(letter = str_sub(cms_code, 1, 1)) |>
  mutate(
    n = round(n() / nrow(power_join) * 100, 1),
    .by = letter
  ) |>
  distinct(letter, n) |>
  mutate(
    letter = factor(letter, levels = letter_levels),
    code = "CMS"
  )

# Stack both halves and relabel the source for plotting
power_figure <- power_figure_a |>
  bind_rows(power_figure_b) |>
  mutate(
    code = ifelse(code == "Power", "ICD-10-CA", "ICD-10-CM"),
    code = factor(code, levels = c("ICD-10-CA", "ICD-10-CM"))
  )



In [None]:
# Display the combined figure data (letter, percentage n, and code system)
power_figure

In [254]:
# Save the final power-to-CMS map and the figure data.
# NOTE(review): the second call is base write.csv(), which by default also
# writes a row-name column, unlike readr's write_csv() - confirm that any
# reader of power_figure.csv expects that extra first column.
write_csv(power_join, file = "power_cms.csv")
write.csv(power_figure, file = "power_figure.csv")