# Data

In [4]:
# R PACKAGES
# Install pacman only when it is missing. requireNamespace() checks for the
# package without attaching it, so the session state is the same whether or
# not pacman had to be installed first; pacman is called with `pacman::` below.
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# p_load() attaches each listed package, installing any that are unavailable.
pacman::p_load(arrow, tidyverse, janitor, readxl, stringdist)

In [5]:
# CMS ICD-10-CM code list
#
# Builds `cms`: one row per ICD-10-CM code with its short description
# (columns `cms_code`, `cms_desc`).
#
# The order file is read as fixed-width records. Layout used here (taken from
# the CMS order-file documentation -- confirm against the release in use):
#   cols  1-5   order number
#   cols  7-13  ICD-10-CM code
#   col   15    header / billable flag
#   cols 17-76  short description (space-padded to 60 characters)
#   cols 78+    long description
#
# Slicing by position instead of splitting on runs of whitespace matters for
# short descriptions that fill all 60 columns: only a single space separates
# them from the long description, so a "two or more spaces" split would leave
# both descriptions fused together in `cms_desc`.
cms <- tibble(value = read_lines("icd10cm_order_2019.txt")) |>
  # Ignore blank lines rather than emitting rows with an empty code.
  filter(str_trim(value) != "") |>
  transmute(
    cms_code = str_trim(str_sub(value, 7, 13)),
    cms_desc = str_trim(str_sub(value, 17, 76))
  ) |>
  # Keep the first row encountered for each code.
  distinct(cms_code, .keep_all = TRUE)


# Mapping

## M codes

In [None]:
# M codes from the CMS list
# Keep the unique codes in `cms` that start with "M" and tag each row with
# group = "mcode". The tag is added after distinct(), because
# distinct(cms_code) keeps only the `cms_code` column.
mcodes <- cms |>
  filter(str_starts(cms_code, "M")) |>
  distinct(cms_code) |>
  mutate(group = "mcode")

# Total number of M codes.
print("N M-codes:")
mcodes |>
  count() |>
  print()

# Number of M codes at each code length (characters in `cms_code`).
print("N M-codes by code length:")
mcodes |>
  mutate(code_count = str_length(cms_code)) |>
  count(code_count) |>
  print()

Save the M-code list to a CSV file for later analysis.

In [7]:
# Write the M-code table to mcodes.csv in the current working directory.
mcodes |>
  write_csv("mcodes.csv")