In [1]:
library(gtsummary)
library(dplyr)
library(stringr)
library(lubridate)
library(flextable)


Attaching package: ‘dplyr’


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union



Attaching package: ‘lubridate’


The following objects are masked from ‘package:base’:

    date, intersect, setdiff, union


“package ‘flextable’ was built under R version 4.3.3”

Attaching package: ‘flextable’


The following object is masked from ‘package:gtsummary’:

    continuous_summary




In [2]:
# 1. Load the raw input CSV into a data frame
raw_data <- utils::read.csv(file = "data/data.csv")

In [3]:
# 2. Clean and Define Variables
# Derives the analysis variables used by Table 1 from the raw columns and
# keeps only rows with Age >= 18.
table1_data <- raw_data %>%
  mutate(
    # --- AGE ---
    # Age is the difference between calendar years (diagnosis - birth), so it
    # can differ by up to one year from the age on the diagnosis date.
    Date_Diagnosis = ymd(Fecha_diag),
    Age = as.numeric(Anio_diag) - as.numeric(Anio_nac),

    # --- SEX ---
    # Values other than "Mujer"/"Hombre" become NA.
    Sex = factor(
      Sexo,
      levels = c("Mujer", "Hombre"),
      labels = c("Female", "Male")
    ),

    # --- RESIDENCE ---
    Residence = if_else(
      Parr_res == "QUITO",
      "Urban (Quito)",
      "Rural (Parishes)"
    ),

    # --- EDUCATION ---
    # Anything not explicitly mapped (including NA) is "Missing/Unknown".
    Education = case_when(
      Nivel_educ == "Ninguna" ~ "None / Illiterate",
      Nivel_educ == "Primaria" ~ "Primary",
      Nivel_educ == "Secundaria" ~ "Secondary",
      Nivel_educ == "Superior" ~ "Higher (University/Technical)",
      TRUE ~ "Missing/Unknown"
    ),
    Education = factor(
      Education,
      levels = c(
        "None / Illiterate",
        "Primary",
        "Secondary",
        "Higher (University/Technical)",
        "Missing/Unknown"
      )
    ),

    # --- ESTABLISHMENT ---
    Establishment = case_when(
      Establecimiento == "IESS" ~ "Social Security (IESS)",
      Establecimiento == "MSP" ~ "Ministry of Public Health (MSP)",
      Establecimiento == "SOLCA" ~ "Specialized Cancer Center (SOLCA)",
      Establecimiento == "Privados no SOLCA" ~ "Private Sector",
      TRUE ~ "Missing"
    ),

    # --- HISTOLOGY ---
    # Upper-case the morphology text so the pattern matching below is
    # case-insensitive.
    Histology_Raw = toupper(Morf_tumor_CIEO3),

    # Branch order matters: case_when() uses the first matching rule.
    Subtype = case_when(
      # 1. Hodgkin: contains "HODGKIN" but is not a "NO HODGKIN" entry
      #    (a hyphen or extra spaces between the two words are tolerated).
      str_detect(Histology_Raw, "HODGKIN") &
        !str_detect(Histology_Raw, "\\bNO[ -]*HODGKIN") ~ "Hodgkin Lymphoma",

      # 2. DLBCL: contains "DIFUSO" (Diffuse)
      str_detect(Histology_Raw, "DIFUSO") ~ "Diffuse Large B-Cell (DLBCL)",

      # 3. Follicular
      str_detect(Histology_Raw, "FOLICULAR") ~ "Follicular Lymphoma",

      # 4. Burkitt
      str_detect(Histology_Raw, "BURKITT") ~ "Burkitt Lymphoma",

      # 5. T-Cell / NK-Cell: "T" or "NK" must be a standalone token
      #    (e.g. "CELULAS T", "NK/T"). Matching a bare "T" anywhere would also
      #    hit every description that merely contains the letter T, such as
      #    "MANTO" or "LINFOCITICO".
      str_detect(Histology_Raw, "\\b(T|NK)\\b") ~ "Mature T/NK-Cell",

      # 6. Others/NOS (everything else, including missing morphology)
      TRUE ~ "B-Cell / NOS / Other"
    ),

    # --- PRIOR TUMORS ---
    # Values that match no rule (e.g. NA) stay NA.
    Prior_Tumors = case_when(
      Num_tumores == 0 ~ "No prior tumor",
      Num_tumores == 1 ~ "One prior tumor",
      Num_tumores >= 2 ~ "Two or more prior tumors"
    ),

    # --- PRIMARY TUMOR SITE ---
    # Classified by the prefix of the site code; any code that does not start
    # with C77 or C16 (including NA) falls into "Other Extranodal".
    Tumor_Site = case_when(
      grepl("^C77", Localiz_tumor_r) ~ "Nodal",
      grepl("^C16", Localiz_tumor_r) ~ "Gastric (Extranodal)",
      TRUE ~ "Other Extranodal"
    )
  ) %>%
  # filter() also drops rows whose Age is NA.
  filter(Age >= 18)

In [4]:
# 3. Generate Table
# Builds the descriptive summary table from the derived variables.
# Categorical variables are shown as "n (p%)".
t1_corrected <- table1_data %>%
  select(
    Sex, Age, Residence, Education, Establishment,
    Subtype, Prior_Tumors, Tumor_Site
  ) %>%
  tbl_summary(
    statistic = list(all_categorical() ~ "{n} ({p}%)"),
    # One value per statistic in the template: counts ({n}) with 0 decimals,
    # percentages ({p}) with 1 decimal. A single value would be applied to
    # the counts as well.
    digits = all_categorical() ~ c(0, 1),
    label = list(
      Sex ~ "Sex",
      Age ~ "Age at Diagnosis",
      Residence ~ "Residence Area",
      Education ~ "Education Level",
      Establishment ~ "Diagnostic Establishment",
      Subtype ~ "Histological Subtype",
      Prior_Tumors ~ "History of Prior Cancer",
      Tumor_Site ~ "Primary Tumor Site"
    ),
    missing_text = "Missing"
  ) %>%
  bold_labels()

#t1_corrected

In [5]:
# 4. Export to Microsoft Word (Professional Manuscript Format)
# Make sure the output folder exists so the save does not fail when the
# script runs in a fresh working directory (no-op if it is already there).
dir.create("tables", showWarnings = FALSE, recursive = TRUE)

t1_corrected %>%
  as_flex_table() %>%
  # Professional styling for Word
  fontsize(size = 9, part = "all") %>%
  autofit() %>%
  save_as_docx(path = "tables/Table1_Sociodemographics.docx")

In [6]:
# 5. Export to HTML (Best for keeping the JAMA look)
# Make sure the output folder exists so the save does not fail when the
# script runs in a fresh working directory (no-op if it is already there).
dir.create("tables", showWarnings = FALSE, recursive = TRUE)

t1_corrected %>%
  as_gt() %>%
  gt::gtsave("tables/Table1_Sociodemographics.html")

# NOTE: the HTML file is written under tables/ as well, even though the
# message below names it without the folder prefix.
print("Files saved successfully as 'tables/Table1_Sociodemographics.docx' and 'Table1_Sociodemographics.html'")

[1] "Files saved successfully as 'tables/Table1_Sociodemographics.docx' and 'Table1_Sociodemographics.html'"
