In [None]:
library(malbacR)
library(pmartR)
library(ggplot2)
library(dplyr)
library(tibble)
library(tidyr)
library(paletteer)

In [None]:
# Load the metabolite matrix and drop the samples excluded from the analysis.
# NOTE(review): read.csv()'s `encoding` argument only marks strings as being
# in a known encoding; it does not re-encode the input. If the file really is
# GBK-encoded, `fileEncoding = 'GBK'` is presumably what was intended - confirm.
data_exp <- read.csv('./0.1_缺失值数据矩阵.csv', encoding = 'GBK') %>%
  # Round-trip through row names: moves 'Metabolites' to the first column
  # under the name 'Molecule'.
  column_to_rownames(var = 'Metabolites') %>%
  rownames_to_column(var = 'Molecule') %>%
  # Excluded samples:
  #   - 'X040215' and 'X040958': the two metabolomics samples of one patient
  #     whose CT is problematic.
  #   - 'X040458' (baseline) and 'X041135' (evaluation): the two blood samples
  #     of patient T001931999, excluded from the analysis.
  select(-all_of(c('X040215', 'X040958', 'X040458', 'X041135')))

# Quick sanity checks: matrix size and the distribution of `level`.
dim(data_exp)
data_exp$level %>% table()

In [None]:
# List every column name of the loaded matrix.
colnames(data_exp)

In [None]:
# Size of the matrix followed by a preview of its first rows.
dim(data_exp)
head(data_exp)

In [None]:
# Split the matrix by column position:
#   e_data = column 1 plus everything after column 30
#   e_meta = columns 1 through 30
# (use colnames(data_exp) to inspect which columns these positions cover)
e_data <- select(data_exp, -(2:30))
e_meta <- select(data_exp, 1:30)

In [None]:
# Set the notebook display option for the maximum number of matrix columns,
# then preview both tables.
options(repr.matrix.max.cols = 15)
head(e_data)
head(e_meta)

In [None]:
# Tabulate how many Molecule identifiers are duplicated (TRUE) or not (FALSE).
duplicated(e_meta$Molecule) %>% table()

In [None]:
# Read the raw clinical metadata and recode every column whose name matches
# "group1" or "group2": 0 -> "non-sarcopenia", 1 -> "sarcopenia"; any other
# value is kept, converted to character.
data_meta_raw <- read.csv('./meta_all_raw_new.csv') %>%
  mutate(
    across(
      matches("group1|group2"),
      function(status) {
        case_when(
          status == 0 ~ "non-sarcopenia",
          status == 1 ~ "sarcopenia",
          TRUE ~ as.character(status)
        )
      }
    )
  )

dim(data_meta_raw)
head(data_meta_raw)

In [None]:
# Build the long-format sample metadata: one row per (ID, time point).
data_meta <- data_meta_raw %>%
  select(
    ID, group_lv1, time_1, time_2,
    SMI_group1, SMI_group2, SFA_group1, SFA_group2,
    VFA_group1, VFA_group2, SMD_group1, SMD_group2
  ) %>%
  # Everything is handled as text from here on.
  mutate(across(everything(), as.character)) %>%
  # time_1 / time_2 hold the sample identifiers; stack them into one column
  # and keep the originating column name in group_lv2.
  pivot_longer(
    cols = c(time_1, time_2),
    names_to = 'group_lv2',
    values_to = 'Sample'
  ) %>%
  mutate(
    # Prefix with 'X' so the IDs can be compared with the expression column names.
    Sample = paste0('X', Sample),
    # group_lv1 == '1' is labelled as the development cohort, anything else
    # as the validation cohort.
    group_lv1 = ifelse(group_lv1 == '1', 'Development Cohort', 'Validation Cohort')
  )

dim(data_meta)
head(data_meta)

In [None]:
# Sample columns of the expression table (everything except the first column).
sample_expdata <- colnames(e_data)[-1]

# Expression samples that have no matching entry in the metadata.
sample_expdata[!sample_expdata %in% data_meta$Sample]

In [None]:
library(stringr)

In [None]:
# Sample IDs that are not found among the expression columns get a '0'
# inserted right after the leading 'X'; matching IDs are left untouched.
data_meta <- mutate(
  data_meta,
  Sample = ifelse(
    Sample %in% sample_expdata,
    Sample,
    paste0('X', '0', str_remove(Sample, 'X'))
  )
)

In [None]:
# Number of expression samples whose name does not contain 'QC' ...
sum(!grepl('QC', sample_expdata))
# ... versus the number of distinct sample IDs in the metadata.
length(unique(data_meta$Sample))

In [None]:
# Distinct metadata sample IDs that are absent from the expression columns.
# The logical mask must be built from, and applied to, the same vector:
# deriving it from unique(Sample) while indexing the full (longer) Sample
# column misaligns and recycles the mask, so the wrong entries are listed.
setdiff(unique(data_meta$Sample), sample_expdata)

In [None]:
# Expression samples without a metadata entry, ignoring names containing 'QC'.
sample_expdata[!(sample_expdata %in% data_meta$Sample) & !grepl('QC', sample_expdata)]

In [None]:
# Keep only metadata rows whose sample is not one of the four excluded IDs
# and is present among the expression columns.
data_meta <- data_meta %>%
  filter(
    !(Sample %in% c('X040215', 'X040958', 'X040458', 'X041135')),
    Sample %in% sample_expdata
  )

dim(data_meta)
head(data_meta)

In [None]:
# Number of distinct group_lv2 values within the development cohort.
# NOTE(review): this counts time-point labels, not samples or patients -
# confirm this is the intended check.
data_meta %>%
  filter(group_lv1 == 'Development Cohort') %>%
  pull(group_lv2) %>%
  n_distinct()

In [None]:
# Distinct sample IDs remaining after filtering, plus a preview.
length(unique(data_meta$Sample))
head(data_meta)

#### Baseline data plots

In [None]:
# Counts per cohort, body-composition column and status for the 'time_1' rows.
baseline_cols <- c('SMI_group1', 'SFA_group1', 'VFA_group1', 'SMD_group1')

data_plot_baseline <- data_meta %>%
  filter(group_lv2 == 'time_1') %>%
  select(all_of(c('ID', 'group_lv1', 'Sample', baseline_cols))) %>%
  # One row per (sample, column); group_lv2 now names the column and
  # group_lv3 carries its status value.
  pivot_longer(
    cols = all_of(baseline_cols),
    names_to = 'group_lv2',
    values_to = 'group_lv3'
  ) %>%
  # Drop entries recorded as the literal string '#N/A'.
  filter(group_lv3 != '#N/A') %>%
  count(group_lv1, group_lv2, group_lv3, name = 'Counts') %>%
  mutate(
    # Fix the display order of every grouping variable.
    group_lv1 = factor(group_lv1, levels = c('Development Cohort', 'Validation Cohort')),
    group_lv2 = factor(group_lv2, levels = c('SMI_group1', 'SFA_group1', 'VFA_group1', 'SMD_group1')),
    group_lv3 = factor(group_lv3, levels = c('non-sarcopenia', 'sarcopenia')),
    group_lv4 = 'baseline'
  )

head(data_plot_baseline)

In [None]:
# Cohorts present in the baseline summary.
unique(data_plot_baseline$group_lv1)

In [None]:
# Bar chart of the baseline counts: one panel per cohort x column,
# one bar per status, with the count printed inside each bar.
options(repr.plot.width = 14, repr.plot.height = 12)

ggplot(
  data = data_plot_baseline,
  mapping = aes(x = group_lv3, y = Counts, fill = group_lv3)
) +
  geom_col(position = position_dodge2(width = 0.8), alpha = 0.8) +
  # Labels sit slightly below the bar top so they are drawn inside the bar.
  geom_text(
    mapping = aes(y = Counts - 2, label = Counts),
    position = position_dodge(width = 0.9),
    color = 'white',
    size = 10,
    angle = 0
  ) +
  facet_wrap(
    group_lv1 ~ group_lv2,
    ncol = 4,
    scales = 'free',
    strip.position = 'left'
  ) +
  scale_fill_manual(values = c('#F8766D', '#00BFC4')) + # alternative: paletteer_d("ggsci::nrc_npg")[c(1,2)]
  scale_y_continuous(expand = c(0, 0.5)) +
  labs(x = "", y = "") +
  theme_classic() +
  theme(
    plot.margin = margin(l = 100),
    # Axes: the x labels are hidden because the legend already names the bars.
    axis.title = element_blank(),
    axis.text = element_text(size = 20),
    axis.text.x = element_blank(),
    # Legend on top, without a title.
    legend.position = 'top',
    legend.title = element_blank(),
    legend.key.width = unit(2, "cm"),
    legend.text = element_text(margin = margin(t = 5, b = 5, r = 30), size = 20),
    # Facet strips are drawn outside the axes, with no background box.
    strip.placement = 'outside',
    strip.background = element_blank(),
    strip.text = element_text(size = 20),
    panel.spacing.y = unit(2, "lines")
  )

#### Evaluation data

In [None]:
# Inspect which status values occur in the 'time_2' rows before summarising.
data_plot_evaluation <- data_meta %>%
  filter(group_lv2 == 'time_2') %>%
  select(ID, group_lv1, Sample, SMI_group2, SFA_group2, VFA_group2, SMD_group2)

unique(data_plot_evaluation$SMI_group2)
unique(data_plot_evaluation$SFA_group2)
unique(data_plot_evaluation$VFA_group2)
unique(data_plot_evaluation$SMD_group2)

In [None]:
# Counts per cohort, body-composition column and status for the 'time_2' rows.
evaluation_cols <- c('SMI_group2', 'SFA_group2', 'VFA_group2', 'SMD_group2')

data_plot_evaluation <- data_meta %>%
  filter(group_lv2 == 'time_2') %>%
  select(all_of(c('ID', 'group_lv1', 'Sample', evaluation_cols))) %>%
  # One row per (sample, column); group_lv2 now names the column and
  # group_lv3 carries its status value.
  pivot_longer(
    cols = all_of(evaluation_cols),
    names_to = 'group_lv2',
    values_to = 'group_lv3'
  ) %>%
  # Drop entries recorded as the literal string '#N/A'.
  filter(group_lv3 != '#N/A') %>%
  count(group_lv1, group_lv2, group_lv3, name = 'Counts') %>%
  mutate(
    # Fix the display order of every grouping variable.
    group_lv1 = factor(group_lv1, levels = c('Development Cohort', 'Validation Cohort')),
    group_lv2 = factor(group_lv2, levels = c('SMI_group2', 'SFA_group2', 'VFA_group2', 'SMD_group2')),
    group_lv3 = factor(group_lv3, levels = c('non-sarcopenia', 'sarcopenia')),
    group_lv4 = 'evaluation'
  )

head(data_plot_evaluation)

In [None]:
# Bar chart of the evaluation counts: one panel per cohort x column,
# one bar per status, with the count printed inside each bar.
options(repr.plot.width = 14, repr.plot.height = 12)

ggplot(
  data = data_plot_evaluation,
  mapping = aes(x = group_lv3, y = Counts, fill = group_lv3)
) +
  geom_col(position = position_dodge2(width = 0.8), alpha = 0.8) +
  # Labels sit slightly below the bar top so they are drawn inside the bar.
  geom_text(
    mapping = aes(y = Counts - 2, label = Counts),
    position = position_dodge(width = 0.9),
    color = 'white',
    size = 10,
    angle = 0
  ) +
  facet_wrap(
    group_lv1 ~ group_lv2,
    ncol = 4,
    scales = 'free',
    strip.position = 'left'
  ) +
  scale_fill_manual(values = c('#A94322', '#2B5C8A')) +
  scale_y_continuous(expand = c(0, 0.5)) +
  labs(x = "", y = "") +
  theme_classic() +
  theme(
    plot.margin = margin(l = 100),
    # Axes: the x labels are hidden because the legend already names the bars.
    axis.title = element_blank(),
    axis.text = element_text(size = 20),
    axis.text.x = element_blank(),
    # Legend on top, without a title.
    legend.position = 'top',
    legend.title = element_blank(),
    legend.key.width = unit(2, "cm"),
    legend.text = element_text(margin = margin(t = 5, b = 5, r = 30), size = 20),
    # Facet strips are drawn outside the axes, with no background box.
    strip.placement = 'outside',
    strip.background = element_blank(),
    strip.text = element_text(size = 20),
    panel.spacing.y = unit(2, "lines")
  )

In [None]:
# Combined summary over both time points: counts per cohort (group_lv1),
# time point (group_lv2), body-composition column (group_lv3) and
# status value (group_lv4).
data_plot <- data_meta %>%
  pivot_longer(
    cols = c(
      SMI_group1, SMI_group2, SFA_group1, SFA_group2,
      VFA_group1, VFA_group2, SMD_group1, SMD_group2
    ),
    names_to = 'group_lv3',
    values_to = 'group_lv4'
  ) %>%
  # Drop entries recorded as the literal string '#N/A'.
  filter(group_lv4 != '#N/A') %>%
  count(group_lv1, group_lv2, group_lv3, group_lv4, name = 'Counts') %>%
  mutate(
    # Fix the display order of the grouping variables.
    group_lv1 = factor(group_lv1, levels = c('Development Cohort', 'Validation Cohort')),
    group_lv2 = factor(group_lv2, levels = c('time_1', 'time_2')),
    group_lv3 = factor(
      group_lv3,
      levels = c(
        'SMI_group1', 'SFA_group1', 'VFA_group1', 'SMD_group1',
        'SMI_group2', 'SFA_group2', 'VFA_group2', 'SMD_group2'
      )
    )
  ) %>%
  arrange(group_lv1, group_lv2, group_lv3, group_lv4)
  # Disabled draft steps, kept for reference:
  # select(c('group_lv1','group_lv2','group_lv3','group_lv3_label','Count')) %>%
  # rename()

dim(data_plot)
data_plot

In [None]:
# Columns represented in the combined summary.
unique(data_plot$group_lv3)

In [None]:
# Bar chart of the combined summary: time point on the x axis, one panel per
# cohort, bars coloured by status.
options(repr.plot.width = 12,repr.plot.height = 8)
# `data_plot` has no `group_lv3_label` column (it only holds group_lv1..4 and
# Counts), so mapping fill to it fails with "object not found". The status
# column `group_lv4` is the one that fits the two-colour manual scale below.
# NOTE(review): each time point holds one bar per column x status, while the
# text labels are dodged by fill only - check that the labels line up with
# their bars and adjust the grouping if they do not.
ggplot(data_plot, aes(x = group_lv2, y = Counts,fill = group_lv4,label = Counts)) + 
  geom_bar(stat = "identity",position = position_dodge2(width = 0.8),alpha = 0.8) +
  # Labels are drawn a little above the bar tops.
  geom_text(aes(y = Counts + 4),size = 6,angle = 0,position = position_dodge(width = 0.9)) +
  facet_wrap(~group_lv1,ncol = 4,scales = 'free',strip.position = 'left') +
  scale_fill_manual(values = c('#9E3D22','#2B5C8A')) +
  labs(x = "", y = "") + 
  theme_classic() +
  theme(
    plot.margin = margin(l = 100),
    axis.text = element_text(size = 20),
    axis.text.x = element_text(angle = 25,hjust = 1,vjust = 1,size = 16),
    axis.title = element_blank(),
    legend.position = 'top',
    legend.title = element_blank(),
    legend.text = element_text(size = 16),
    strip.placement = 'outside',
    strip.background = element_blank(),
    strip.text = element_text(size = 20)
  )

# different Level ====

In [None]:
# Bar chart of how many rows of e_meta fall into each `level` value.
options(repr.plot.width = 10, repr.plot.height = 10)

# Frequency table as a data frame with columns Level (factor) and Counts.
data_plot <- as.data.frame(
  table(Level = e_meta$level),
  responseName = 'Counts'
)

ggplot(data = data_plot, mapping = aes(x = Level, y = Counts, fill = Level)) +
  geom_col() +
  # Count label drawn inside the bar, just below its top.
  geom_text(
    mapping = aes(y = Counts - 40, label = Counts),
    size = 10,
    color = 'white'
  ) +
  # Alternative palette: scale_fill_manual(values = c('#9E3D22','#2B5C8A'))
  scale_fill_manual(values = paletteer_d("ggsci::nrc_npg")) +
  labs(x = "Level", y = "Metabolites Count") +
  theme_classic() +
  theme(
    legend.position = 'none',
    axis.title = element_text(size = 24),
    axis.title.x = element_blank(),
    axis.text = element_text(size = 20)
  )