# In [None]:  (notebook cell marker left over from export)
# Dependencies ----
# Install each package only when it is missing, so re-running the script
# does not re-download and rebuild packages that are already available.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

# DESeq2 is distributed through Bioconductor, hence BiocManager.
if (!requireNamespace("DESeq2", quietly = TRUE)) {
  BiocManager::install("DESeq2", ask = FALSE)
}

if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}

library(DESeq2)
library(ggplot2)

# Load data ----
# Read the gzipped count matrix; the first CSV column supplies the row names.
counts_file <- "GSE270454_RNAseq-combined-counts-matrix.csv.gz"
data <- read.csv(file = gzfile(counts_file), row.names = 1)

# Quick look at the table: dimensions, then the first rows.
dim(data)
head(data)

# Sample annotation ----

# Map sample names to group labels by substring match.
#
# Rules are applied with the priority AD > MCI > ASO/ASM; names matching
# none of them are labelled "Unknown".
#
# @param sample_names Character vector of sample (column) names.
# @return Character vector of the same length holding "AD", "MCI",
#   "MiddleAged" or "Unknown". A zero-length input yields character(0).
classify_samples <- function(sample_names) {
  labels <- rep("Unknown", length(sample_names))
  # Assign in reverse priority so that a higher-priority rule overwrites a
  # lower-priority one when a name matches several patterns.
  labels[grepl("ASO|ASM", sample_names)] <- "MiddleAged"
  labels[grepl("MCI", sample_names)] <- "MCI"
  labels[grepl("AD", sample_names)] <- "AD"
  labels
}

samples <- colnames(data)
group <- classify_samples(samples)

# Unclassified samples would become their own level of the design factor,
# so make them visible instead of letting them pass silently.
if (any(group == "Unknown")) {
  warning(
    "Samples not matching any group pattern: ",
    paste(samples[group == "Unknown"], collapse = ", "),
    call. = FALSE
  )
}

# Per-sample metadata; row names are the sample names so they can be
# compared against the column names of the count table.
colData <- data.frame(
  sample = samples,
  condition = factor(group)
)
rownames(colData) <- samples

# Number of samples per group.
table(colData$condition)



# Low-count filter ----
# Keep only rows whose counts, summed across all columns, reach at least 10.
gene_sums <- rowSums(data)
keep_gene <- gene_sums >= 10
filtered_data <- data[keep_gene, ]

# Report how many rows the filter kept.
cat("Before filtering:", nrow(data), "\n")
cat("After filtering :", nrow(filtered_data), "\n")

# Prints TRUE when the count columns and the metadata rows are in the same
# order.
all(colnames(filtered_data) == rownames(colData))



# DESeq2 model ----
# Single-factor design: expression is modelled by the `condition` column of
# the sample metadata.
design_formula <- ~ condition

dds <- DESeqDataSetFromMatrix(
  countData = filtered_data,
  colData = colData,
  design = design_formula
)

# Run the DESeq2 pipeline on the dataset and keep the fitted object.
dds <- DESeq(dds)

# Differential expression results ----
# Adjusted p-value cutoff used to call genes significant. The same value is
# passed to results() as `alpha`, so that independent filtering and the
# summary() report use this cutoff rather than the results() default of 0.1.
padj_cutoff <- 0.05

# AD vs MiddleAged (MiddleAged is the denominator of the fold change).
res_ad <- results(
  dds,
  contrast = c("condition", "AD", "MiddleAged"),
  alpha = padj_cutoff
)
summary(res_ad)

# which() drops rows whose padj is NA.
sig_ad <- res_ad[which(res_ad$padj < padj_cutoff), ]
cat("Significant genes (AD vs MiddleAged):", nrow(sig_ad), "\n")
head(sig_ad[order(sig_ad$padj), ])

# MCI vs MiddleAged.
res_mci <- results(
  dds,
  contrast = c("condition", "MCI", "MiddleAged"),
  alpha = padj_cutoff
)
summary(res_mci)

sig_mci <- res_mci[which(res_mci$padj < padj_cutoff), ]
cat("Significant genes (MCI vs MiddleAged):", nrow(sig_mci), "\n")

# Volcano plot: AD vs MiddleAged ----
res_df <- as.data.frame(res_ad)

# Flag genes passing both the adjusted p-value and the fold-change
# threshold. which() drops comparisons that evaluate to NA, so genes with a
# missing padj stay "Not Significant".
res_df$significant <- "Not Significant"
hit_rows <- which(res_df$padj < 0.05 & abs(res_df$log2FoldChange) > 1)
res_df$significant[hit_rows] <- "Significant"

# Colours are matched to labels by name, so each class keeps its colour
# even when the other class is absent from the data.
volcano_colours <- c("Not Significant" = "grey", "Significant" = "red")

ggplot(
  res_df,
  aes(x = log2FoldChange, y = -log10(pvalue), color = significant)
) +
  geom_point(alpha = 0.6) +
  scale_color_manual(values = volcano_colours) +
  theme_minimal() +
  labs(
    title = "Volcano Plot: AD vs MiddleAged",
    x = "Log2 Fold Change",
    y = "-Log10(p-value)"
  )

# ABO expression by group ----
# Reuse the count table already loaded into `data` instead of decompressing
# and parsing the same file a second time.
countData <- data

# Indexing a data frame by a missing row name returns a row of NAs, which
# would produce an empty plot; fail with a clear message instead.
if (!"ABO" %in% rownames(countData)) {
  stop("Gene 'ABO' not found in the count matrix row names", call. = FALSE)
}

# Unfiltered counts for ABO, one value per sample column.
abo_expr <- as.numeric(countData["ABO", ])

abo_df <- data.frame(
  Expression = abo_expr,
  Group = colData$condition
)

ggplot(abo_df, aes(x = Group, y = Expression, fill = Group)) +
  # The jitter layer already draws every sample, so suppress the boxplot's
  # own outlier points to avoid plotting those samples twice.
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.5) +
  theme_minimal() +
  labs(title = "ABO Gene Expression", y = "Counts")