# Differential expression analysis for **RNAseq data**

# Download and retrieve data from **TCGA**

## Load the required libraries (**GDC**)

In [None]:
library("TCGAbiolinks")
library("limma")
library("edgeR")
library("glmnet")
library("factoextra")
library("FactoMineR")
library("caret")
library("SummarizedExperiment")
library("gplots")
library("survival")
library("survminer")
library("RColorBrewer")
library("gProfileR")
library("genefilter")

## TCGA data

In [None]:
# Fetch the table of projects hosted on the GDC.
GDCprojects <- getGDCprojects()

# Preview the identifier and name of the first few projects.
head(GDCprojects[, c("project_id", "name")])

# Summary of the liver hepatocellular carcinoma project; `:::` reaches into
# the package namespace, so this works even if the function is not exported.
TCGAbiolinks:::getProjectSummary("TCGA-LIHC")

In [None]:
# Build a query for the TCGA-LIHC RNA-Seq count files (all sample types).
# NOTE(review): recent GDC data releases reportedly serve RNA-Seq counts under
# "STAR - Counts" instead of "HTSeq - Counts" -- confirm this workflow.type is
# still available for the installed TCGAbiolinks / current GDC release.
query_TCGA <- GDCquery(
  project               = "TCGA-LIHC",
  data.category         = "Transcriptome Profiling",  # GDCquery requires a data category
  experimental.strategy = "RNA-Seq",
  workflow.type         = "HTSeq - Counts"
)

In [None]:
# Turn the query results into a table so they are easier to read.
lihc_res <- getResults(query_TCGA)
# Uncomment to preview the first rows of the table:
# head(lihc_res)
# List the columns available in the results table.
colnames(lihc_res)

In [None]:
# Count how many result rows fall into each distinct sample (tissue) type.
summary(factor(lihc_res[["sample_type"]]))

In [None]:
# Rebuild the query, this time restricted to primary tumor samples only.
# The original call was missing its closing parenthesis, which made the cell
# a syntax error.
query_TCGA <- GDCquery(
  project               = "TCGA-LIHC",
  data.category         = "Transcriptome Profiling",  # GDCquery requires a data category
  experimental.strategy = "RNA-Seq",
  workflow.type         = "HTSeq - Counts",
  sample.type           = "Primary Tumor"
)

In [None]:
# Download the files matched by the query.
GDCdownload(query_TCGA)


In [None]:
# Load the downloaded files into a single R object for analysis.
tcga_data <- GDCprepare(query = query_TCGA)

In [None]:
# Check the dimensions (rows x columns) of the prepared object.
dim(tcga_data)

In [None]:
# List the metadata fields stored in the object's column data.
names(colData(tcga_data))


In [None]:
# Tabulate vital status across samples. Uses the colData() accessor rather
# than reaching into the S4 slot directly with `@`.
table(colData(tcga_data)$vital_status)

In [None]:
# Tabulate the sample type (`definition` column). Uses the colData() accessor
# rather than reaching into the S4 slot directly with `@`.
table(colData(tcga_data)$definition)

In [None]:
# Tabulate gender across samples. Uses the colData() accessor rather than
# reaching into the S4 slot directly with `@`.
table(colData(tcga_data)$gender)

In [None]:
# Dimensions of the gene expression matrix stored in the object's assay.
dim(assay(tcga_data))

In [None]:
# Preview the expression matrix: first 6 genes (rows) for the first 10 samples (columns).
head(assay(tcga_data)[, seq_len(10)])

In [None]:
# Persist the prepared object to disk (uncompressed) so it can be reloaded
# later without repeating the download and prepare steps.
saveRDS(tcga_data, file = "tcga_data.RDS", compress = FALSE)

In [None]:
# Reload the previously saved object from disk.
tcga_data <- readRDS("tcga_data.RDS")