In [None]:
library("TCGAbiolinks")
library("limma")
library("edgeR")
#library("glmnet")
library("factoextra")
library("FactoMineR")
library("caret")
library("SummarizedExperiment")
library("gplots")
library("survival")
library("survminer")
library("RColorBrewer")
library(singscore)


In [197]:
GDCprojects = getGDCprojects()

head(GDCprojects[c("project_id", "name")])
#TCGAbiolinks:::getProjectSummary("TCGA-LIHC")


Unnamed: 0_level_0,project_id,name
Unnamed: 0_level_1,<chr>,<chr>
1,TARGET-NBL,Neuroblastoma
2,GENIE-GRCC,AACR Project GENIE - Contributed by Institut Gustave Roussy
3,GENIE-DFCI,AACR Project GENIE - Contributed by Dana-Farber Cancer Institute
4,GENIE-NKI,AACR Project GENIE - Contributed by Netherlands Cancer Institute
5,GENIE-VICC,AACR Project GENIE - Contributed by Vanderbilt-Ingram Cancer Center
6,GENIE-UHN,AACR Project GENIE - Contributed by Princess Margaret Cancer Centre


In [198]:
query_TCGA = GDCquery(
  project = "TCGA-LIHC",
  data.category = "Transcriptome Profiling", # parameter enforced by GDCquery
  experimental.strategy = "RNA-Seq",
  workflow.type = "STAR - Counts")


--------------------------------------

o GDCquery: Searching in GDC database

--------------------------------------

Genome of reference: hg38

--------------------------------------------

oo Accessing GDC. This might take a while...

--------------------------------------------

ooo Project: TCGA-LIHC

--------------------

oo Filtering results

--------------------

ooo By experimental.strategy

ooo By workflow.type

----------------

oo Checking data

----------------

ooo Check if there are duplicated cases


ooo Check if there results for the query

-------------------

o Preparing output

-------------------



In [199]:
lihc_res = getResults(query_TCGA) # make results as table
# head(lihc_res) # data of the first 6 patients.
colnames(lihc_res) # columns present in the table


In [200]:
summary(factor(lihc_res$sample_type)) # summary of distinct tissues types present in this study


In [201]:
query.cnv.cases<-getResults(query_TCGA, cols="cases")
length(query.cnv.cases)
query.cnv.cases.dups<-query.cnv.cases[duplicated(query.cnv.cases)]
length(query.cnv.cases.dups)
query.cnv.cases.unique<-unique(query.cnv.cases)
length(query.cnv.cases.unique)
query.cnv.cases.nodups<-setdiff(query.cnv.cases.unique,query.cnv.cases.dups)
length(query.cnv.cases.nodups)

query_TCGA <- GDCquery(project = "TCGA-LIHC",
  data.category = "Transcriptome Profiling", # parameter enforced by GDCquery
  experimental.strategy = "RNA-Seq",
  workflow.type = "STAR - Counts",
  legacy=FALSE,
  barcode=query.cnv.cases.unique)



#tcga_data <- GDCprepare(query_TCGA)


--------------------------------------

o GDCquery: Searching in GDC database

--------------------------------------

Genome of reference: hg38

--------------------------------------------

oo Accessing GDC. This might take a while...

--------------------------------------------

ooo Project: TCGA-LIHC

--------------------

oo Filtering results

--------------------

ooo By experimental.strategy

ooo By workflow.type

ooo By barcode

----------------

oo Checking data

----------------

ooo Check if there are duplicated cases


ooo Check if there results for the query

-------------------

o Preparing output

-------------------



In [202]:
query.cnv.cases.unique

In [203]:
tcga_data <- GDCprepare(query_TCGA)



|    |cases                        |experimental_strategy |analysis_workflow_type |
|:---|:----------------------------|:---------------------|:----------------------|
|40  |TCGA-2V-A95S-01A-11R-A37K-07 |RNA-Seq               |STAR - Counts          |
|774 |TCGA-2V-A95S-01A-11R-A37K-07 |RNA-Seq               |STAR - Counts          |
|673 |TCGA-2Y-A9GS-01A-12R-A38B-07 |RNA-Seq               |STAR - Counts          |
|686 |TCGA-2Y-A9GS-01A-12R-A38B-07 |RNA-Seq               |STAR - Counts          |
|568 |TCGA-2Y-A9GT-01A-11R-A38B-07 |RNA-Seq               |STAR - Counts          |
|618 |TCGA-2Y-A9GT-01A-11R-A38B-07 |RNA-Seq               |STAR - Counts          |
|17  |TCGA-2Y-A9GU-01A-11R-A38B-07 |RNA-Seq               |STAR - Counts          |
|762 |TCGA-2Y-A9GU-01A-11R-A38B-07 |RNA-Seq               |STAR - Counts          |
|594 |TCGA-2Y-A9GV-01A-11R-A38B-07 |RNA-Seq               |STAR - Counts          |
|621 |TCGA-2Y-A9GV-01A-11R-A38B-07 |RNA-Seq               |STAR - Counts  

ERROR: Error in GDCprepare(query_TCGA): There are samples duplicated. We will not be able to prepare it


In [None]:
table(tcga_data@colData$vital_status)


In [None]:
library(RTCGA.clinical)
?clinical

In [None]:
dim(BRCA.clinical)
names(BRCA.clinical)


In [None]:
clin <- survivalTCGA(BRCA.clinical, OV.clinical, GBM.clinical, 
                     extract.cols="admin.disease_code")
# Show the first few lines
head(clin)


In [None]:
library(SummarizedExperiment)
library(TCGAbiolinks)

query_exp <- GDCquery(project = "TCGA-SARC", 
                      legacy = TRUE,
                      data.category = "Gene expression",
                      data.type = "Gene expression quantification",
                      platform = "Illumina HiSeq", 
                      file.type = "normalized_results",
                      experimental.strategy = "RNA-Seq",
                      )

GDCdownload(query_exp)

GBM_exp <- GDCprepare(query = query_exp, save = TRUE, save.filename = "GBMExp.rda")

# searching for possible outliers using the TCGAanalyze_Preprocessing function, which performs an Array Array Intensity correlation AAIC
data_prep <- TCGAanalyze_Preprocessing(object = GBM_exp, cor.cut = 0.6)                      
# using the TCGAanalyze_Normalization function for normalizing mRNA transcripts and miRNA
data_norm <- TCGAanalyze_Normalization(tabDF = data_prep,
                                      geneInfo = geneInfo,
                                      method = "gcContent")                

# using the TCGAanalyze_Filtering function to remove features
data_filt <- t(TCGAanalyze_Filtering(tabDF = data_norm,
                                    method = "quantile", 
                                    qnt.cut =  0.25))   


In [None]:
data_norm

In [None]:
time_status <- cbind(GBM_exp$days_to_death,GBM_exp$vital_status)
time_status[which(time_status[,2]=="Alive"),1] <- GBM_exp$days_to_last_follow_up[which(time_status[,2]=="Alive")]

rownames(time_status) <- GBM_exp$barcode
data_filt <- data_filt[-which(is.na(time_status[,1])),]

time_status <- time_status[-which(is.na(time_status[,1])),]
time_status[which(time_status[,2] == "Dead"),2] <- 1
time_status[which(time_status[,2] == "Alive"),2] <- 0

time_status <- data.frame(time_status)
time_status[,1] <- as.numeric(as.character(time_status[,1]))
time_status[,2] <- as.numeric(as.character(time_status[,2]))

colnames(time_status) <- c("time","status")

data_filt <- data_filt[which(rownames(data_filt) %in% rownames(time_status)),]
time_status <- time_status[rownames(data_filt),]


In [None]:
data_filt

In [None]:
time_status

In [None]:
query <- GDCquery(
    project = "TCGA-SARC", 
    data.category = "Clinical",
    data.type = "Clinical Supplement", 
    data.format = "BCR Biotab"
)
GDCdownload(query)
clinical.BCRtab.all <- GDCprepare(query)
names(clinical.BCRtab.all)


In [None]:
clinical.BCRtab.all

In [None]:
plot(fit_du)

In [None]:
BRCAOV.survInfo["cluster"] <- rep(1:2, each = 2, length.out = nrow(BRCAOV.survInfo))   # 8 integers plus two recycled 1's.


In [None]:
BRCAOV.survInfo

In [229]:
unscaled_heat_map <- as.data.frame(read.csv("unscaled_em_gmm_output_t50.csv"))

colnames(unscaled_heat_map) <- c("gene", "MP1", "MP2", "MP3", "MP4", "MP5", "MP6")
rownames(unscaled_heat_map) <- (unscaled_heat_map$gene)
unscaled_heat_map<-unscaled_heat_map[c("MP1", "MP2", "MP3", "MP4", "MP5", "MP6")]

t_heat.map <- as.data.frame(unscaled_heat_map)
t_heat.map[["Max"]] <- colnames(unscaled_heat_map)[max.col(unscaled_heat_map,ties.method="first")]

samp <- "MP6"
genes <- rownames(filter(t_heat.map, Max == samp))
genes
genes <- rownames(filter(t_heat.map, Max == c("MP1", "MP6")))
genes
samp <- "MP1_MP6_merged"
rankData <- rankGenes(data_norm)
scoredf <- simpleScore(rankData, upSet = genes)

scoredf$Quartile<-cut(scoredf$TotalScore,quantile(scoredf$TotalScore),include.lowest=TRUE,labels=FALSE)

scoredf$filtered_names <- substr(rownames(scoredf), 1, 12)
scoredf
scoredf <- scoredf[!duplicated(scoredf$filtered_names),]
rownames(scoredf) <- scoredf$filtered_names

BRCAOV.survInfo <- survivalTCGA(SARC.clinical) 

colnames(BRCAOV.survInfo) <- c("time", "barcode" , "status")

rownames(BRCAOV.survInfo) <-BRCAOV.survInfo$barcode

BRCAOV.survInfo<-BRCAOV.survInfo[rownames(scoredf),]
BRCAOV.survInfo$Quartile <- scoredf$Quartile
BRCAOV.survInfo

“6 genes missing: ACTG1, NORAD, SPIDR, KAZN, LRMDA, FP236383.3”


Unnamed: 0_level_0,TotalScore,TotalDispersion,Quartile,filtered_names
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<chr>
TCGA-SI-A71O-06A-11R-A38C-07,0.4041421,166.7925,3,TCGA-SI-A71O
TCGA-3B-A9HI-01A-11R-A38C-07,0.4064218,206.8227,4,TCGA-3B-A9HI
TCGA-3B-A9HJ-01A-11R-A38C-07,0.3733070,306.8982,1,TCGA-3B-A9HJ
TCGA-3B-A9HL-01A-11R-A38C-07,0.4123067,190.5141,4,TCGA-3B-A9HL
TCGA-3B-A9HO-01A-11R-A38C-07,0.3821937,432.1779,1,TCGA-3B-A9HO
TCGA-3B-A9HP-01A-11R-A38C-07,0.3808140,197.9271,1,TCGA-3B-A9HP
TCGA-3B-A9HQ-01A-11R-A38C-07,0.3932491,196.4445,2,TCGA-3B-A9HQ
TCGA-3B-A9HR-01A-11R-A38C-07,0.3957609,449.2278,2,TCGA-3B-A9HR
TCGA-3B-A9HS-01A-11R-A39D-07,0.4156283,187.5489,4,TCGA-3B-A9HS
TCGA-3B-A9HT-01A-11R-A39D-07,0.3842155,217.2009,1,TCGA-3B-A9HT


Unnamed: 0_level_0,time,barcode,status,Quartile
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<int>
TCGA-SI-A71O,434,TCGA-SI-A71O,0,3
TCGA-3B-A9HI,1521,TCGA-3B-A9HI,0,4
TCGA-3B-A9HJ,1104,TCGA-3B-A9HJ,0,1
TCGA-3B-A9HL,599,TCGA-3B-A9HL,1,4
TCGA-3B-A9HO,692,TCGA-3B-A9HO,0,1
TCGA-3B-A9HP,1627,TCGA-3B-A9HP,1,1
TCGA-3B-A9HQ,2085,TCGA-3B-A9HQ,0,2
TCGA-3B-A9HR,2577,TCGA-3B-A9HR,0,2
TCGA-3B-A9HS,1366,TCGA-3B-A9HS,1,4
TCGA-3B-A9HT,1474,TCGA-3B-A9HT,0,1


In [230]:
library(dplyr)
library(RTCGA.clinical)
library(survival)
library(survminer)
# Upload clinical data
#clinical.annot <- read.csv(file="clinical_data.csv")
# This is plotting a log-rank test which is slightly different from Cox proportional hazards
#fit_du <- survfit(Surv(OSDays, VitalStatus ) ~ Cluster, data = clinical.annot)
BRCAOV.survInfo<-filter(BRCAOV.survInfo, Quartile %in% c(1,4))

fit_du <- survfit(Surv(time,status) ~ Quartile, data = BRCAOV.survInfo)


#fit_du <- survfit(Surv(days, status_bin) ~ DiseaseDx, data = OverallSurvival_du)
# plotting the log-rank test survival curves (overall p-value is 0.54)

pdf(paste0(samp, "_Overall_Survival.pdf"), width = 10, height = 10)

ggsurvplot(fit_du,
           legend.title = "Cluster",
          # legend.labs = c("1", "2"),
           pval = TRUE,
           pval.method = TRUE,
           conf.int = TRUE,
           # Add risk table
         #  risk.table = TRUE,
           conf.int.style = "step",  # customize style of confidence intervals
         #  xlab = "Time in days",   # customize X axis label.
         #  break.time.by = 200,     # break X axis in time intervals by 200.
          # ggtheme = theme_light(), # customize plot and risk table with a theme.
           risk.table = "abs_pct",  # absolute number and percentage at risk.
           risk.table.y.text.col = T,# colour risk table text annotations.
           risk.table.y.text = FALSE,# show bars instead of names in text annotations
          # in legend of risk table.
           ncensor.plot = TRUE,      # plot the number of censored subjects at time t
           surv.median.line = "hv",
           #tables.height = 0.2,
           #tables.theme = theme_cleantable(),
           ylab = "Overall Survival Probability",
           xlab = "Time (Months)",
           cex.lab = 10,
           #palette = brewer.pal(4, "Set1"),
           #palette = brewer.pal(4, "Set1"),
           ggtheme = theme_bw(),#xscale="d_m",
)+ ggtitle("Overall Survival")

dev.off()






In [98]:
library(GSEABase)
# The example expression dataset and gene signatures are included in the package
# distribution, one can directly access them using the variable names

# To see the description of 'tgfb_expr_10_se','tgfb_gs_up','tgfb_gs_dn', look at 
# their help pages using:

# ?tgfb_expr_10_se
# ?tgfb_gs_up
# ?tgfb_gs_dn

# Have a look at the object tgfb_expr_10_se containing gene expression data
# for 10 samples 
tgfb_expr_10_se


Loading required package: annotate

Loading required package: AnnotationDbi


Attaching package: ‘AnnotationDbi’


The following object is masked from ‘package:dplyr’:

    select


Loading required package: XML

Loading required package: graph


Attaching package: ‘graph’


The following object is masked from ‘package:XML’:

    addNode




ERROR: Error in eval(expr, envir, enclos): object 'tgfb_expr_10_se' not found


In [179]:
df <- read.csv("t100_genes_per_metaprogram_6_MP.csv")
df

MP_0,MP_1,MP_2,MP_3,MP_4,MP_5,MP_6
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
MALAT1,MALAT1,MALAT1,MALAT1,MALAT1,FP236383.3,MALAT1
RBMS3,NEAT1,NEAT1,PTPRC,FN1,FP671120.4,NEAT1
AUTS2,LRMDA,FP236383.3,MBNL1,COL3A1,COL1A2,FP236383.3
FP236383.3,PLXDC2,ZBTB20,ARHGAP15,COL1A2,COL1A1,CD74
MBNL1,DPYD,QKI,PRKCH,FP236383.3,COL6A2,ZEB2
PTPRG,DOCK4,XIST,RABGAP1L,COL6A3,VIM,SAT1
LRMDA,CHST11,FNDC3B,CBLB,COL1A1,ACTB,LRMDA
NEAT1,ZEB2,PPP2R2B,SKAP1,EXT1,EEF1A1,PLXDC2
MIR99AHG,AKAP13,PDE4B,PARP8,GPC6,COL6A1,DPYD
TENM3,ELMO1,PCDH9,UTRN,NEAT1,COL5A1,SLC8A1


In [224]:

df <- read.csv("t100_genes_per_metaprogram_6_MP.csv")
samp = "MP_6-MP_1_Merged"
#genes <- as.vector(as.list(df[samp])$MP_5)
genes <- unique(c(as.list(df['MP_5'])$MP_5,  as.list(df['MP_0'])$MP_0))

rankData <- rankGenes(data_norm)
scoredf <- simpleScore(rankData, upSet = genes)

scoredf$Quartile<-cut(scoredf$TotalScore,quantile(scoredf$TotalScore),include.lowest=TRUE,labels=FALSE)

scoredf$filtered_names <- substr(rownames(scoredf), 1, 12)
scoredf
scoredf <- scoredf[!duplicated(scoredf$filtered_names),]
rownames(scoredf) <- scoredf$filtered_names

BRCAOV.survInfo <- survivalTCGA(SARC.clinical) 

colnames(BRCAOV.survInfo) <- c("time", "barcode" , "status")

rownames(BRCAOV.survInfo) <-BRCAOV.survInfo$barcode

BRCAOV.survInfo<-BRCAOV.survInfo[rownames(scoredf),]
BRCAOV.survInfo$Quartile <- scoredf$Quartile
BRCAOV.survInfo

“20 genes missing: FP236383.3, FP671120.4, ACTB, H3F3B, NORAD, ACTG1, GSE1, PRRC2C, LRMDA, MIR99AHG, TENM3, KAZN, FTX, RAD51B, ADGRL3, LINC02476, SPIDR, AC011246.1, KMT2C, PIEZO2”


Unnamed: 0_level_0,TotalScore,TotalDispersion,Quartile,filtered_names
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<chr>
TCGA-SI-A71O-06A-11R-A38C-07,0.2964873,2186.835,2,TCGA-SI-A71O
TCGA-3B-A9HI-01A-11R-A38C-07,0.3255314,1211.284,4,TCGA-3B-A9HI
TCGA-3B-A9HJ-01A-11R-A38C-07,0.2927543,2188.318,2,TCGA-3B-A9HJ
TCGA-3B-A9HL-01A-11R-A38C-07,0.3417327,1178.667,4,TCGA-3B-A9HL
TCGA-3B-A9HO-01A-11R-A38C-07,0.2877538,1694.612,1,TCGA-3B-A9HO
TCGA-3B-A9HP-01A-11R-A38C-07,0.3181372,1389.196,3,TCGA-3B-A9HP
TCGA-3B-A9HQ-01A-11R-A38C-07,0.3036647,2799.149,2,TCGA-3B-A9HQ
TCGA-3B-A9HR-01A-11R-A38C-07,0.2907370,2306.926,2,TCGA-3B-A9HR
TCGA-3B-A9HS-01A-11R-A39D-07,0.3221131,1495.943,4,TCGA-3B-A9HS
TCGA-3B-A9HT-01A-11R-A39D-07,0.2794238,2002.993,1,TCGA-3B-A9HT


Unnamed: 0_level_0,time,barcode,status,Quartile
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<int>
TCGA-SI-A71O,434,TCGA-SI-A71O,0,2
TCGA-3B-A9HI,1521,TCGA-3B-A9HI,0,4
TCGA-3B-A9HJ,1104,TCGA-3B-A9HJ,0,2
TCGA-3B-A9HL,599,TCGA-3B-A9HL,1,4
TCGA-3B-A9HO,692,TCGA-3B-A9HO,0,1
TCGA-3B-A9HP,1627,TCGA-3B-A9HP,1,3
TCGA-3B-A9HQ,2085,TCGA-3B-A9HQ,0,2
TCGA-3B-A9HR,2577,TCGA-3B-A9HR,0,2
TCGA-3B-A9HS,1366,TCGA-3B-A9HS,1,4
TCGA-3B-A9HT,1474,TCGA-3B-A9HT,0,1


In [225]:
library(dplyr)
library(RTCGA.clinical)
library(survival)
library(survminer)
# Upload clinical data
#clinical.annot <- read.csv(file="clinical_data.csv")
# This is plotting a log-rank test which is slightly different from Cox proportional hazards
#fit_du <- survfit(Surv(OSDays, VitalStatus ) ~ Cluster, data = clinical.annot)
BRCAOV.survInfo<-filter(BRCAOV.survInfo, Quartile %in% c(1,4))

fit_du <- survfit(Surv(time,status) ~ Quartile, data = BRCAOV.survInfo)


#fit_du <- survfit(Surv(days, status_bin) ~ DiseaseDx, data = OverallSurvival_du)
# plotting the log-rank test survival curves (overall p-value is 0.54)

pdf(paste0(samp, "_Overall_Survival_MP_rank.pdf"), width = 10, height = 10)

ggsurvplot(fit_du,
           legend.title = "Cluster",
          # legend.labs = c("1", "2"),
           pval = TRUE,
           pval.method = TRUE,
           conf.int = TRUE,
           # Add risk table
         #  risk.table = TRUE,
           conf.int.style = "step",  # customize style of confidence intervals
         #  xlab = "Time in days",   # customize X axis label.
         #  break.time.by = 200,     # break X axis in time intervals by 200.
          # ggtheme = theme_light(), # customize plot and risk table with a theme.
           risk.table = "abs_pct",  # absolute number and percentage at risk.
           risk.table.y.text.col = T,# colour risk table text annotations.
           risk.table.y.text = FALSE,# show bars instead of names in text annotations
          # in legend of risk table.
           ncensor.plot = TRUE,      # plot the number of censored subjects at time t
           surv.median.line = "hv",
           #tables.height = 0.2,
           #tables.theme = theme_cleantable(),
           ylab = "Overall Survival Probability",
           xlab = "Time (Months)",
           cex.lab = 10,
           #palette = brewer.pal(4, "Set1"),
           #palette = brewer.pal(4, "Set1"),
           ggtheme = theme_bw(),#xscale="d_m",
)+ ggtitle("Overall Survival")

dev.off()






ERROR: Error in h(simpleError(msg, call)): error in evaluating the argument 'expreMatrix' in selecting a method for function 'rankGenes': object 'tcga_data_counts' not found


ERROR: Error in parse(text = x, srcfile = src): <text>:1:58: unexpected ','
1: lapply(rownames(scoredf ,function(DF) {substring(DF,1,11),
                                                             ^


In [113]:
# Given the ranked data and gene signature, simpleScore returns the scores and 
# dispersions for each sample



Unnamed: 0_level_0,time,barcode,status,cluster
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<int>
396.40.0,1521,TCGA-3B-A9HI,0,1
396.40.0.1,1104,TCGA-3B-A9HJ,0,1
396.40.0.2,599,TCGA-3B-A9HL,1,2
396.40.0.3,692,TCGA-3B-A9HO,0,2
396.40.0.4,1627,TCGA-3B-A9HP,1,1
396.40.0.5,2085,TCGA-3B-A9HQ,0,1
396.40.0.6,2577,TCGA-3B-A9HR,0,2
412.37.0,1366,TCGA-3B-A9HS,1,2
412.37.0.1,1474,TCGA-3B-A9HT,0,1
412.37.0.2,768,TCGA-3B-A9HU,0,1


In [126]:
scoredf

Unnamed: 0_level_0,TotalScore,TotalDispersion,Quartile
Unnamed: 0_level_1,<dbl>,<dbl>,<int>
TCGA-SI-A71O-06A-11R-A38C-07,0.3298576,2201.6610,2
TCGA-3B-A9HI-01A-11R-A38C-07,0.3470090,1359.5442,3
TCGA-3B-A9HJ-01A-11R-A38C-07,0.3259104,1141.6020,2
TCGA-3B-A9HL-01A-11R-A38C-07,0.3595242,1368.4398,4
TCGA-3B-A9HO-01A-11R-A38C-07,0.3224175,2072.6748,2
TCGA-3B-A9HP-01A-11R-A38C-07,0.3340150,1316.5488,2
TCGA-3B-A9HQ-01A-11R-A38C-07,0.3357768,2201.6610,2
TCGA-3B-A9HR-01A-11R-A38C-07,0.3529927,1237.9710,4
TCGA-3B-A9HS-01A-11R-A39D-07,0.3546655,1052.6460,4
TCGA-3B-A9HT-01A-11R-A39D-07,0.3154286,2776.9098,1


Unnamed: 0_level_0,TotalScore,TotalDispersion,Quartile,filtered_names
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<chr>
TCGA-SI-A71O,0.3298576,2201.6610,2,TCGA-SI-A71O
TCGA-3B-A9HI,0.3470090,1359.5442,3,TCGA-3B-A9HI
TCGA-3B-A9HJ,0.3259104,1141.6020,2,TCGA-3B-A9HJ
TCGA-3B-A9HL,0.3595242,1368.4398,4,TCGA-3B-A9HL
TCGA-3B-A9HO,0.3224175,2072.6748,2,TCGA-3B-A9HO
TCGA-3B-A9HP,0.3340150,1316.5488,2,TCGA-3B-A9HP
TCGA-3B-A9HQ,0.3357768,2201.6610,2,TCGA-3B-A9HQ
TCGA-3B-A9HR,0.3529927,1237.9710,4,TCGA-3B-A9HR
TCGA-3B-A9HS,0.3546655,1052.6460,4,TCGA-3B-A9HS
TCGA-3B-A9HT,0.3154286,2776.9098,1,TCGA-3B-A9HT


Unnamed: 0_level_0,time,barcode,status,cluster,Quartile
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<int>,<int>
TCGA-SI-A71O,434,TCGA-SI-A71O,0,1,2
TCGA-3B-A9HI,1521,TCGA-3B-A9HI,0,1,3
TCGA-3B-A9HJ,1104,TCGA-3B-A9HJ,0,1,2
TCGA-3B-A9HL,599,TCGA-3B-A9HL,1,2,4
TCGA-3B-A9HO,692,TCGA-3B-A9HO,0,2,2
TCGA-3B-A9HP,1627,TCGA-3B-A9HP,1,1,2
TCGA-3B-A9HQ,2085,TCGA-3B-A9HQ,0,1,2
TCGA-3B-A9HR,2577,TCGA-3B-A9HR,0,2,4
TCGA-3B-A9HS,1366,TCGA-3B-A9HS,1,2,4
TCGA-3B-A9HT,1474,TCGA-3B-A9HT,0,1,1


Unnamed: 0_level_0,time,barcode,status,cluster
Unnamed: 0_level_1,<dbl>,<chr>,<dbl>,<int>
TCGA-SI-A71O,434,TCGA-SI-A71O,0,1
TCGA-3B-A9HI,1521,TCGA-3B-A9HI,0,1
TCGA-3B-A9HJ,1104,TCGA-3B-A9HJ,0,1
TCGA-3B-A9HL,599,TCGA-3B-A9HL,1,2
TCGA-3B-A9HO,692,TCGA-3B-A9HO,0,2
TCGA-3B-A9HP,1627,TCGA-3B-A9HP,1,1
TCGA-3B-A9HQ,2085,TCGA-3B-A9HQ,0,1
TCGA-3B-A9HR,2577,TCGA-3B-A9HR,0,2
TCGA-3B-A9HS,1366,TCGA-3B-A9HS,1,2
TCGA-3B-A9HT,1474,TCGA-3B-A9HT,0,1


In [131]:
scoredf

Unnamed: 0_level_0,TotalScore,TotalDispersion,Quartile,filtered_names
Unnamed: 0_level_1,<dbl>,<dbl>,<int>,<chr>
TCGA-SI-A71O,0.3298576,2201.6610,2,TCGA-SI-A71O
TCGA-3B-A9HI,0.3470090,1359.5442,3,TCGA-3B-A9HI
TCGA-3B-A9HJ,0.3259104,1141.6020,2,TCGA-3B-A9HJ
TCGA-3B-A9HL,0.3595242,1368.4398,4,TCGA-3B-A9HL
TCGA-3B-A9HO,0.3224175,2072.6748,2,TCGA-3B-A9HO
TCGA-3B-A9HP,0.3340150,1316.5488,2,TCGA-3B-A9HP
TCGA-3B-A9HQ,0.3357768,2201.6610,2,TCGA-3B-A9HQ
TCGA-3B-A9HR,0.3529927,1237.9710,4,TCGA-3B-A9HR
TCGA-3B-A9HS,0.3546655,1052.6460,4,TCGA-3B-A9HS
TCGA-3B-A9HT,0.3154286,2776.9098,1,TCGA-3B-A9HT


In [119]:
nchar('TCGA-Z4-AAPF')