In [1]:
#!/usr/bin/Rscript
#############################################################
options(stringsAsFactors = FALSE)
##############################################################

suppressMessages(
    {

    library(tidyverse)
    library(data.table)
    library(dynamicTreeCut)
    library(fastcluster)
    library(WGCNA)
    library(flashClust)
    library(lattice)
    library(latticeExtra)
    library(gridExtra)
    library(grid)
    library(ggplot2)
    library(IRdisplay)

    #enableWGCNAThreads(15)
    #change that on rf    
    allowWGCNAThreads(15)

    }
                )


# Save plot `p` twice via ggsave(): once as <fn>.pdf and once as <fn>.png,
# both `w` wide and `h` high (units are whatever ggsave() defaults to).
save_plot <- function(p, fn, w, h){
    extensions <- c(".pdf", ".png")
    for (k in seq_along(extensions)) {
        target <- paste0(fn, extensions[k])
        ggsave(filename = target, plot = p, width = w, height = h)
    }
}


# All outputs of this notebook are written relative to ../_m/.
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create('../_m/', showWarnings = FALSE)
setwd('../_m/')

Allowing multi-threading with up to 15 threads.


“'../_m' already exists”


In [2]:

# Collect every residualized expression table found one directory below
# 'analysis/' (one path per directory matched by the '*').
counts_path <- Sys.glob('../../../../jhpce_data/analysis/*/residualized_expression.tsv')
counts_path


# Region label = the part of each path between 'analysis/' and '/residu...'.
region <- gsub('.*analysis/|/residu.*','',counts_path)
region

In [3]:
# One voomSVA.RData per directory below 'analysis/'. The loops further down
# pair rdata_path[i] with counts_path[i], so both globs must list exactly the
# same directories in the same order.
rdata_path <- Sys.glob('../../../../jhpce_data/analysis/*/voomSVA.RData')
# Fail early instead of silently pairing metadata with the wrong expression
# table when one of the two files is missing from a directory.
stopifnot(identical(dirname(rdata_path), dirname(counts_path)))
rdata_path
#load(rdata_path)

In [4]:
# Process an expression table for one sample group.
#
# data  : data.frame with one column per sample; the columns named by
#         row.names(group) are selected.
# group : data.frame whose row names are the sample IDs to keep.
#
# The selected columns are transposed, filtered with WGCNA::goodSamplesGenes()
# (keeping only the rows flagged in $goodSamples and the columns flagged in
# $goodGenes) and transposed back, so the result has the same orientation as
# `data`.
process_data <- function(data, group) {
    expr <- data %>%
        dplyr::select(dplyr::all_of(row.names(group))) %>%
        t()

    # Run the quality check once and reuse it for both dimensions (it was
    # previously evaluated twice per call, doubling runtime and log output).
    gsg <- goodSamplesGenes(expr, verbose = 3)

    # drop = FALSE keeps a matrix even if only one sample or gene survives.
    t(expr[gsg$goodSamples, gsg$goodGenes, drop = FALSE])
    }

# Split the expression data between groups (i.e. case vs control, male vs female etc).
# Transposes the merged table, converts it to a data.frame and keeps only the
# rows whose names appear in row.names(group).
# NOTE(review): the `data` argument is never read -- the body uses the global
# `table_all` instead. `table_all` must therefore exist before this function is
# called, and whatever is passed as `data` is silently ignored (it is never
# evaluated). Confirm whether `data` was meant to be used here.
separate_data <- function(data,group) {
    df_tmp <- table_all %>% 
                        t() %>%
                        as.data.frame() %>%
                        # move row names into a column so dplyr::filter can match on them
                        tibble::rownames_to_column(var = 'row_name') %>%
                        filter(row_name %in% row.names(group)) %>%
                        tibble::column_to_rownames(var = 'row_name')
    return(df_tmp)
    
    }

# Return whichever of the two vectors contains the larger maximum value.
# When both maxima are equal, `y` is returned.
max_vector <- function(x, y) {
    x_has_larger_max <- max(x) > max(y)
    if (x_has_larger_max) {
        return(x)
    }
    y
}

In [5]:
# Labels of the two sample groups being compared, in the form
# c('group_A','group_B'); setA / setB are used in plot titles and file names.
setLabels <- c('Male','Female')
setA <- setLabels[[1]]
setB <- setLabels[[2]]

In [6]:
# Empty accumulator; the loop below adds the scale-free fit rows to it.
scale_free_df <- data.frame()

In [7]:

# Soft-threshold (scale-free topology) scan for every region and diagnosis
# subset, run separately on male and female samples. For each combination this
# writes sample-clustering dendrogram PDFs and per-sex fit-index TSVs under
# <region>/<subset>/ and prepends the Power == 15 rows to `scale_free_df`.
for (i in seq_along(counts_path)){
    
    # Expected to bring the object `v` into the workspace; its `targets`
    # element is used as the sample metadata below.
    load(rdata_path[i])
    
    dir.create(region[i], showWarnings = FALSE)
    
    metadata <- v$targets %>% 
                        as.data.frame()
    
    # The expression table depends only on the region, so read it once per
    # region instead of once per diagnosis subset.
    vsd <- fread(counts_path[i], header = TRUE, na.strings = "", check.names = FALSE) %>%
                        tibble::column_to_rownames(var = "feature_id") #%>% slice_head(n=1000)
    
    # These three objects are looked up by name via get(sample_filter) below.
    CTL <- metadata %>% 
                    filter(Dx == 'Control')
    SZD <- metadata %>% 
                    filter(Dx != 'Control')
    CTL_SZD <- metadata

    
    for (sample_filter in c('CTL', 'SZD', 'CTL_SZD')){
        
        save_path <- paste0(region[i],'/',sample_filter,'/')
        print(save_path)
        
        dir.create(save_path, showWarnings = FALSE)
        
        filtered_metadata <- get(sample_filter)
        
        group_a <- filtered_metadata %>% 
                                    filter(Sex == 'M') #male only
        group_b <- filtered_metadata %>% 
                                    filter(Sex != 'M') #female only   
    
        
        # Process data for group_a and group_b
        vsd_group_a <- process_data(vsd, group_a)
        vsd_group_b <- process_data(vsd, group_b)

        # Merge the processed data (keeps only the rows present in both groups)
        table_all <- merge(vsd_group_a, vsd_group_b, by = "row.names") %>%
                                                tibble::column_to_rownames('Row.names') 


        # Fixed: these calls previously passed the undefined name `tabble_all`.
        datExprA <- separate_data(table_all, group_a)
        datExprB <- separate_data(table_all, group_b)
        datExprAll <- table_all %>%
                                t() %>%
                                as.data.frame()

        sampleTreeA <- flashClust(dist(datExprA), method="average")
        sampleTreeB <- flashClust(dist(datExprB), method="average")
        sampleTreeAll <- flashClust(dist(datExprAll), method="average")


        ###save as .PDF file 
        # tryCatch(finally = ) closes the device even if plotting fails.
        # NOTE(review): ylim=20 is kept from the original; confirm that
        # plot.hclust actually honours it.
        pdf(paste0(save_path,'1a-Dist_clust_',sample_filter,'.pdf'),height=10,width = 15)
        tryCatch({
            par(mar=c(2,4,1,0), mfrow=c(1,2), oma=c(2,0,4,0), cex=0.5)
            plot(sampleTreeA, main=paste0(setA, ' ', sample_filter), sub="", cex.lab=1.2, cex.axis=1.2, cex.main=1.7, ylim=20)
            plot(sampleTreeB, main=paste0(setB, ' ',sample_filter), sub="", cex.lab=1.2, cex.axis=1.2, cex.main=1.7, ylim=20)
            par(cex=1)
            title(main="Sample Clustering Based on Distance ",outer=TRUE)
        }, finally = dev.off())


        ###save as .PDF file 
        pdf(paste0(save_path,'1a-Dist_clust_all_',sample_filter,'.pdf'),height=10,width = 15)
        tryCatch({
            par(cex=0.5)
            plot(sampleTreeAll, main='', sub="", cex.lab=1.2, cex.axis=1.2, cex.main=1.7, ylim=20)
            title(main="Sample Clustering Based on Distance ",outer=TRUE)
        }, finally = dev.off())
        
        
        powers1 <- seq(1, 30, by = 1)
        # Divert the verbose pickSoftThreshold output to a log file; the
        # `finally` restores normal output even if the fit errors out.
        sink('pickSoftThreshold.log')
        tryCatch({
            RpowerTableA <- pickSoftThreshold(datExprA, powerVector = powers1, RsquaredCut = 0.85, verbose = 1)
            RpowerTableB <- pickSoftThreshold(datExprB, powerVector = powers1, RsquaredCut = 0.85, verbose = 1)
        }, finally = sink())
        
        
        male_sfa <- RpowerTableA$fitIndices
        male_sfa$brain_region <- region[i]
        male_sfa$sex <- 'male'
        male_sfa$status <- sample_filter
        
        female_sfa <- RpowerTableB$fitIndices
        female_sfa$brain_region <- region[i]
        female_sfa$sex <- 'female'
        female_sfa$status <- sample_filter
        
        # Only the Power == 15 row of each sex goes into the summary table.
        male_female_scalefree <- rbind(male_sfa, female_sfa) %>% filter(Power == 15)
        
                
        display('male')
        display(RpowerTableA$fitIndices)
        display('female')
        display(RpowerTableB$fitIndices)

        
        #save results into a dataframe
        RpowerTableA$fitIndices %>%
                fwrite(paste0(save_path,setA,'_scale_free_metrics.tsv'),quote=FALSE,sep='\t',row.names=FALSE)
        RpowerTableB$fitIndices %>%
                fwrite(paste0(save_path,setB,'_scale_free_metrics.tsv'),quote=FALSE,sep='\t',row.names=FALSE)
        
        
        # Newest rows are placed first.
        scale_free_df <- rbind(male_female_scalefree,scale_free_df)
                
        
        print(dim(datExprAll))
        
    
        }
    
    }

Loading required package: limma



[1] "caudate/CTL/"
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.3496741,-2.024912,0.8822997,2806.856,2741.132,5080.035186
2,0.6608887,-2.174302,0.9346218,512.6239,446.9268,1619.786125
3,0.7208429,-2.208397,0.9387765,131.5792,93.90649,689.371365
4,0.7825026,-2.122364,0.9655373,43.11499,23.36145,353.849053
5,0.8300185,-2.058906,0.9732297,16.97152,6.5628,206.418402
6,0.8884784,-1.958186,0.9884414,7.691604,2.024853,131.753267
7,0.9226762,-1.875098,0.9926598,3.8924,0.6845366,90.291854
8,0.9056655,-1.884634,0.9756146,2.150197,0.2471844,67.473358
9,0.9196377,-1.813267,0.9797545,1.274424,0.09403218,51.980018
10,0.9115704,-1.777408,0.9742303,0.7996743,0.03715845,40.942389


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.2695359,-2.64417,0.938293,3596.345,3551.221,5966.926885
2,0.5872897,-2.759733,0.9544932,784.9439,731.2969,2071.892104
3,0.7227855,-2.559145,0.9711315,226.1155,189.6071,896.687452
4,0.7948723,-2.403357,0.987351,79.21896,57.1251,456.756501
5,0.8452624,-2.285352,0.9950552,32.2516,19.25012,265.567691
6,0.8851843,-2.199938,0.9983081,14.80201,7.051731,170.142505
7,0.9096786,-2.138915,0.9987835,7.490266,2.778474,118.155547
8,0.9311006,-2.044306,0.9975647,4.108214,1.160527,85.795931
9,0.945189,-1.952445,0.9974047,2.409303,0.5091732,64.417765
10,0.9460998,-1.897533,0.992814,1.49433,0.2343647,51.180997


[1]   240 26881
[1] "caudate/SZD/"
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.2003006,-1.946639,0.9277272,3270.763,3226.372,5603.967558
2,0.5911309,-2.54097,0.9542041,660.3222,609.7837,1896.094117
3,0.7089364,-2.672953,0.9622212,178.7039,146.486,827.04521
4,0.7655529,-2.642282,0.9767817,59.60486,41.24154,423.500164
5,0.8075424,-2.526299,0.9873505,23.34926,13.01053,242.152057
6,0.8402633,-2.402437,0.9906728,10.39559,4.50096,150.00426
7,0.8631108,-2.252477,0.9934588,5.133867,1.667284,98.704025
8,0.8978474,-2.083676,0.9976534,2.759876,0.6567112,68.720427
9,0.9015547,-2.010829,0.9946423,1.591159,0.274669,50.280708
10,0.9076294,-1.92631,0.9925993,0.9721238,0.1201064,37.8091


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.1210686,-1.804585,0.9570862,4181.799,4123.795,6720.464235
2,0.5005474,-2.2854,0.9792976,1029.791,971.8188,2502.208686
3,0.722635,-2.326525,0.9918989,324.6686,285.1662,1128.344619
4,0.811556,-2.38917,0.9927678,120.9791,96.72839,594.123148
5,0.8374543,-2.519791,0.9873708,51.07176,36.25574,358.547915
6,0.8504785,-2.576956,0.9852584,23.79022,14.76956,233.980177
7,0.8803112,-2.507099,0.9910596,12.00942,6.415211,162.042646
8,0.8945522,-2.43727,0.9899476,6.484114,2.935133,117.509036
9,0.9154858,-2.316447,0.9900957,3.70729,1.40646,88.347973
10,0.9242071,-2.218744,0.9891324,2.227123,0.7020513,68.353054


[1]   153 26881
[1] "caudate/CTL_SZD/"
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.3277993,-1.711151,0.8933342,2583.578,2545.638,4893.076729
2,0.6436132,-2.084684,0.9289209,446.7761,392.859,1525.168552
3,0.7355352,-2.112397,0.9611018,110.7949,79.12264,633.272592
4,0.805592,-2.087684,0.9795687,35.50603,19.0082,319.23757
5,0.8422318,-2.101585,0.9849871,13.76738,5.211114,186.614854
6,0.8781358,-2.037921,0.9891405,6.174326,1.571247,119.548307
7,0.9000183,-1.956747,0.9912523,3.101574,0.5161285,81.677758
8,0.9047907,-1.910085,0.9926786,1.704236,0.1835615,60.264593
9,0.9124751,-1.847883,0.9940817,1.005968,0.06923926,45.987145
10,0.9190814,-1.78309,0.9958317,0.6290175,0.02722782,35.905665


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.1989951,-1.761126,0.9289889,3137.19,3107.591,5369.476433
2,0.620794,-2.42719,0.9709018,611.3836,567.0924,1778.286833
3,0.751618,-2.541308,0.9839703,160.5455,132.2023,774.221085
4,0.8342002,-2.473739,0.9945226,52.16632,36.18044,402.464789
5,0.8829365,-2.38964,0.991452,19.97648,11.13637,236.650868
6,0.9059082,-2.306891,0.9813695,8.724484,3.768165,152.031762
7,0.9472085,-2.137825,0.9912979,4.242592,1.373427,104.214449
8,0.9690615,-1.984127,0.9952603,2.255013,0.5340381,74.972456
9,0.9771822,-1.864861,0.992952,1.290755,0.219769,55.947145
10,0.9798355,-1.773695,0.9919903,0.7859957,0.09457583,43.25518


[1]   393 26881
[1] "dlpfc/CTL/"
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.04095541,-0.3457752,0.9051444,4248.095095,4078.034,7894.46684
2,0.62177826,-1.1907816,0.8568567,1212.627819,979.155,3619.00985
3,0.70038551,-1.3280993,0.7847665,484.41995,295.3534,2024.46091
4,0.84528021,-1.1613433,0.8489478,240.364383,103.2585,1266.97555
5,0.8622388,-1.2481356,0.9025844,137.666483,40.10446,992.23365
6,0.8758333,-1.2811176,0.9416563,86.918945,16.86686,810.57426
7,0.88922107,-1.2968159,0.9640157,58.770114,7.556273,676.92208
8,0.8911386,-1.3058259,0.9722133,41.773177,3.54678,574.07615
9,0.89923837,-1.3044568,0.9805046,30.833072,1.737619,492.88349
10,0.90163434,-1.3115251,0.9844795,23.435781,0.8688234,427.49387


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.0002806603,0.02625306,0.8551528,5642.154791,5446.387,10206.95756
2,0.6311916511,-0.98133563,0.9308397,1956.917795,1713.385,5464.2703
3,0.7946400837,-1.21552602,0.9516663,877.564448,642.4428,3413.45838
4,0.8369934598,-1.30116148,0.9612746,462.077608,269.9871,2326.99979
5,0.8462287251,-1.34938263,0.9641141,271.251854,122.904,1691.32072
6,0.848431747,-1.3796679,0.9658776,172.095236,59.81488,1283.21281
7,0.8505381669,-1.39152097,0.9709251,115.691311,30.6681,1005.42644
8,0.8560791644,-1.3976152,0.9760205,81.322403,16.23305,808.54324
9,0.8597674344,-1.40544648,0.9789145,59.222088,8.948642,663.98274
10,0.8647952314,-1.40831075,0.9829004,44.383796,5.104055,554.30452


[1]   212 26627
[1] "dlpfc/SZD/"
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.04039055,-0.2482559,-0.1571872,5431.697,4659.601,10239.812
2,0.46084206,-0.8385228,0.6366691,2420.9016,1284.703,6918.526
3,0.45340213,-0.7613163,0.8809857,1574.9236,446.7328,5920.175
4,0.46338929,-0.6830694,0.953201,1208.3003,180.541,5335.152
5,0.47623234,-0.6432255,0.9733765,999.6762,80.58899,4909.253
6,0.5251334,-0.6202715,0.9795865,860.296,38.92842,4571.235
7,0.56562258,-0.6060409,0.9839416,757.9292,19.74874,4290.815
8,0.59684227,-0.5965045,0.9864195,678.1982,10.36839,4051.381
9,0.62756002,-0.593021,0.9870588,613.636,5.710881,3842.682
10,0.6548899,-0.5880883,0.9862185,559.9075,3.22652,3657.908


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.0001820027,-0.03290306,0.9091717,5284.7521231,5188.973,8933.00724
2,0.4851717185,-1.28224937,0.9076254,1701.4905172,1542.52,4456.25456
3,0.6772189469,-1.59167895,0.9067683,710.9402848,559.1028,2653.54101
4,0.7340518546,-1.67761583,0.9054653,352.241326,229.5018,1746.51835
5,0.7623924388,-1.67595377,0.9084146,196.7567777,103.1791,1225.86179
6,0.7742416033,-1.63619178,0.9052542,119.9982773,49.78892,899.87913
7,0.7776469403,-1.60246055,0.8990473,78.1969567,25.35529,688.27862
8,0.7664927778,-1.59378765,0.8868371,53.6321765,13.45281,548.33327
9,0.7654383738,-1.56153582,0.8849651,38.2995669,7.411165,446.77169
10,0.7759073092,-1.52932914,0.890438,28.2529777,4.210258,370.68058


[1]   147 26627
[1] "dlpfc/CTL_SZD/"
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.005621822,-0.09187708,0.3147648,4631.65008,4100.321,9081.3147
2,0.585652391,-0.85302466,0.6259086,1811.73684,1012.679,5610.6382
3,0.600573243,-0.82816935,0.8563466,1074.84936,319.1505,4659.3128
4,0.601343285,-0.76030362,0.9365563,772.40108,117.5196,4081.6621
5,0.62741527,-0.71634748,0.9806901,607.56378,47.70502,3663.445
6,0.643819031,-0.69195532,0.983607,501.29478,21.05776,3334.1061
7,0.68812451,-0.6813135,0.9736589,425.59952,9.843248,3063.2515
8,0.726098178,-0.67317537,0.9596839,368.22862,4.750339,2834.0257
9,0.758779054,-0.66981051,0.943227,322.92199,2.402192,2636.0383
10,0.787607314,-0.67135717,0.931638,286.09567,1.24932,2462.4098


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.0748487,-0.3934636,0.8986801,4950.4078864,4765.029,9329.44574
2,0.6733882,-1.1080409,0.9389937,1567.276071,1329.483,4725.59351
3,0.8008647,-1.2979054,0.95134,660.3300509,448.6776,2827.26114
4,0.8317989,-1.3633579,0.954189,332.977248,172.0864,1865.30778
5,0.8430026,-1.3798668,0.9578883,189.5021934,72.24143,1313.36286
6,0.8317333,-1.40201,0.9545384,117.4832742,32.56021,981.96619
7,0.823284,-1.4209748,0.9541363,77.5751097,15.5044,762.18092
8,0.8155281,-1.4340973,0.9525006,53.7481013,7.669426,608.49129
9,0.8154545,-1.4345125,0.9551996,38.6738541,3.955727,496.77641
10,0.8031274,-1.4531,0.9489964,28.6867476,2.103018,412.9858


[1]   359 26627
[1] "hippocampus/CTL/"
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.02390559,-0.3460475,0.8884592,4217.247,4189.916,7862.858623
2,0.50622866,-1.3174246,0.9054981,1131.687,1026.556,3373.11854
3,0.67610041,-1.5515112,0.9279645,405.9539,314.7655,1728.45912
4,0.72409506,-1.692762,0.9345933,175.7845,112.1296,1005.215301
5,0.76099132,-1.7281304,0.948665,86.8052,44.43528,632.450627
6,0.7905533,-1.7290315,0.9592737,47.19826,19.01507,420.149431
7,0.81792745,-1.6967548,0.9673703,27.61475,8.615617,290.52983
8,0.84249032,-1.6634414,0.9740203,17.11417,4.097264,209.06285
9,0.85992407,-1.6366789,0.9785782,11.10976,2.036655,156.638903
10,0.8719195,-1.6180837,0.9833268,7.492151,1.048351,120.899421


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.07011436,-0.4598469,0.8780084,5158.9068844,4887.242,9712.87107
2,0.72018618,-1.2355016,0.9613951,1623.9301754,1345.821,4929.98595
3,0.85173819,-1.4162712,0.9710483,661.4166576,454.1382,2923.2054
4,0.87830721,-1.5034689,0.9696231,317.4509101,174.1164,1915.95355
5,0.8911919,-1.5340618,0.9724979,170.7095789,73.59127,1336.02957
6,0.8905483,-1.5504765,0.9717835,99.7499309,33.36595,974.11993
7,0.8818861,-1.5661542,0.9653548,62.0866932,15.99808,734.63077
8,0.88071021,-1.5721082,0.965112,40.6089674,8.059776,568.89712
9,0.88266737,-1.5704211,0.9675661,27.6439693,4.218047,450.07045
10,0.88707488,-1.564765,0.9701541,19.4475152,2.287073,362.38786


[1]   243 26727
[1] "hippocampus/SZD/"
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.04359873,-0.5201538,0.9725758,4518.6469882,4443.121,8041.31062
2,0.558246,-1.4198823,0.9786963,1244.3673135,1135.638,3532.43582
3,0.74901189,-1.7291837,0.9746623,447.2368362,359.6611,1893.13193
4,0.81219904,-1.8715071,0.9732844,192.0691215,131.621,1147.87188
5,0.84005455,-1.930033,0.9753028,93.8559848,53.11382,756.28902
6,0.8612044,-1.935214,0.9785397,50.5767911,23.14865,528.95331
7,0.87956582,-1.9171346,0.9862903,29.4138517,10.72749,386.81741
8,0.88149454,-1.8996096,0.9837278,18.1765907,5.214425,292.7512
9,0.87317069,-1.8896925,0.9771427,11.798389,2.618545,227.65204
10,0.86159545,-1.8784419,0.9715053,7.9740745,1.368999,180.95882


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.05679536,-0.6128742,0.9207854,5263.8562805,5040.813,9299.26897
2,0.58824203,-1.5220942,0.9502607,1638.7942078,1430.079,4611.54139
3,0.77645361,-1.7806816,0.9582755,648.8892794,498.6296,2694.32602
4,0.81882809,-1.9087084,0.9497474,300.5220634,198.18,1737.77201
5,0.84363135,-1.9346035,0.9560758,155.6735806,86.48628,1198.63909
6,0.85795191,-1.9252813,0.9627612,87.7299445,40.58347,868.01987
7,0.85060902,-1.9279179,0.9581742,52.7934927,20.17923,652.20373
8,0.85165563,-1.9147688,0.9604592,33.4797701,10.50486,504.36777
9,0.82914547,-1.9339081,0.948886,22.1589351,5.683176,399.14119
10,0.84433421,-1.8968506,0.958587,15.1952323,3.177589,321.87234


[1]   132 26727
[1] "hippocampus/CTL_SZD/"
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.02909356,-0.3335507,0.9346659,4036.498,4003.268,7729.978102
2,0.54360659,-1.333479,0.9279818,1047.745,941.0015,3292.438248
3,0.7038872,-1.6208922,0.9431905,366.0994,278.3912,1691.063128
4,0.77625896,-1.7146893,0.9636949,155.2171,95.84107,974.617306
5,0.80392103,-1.7785377,0.9713541,75.3609,36.74948,617.188594
6,0.81066857,-1.8261414,0.9702699,40.42582,15.14688,416.015698
7,0.82064536,-1.8334589,0.9752038,23.40069,6.702155,293.775934
8,0.82823691,-1.8230833,0.978041,14.38074,3.098788,215.119327
9,0.8317153,-1.8124508,0.9771551,9.273508,1.499254,162.16202
10,0.8388103,-1.7867773,0.9772088,6.221,0.7521746,125.178061


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.09962681,-0.5236098,0.931518,4709.7351951,4459.442,9216.34274
2,0.71818034,-1.2948392,0.9682178,1382.8647319,1133.411,4520.70377
3,0.83636771,-1.5090523,0.9657981,533.2447488,356.6499,2610.4936
4,0.87238527,-1.5856279,0.9695112,244.8242769,128.8883,1663.44443
5,0.88478687,-1.6095379,0.9722012,126.8791974,51.12581,1133.44875
6,0.8861529,-1.618761,0.9729478,71.8480169,21.92256,811.52168
7,0.88885858,-1.6186377,0.9755335,43.5243559,10.11302,603.5158
8,0.89100244,-1.6171027,0.9780732,27.8005235,4.868466,461.88913
9,0.8930598,-1.6135541,0.981067,18.5309851,2.425969,361.71239
10,0.90095451,-1.599465,0.9845741,12.793008,1.254765,288.62414


[1]   375 26727


In [8]:
# Display the accumulated Power == 15 fit indices (one row per region, sex and
# diagnosis subset) collected by the loop above.
scale_free_df

Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.,brain_region,sex,status
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
15,0.8603353,-1.6496386,0.9721536,1.2683011,0.0356344068,43.80547,hippocampus,male,CTL_SZD
15,0.912947,-1.5555803,0.9899227,2.8848024,0.0712492655,114.24422,hippocampus,female,CTL_SZD
15,0.8998215,-1.7202108,0.9895271,1.6997666,0.0819433563,72.02038,hippocampus,male,SZD
15,0.8667655,-1.8004127,0.9775726,3.3379508,0.257240598,133.03247,hippocampus,female,SZD
15,0.8503683,-1.7506404,0.9813398,1.5508506,0.055742711,51.48229,hippocampus,male,CTL
15,0.8814454,-1.5697381,0.9728334,4.7189026,0.1549152156,151.23476,hippocampus,female,CTL
15,0.8758955,-0.6907531,0.8932539,171.957857,0.0732892874,1833.11636,dlpfc,male,CTL_SZD
15,0.7950718,-1.4876494,0.9513087,8.7369342,0.1303790612,196.7397,dlpfc,female,CTL_SZD
15,0.7395474,-0.5961472,0.9677738,383.2604547,0.2555396597,2963.91928,dlpfc,male,SZD
15,0.8368959,-1.418601,0.9370512,8.5199092,0.3590744056,175.80627,dlpfc,female,SZD


In [9]:
# Close a graphics device left open by an earlier cell, if there is one.
# A bare dev.off() raises an error when no device is open.
if (!is.null(dev.list())) dev.off()

### We did not achieve a scale-free topology fit index (R^2 >= 0.85) for some DLPFC networks when using all samples from this brain region. Based on the DLPFC hierarchical clustering dendrograms ('_m/dlpfc/{CTL,SZD}/*.pdf'), let's remove the samples that look like outliers and check whether a scale-free network can then be achieved.

In [10]:
# Run the scan again, but only on the dlpfc data and with 3 DLPFC outlier
# samples removed (1 male, 2 female). The Power == 15 rows are collected in
# `scale_free_df2`; no PDFs or TSVs are written by this pass.

scale_free_df2 <- data.frame()

# Look the region up by name instead of relying on its position in counts_path.
dlpfc_idx <- which(region == 'dlpfc')
stopifnot(length(dlpfc_idx) == 1L)

for (i in dlpfc_idx){
    
    # Expected to bring the object `v` into the workspace; its `targets`
    # element is used as the sample metadata below.
    load(rdata_path[i])
    
    dir.create(region[i], showWarnings = FALSE)
    
    metadata <- v$targets %>% 
                        as.data.frame()
    
    # The expression table depends only on the region, so read it once here
    # instead of once per diagnosis subset.
    vsd <- fread(counts_path[i], header = TRUE, na.strings = "", check.names = FALSE) %>%
                        tibble::column_to_rownames(var = "feature_id") #%>% slice_head(n=1000)
    
    # These three objects are looked up by name via get(sample_filter) below.
    CTL <- metadata %>% 
                    filter(Dx == 'Control')
    SZD <- metadata %>% 
                    filter(Dx != 'Control')
    CTL_SZD <- metadata

    
    for (sample_filter in c('CTL', 'SZD', 'CTL_SZD')){
            
        save_path <- paste0(region[i],'/',sample_filter,'/')
        print(save_path)
        
        dir.create(save_path, showWarnings = FALSE)
        
        filtered_metadata <- get(sample_filter)
        
        group_a <- filtered_metadata %>% 
                                    filter(Sex == 'M') %>% #male only
                                    filter(RNum != 'R3555') #remove this specific sample, huge outlier
        group_b <- filtered_metadata %>% 
                                    filter(Sex != 'M') %>% #female only 
                                    filter(!RNum %in% c('R12351', 'R12371'))  #remove these specific samples, outliers
    
        
        # Process data for group_a and group_b
        vsd_group_a <- process_data(vsd, group_a)
        vsd_group_b <- process_data(vsd, group_b)

        # Merge the processed data (keeps only the rows present in both groups)
        table_all <- merge(vsd_group_a, vsd_group_b, by = "row.names") %>%
                                                tibble::column_to_rownames('Row.names') 


        # Fixed: these calls previously passed the undefined name `tabble_all`.
        datExprA <- separate_data(table_all, group_a)
        datExprB <- separate_data(table_all, group_b)
        datExprAll <- table_all %>%
                                t() %>%
                                as.data.frame()

        sampleTreeA <- flashClust(dist(datExprA), method="average")
        sampleTreeB <- flashClust(dist(datExprB), method="average")
        sampleTreeAll <- flashClust(dist(datExprAll), method="average")

        
        powers1 <- seq(1, 30, by = 1)
        # Output is not diverted here, so the verbose fit table is printed.
        # sink('pickSoftThreshold.log')
        RpowerTableA <- pickSoftThreshold(datExprA, powerVector = powers1, RsquaredCut = 0.85, verbose = 1)
        RpowerTableB <- pickSoftThreshold(datExprB, powerVector = powers1, RsquaredCut = 0.85, verbose = 1)
        # sink()
        
        
        male_sfa <- RpowerTableA$fitIndices
        male_sfa$brain_region <- region[i]
        male_sfa$sex <- 'male'
        male_sfa$status <- sample_filter
        
        female_sfa <- RpowerTableB$fitIndices
        female_sfa$brain_region <- region[i]
        female_sfa$sex <- 'female'
        female_sfa$status <- sample_filter
        
        # Only the Power == 15 row of each sex goes into the summary table.
        male_female_scalefree <- rbind(male_sfa, female_sfa) %>% filter(Power == 15)
        
                
        display('male')
        display(RpowerTableA$fitIndices)
        display('female')
        display(RpowerTableB$fitIndices)

        
        # The per-sex TSV export is disabled in this pass; enabling it would
        # overwrite the files the all-sample run wrote to the same paths.
        #RpowerTableA$fitIndices %>%
                #fwrite(paste0(save_path,setA,'_scale_free_metrics.tsv'),quote=F,sep='\t',row.names=F)
        #RpowerTableB$fitIndices %>%
                #fwrite(paste0(save_path,setB,'_scale_free_metrics.tsv'),quote=F,sep='\t',row.names=F)
        
        
        # Newest rows are placed first.
        scale_free_df2 <- rbind(male_female_scalefree,scale_free_df2)
        
        
        print(dim(datExprAll))
                
    
        }
    
    }

“'dlpfc' already exists”


[1] "dlpfc/CTL/"


“'dlpfc/CTL' already exists”


 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
pickSoftThreshold: will use block size 1680.
 pickSoftThreshold: calculating connectivity for given powers... 
   Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k. max.k.
1      1    0.041 -0.346          0.905 4250.000  4.08e+03 7890.0
2      2    0.622 -1.190          0.857 1210.000  9.79e+02 3620.0
3      3    0.700 -1.330          0.785  484.000  2.95e+02 2020.0
4      4    0.845 -1.160          0.849  240.000  1.03e+02 1270.0
5      5    0.862 -1.250          0.903  138.000  4.01e+01  992.0
6      6    0.876 -1.280          0.942   86.900  1.69e+01  811.0
7      7    0.889 -1.300          0.964   58.800  7.56e+00  677.0
8      8    0.891 -1.310          0.972   41.800  3.55e+00  574.0
9      9   

Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.04095541,-0.3457752,0.9051444,4248.095095,4078.034,7894.46684
2,0.62177826,-1.1907816,0.8568567,1212.627819,979.155,3619.00985
3,0.70038551,-1.3280993,0.7847665,484.41995,295.3534,2024.46091
4,0.84528021,-1.1613433,0.8489478,240.364383,103.2585,1266.97555
5,0.8622388,-1.2481356,0.9025844,137.666483,40.10446,992.23365
6,0.8758333,-1.2811176,0.9416563,86.918945,16.86686,810.57426
7,0.88922107,-1.2968159,0.9640157,58.770114,7.556273,676.92208
8,0.8911386,-1.3058259,0.9722133,41.773177,3.54678,574.07615
9,0.89923837,-1.3044568,0.9805046,30.833072,1.737619,492.88349
10,0.90163434,-1.3115251,0.9844795,23.435781,0.8688234,427.49387


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.04846569,-0.448745,0.9076984,5095.6460215,5018.833,9157.8835
2,0.55342853,-1.296348,0.9027947,1614.7414896,1437.909,4522.68108
3,0.66591473,-1.524434,0.8863348,672.5411778,502.8415,2635.50383
4,0.67423595,-1.610267,0.8651729,334.3049506,199.9281,1692.96782
5,0.68234284,-1.593719,0.8511074,187.8289689,87.08723,1160.67826
6,0.69181294,-1.524999,0.8319932,115.2833195,40.65716,834.09258
7,0.82410232,-1.351372,0.9211666,75.5738127,19.99418,621.29208
8,0.83059629,-1.40022,0.9428705,52.1055949,10.2917,515.8746
9,0.83999115,-1.436666,0.9595682,37.3757166,5.478758,437.90842
10,0.85113457,-1.454271,0.9702832,27.6741449,3.027631,376.16004


   Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k. max.k.
1      1   0.0485 -0.449          0.908 5100.000  5.02e+03 9160.0
2      2   0.5530 -1.300          0.903 1610.000  1.44e+03 4520.0
3      3   0.6660 -1.520          0.886  673.000  5.03e+02 2640.0
4      4   0.6740 -1.610          0.865  334.000  2.00e+02 1690.0
5      5   0.6820 -1.590          0.851  188.000  8.71e+01 1160.0
6      6   0.6920 -1.520          0.832  115.000  4.07e+01  834.0
7      7   0.8240 -1.350          0.921   75.600  2.00e+01  621.0
8      8   0.8310 -1.400          0.943   52.100  1.03e+01  516.0
9      9   0.8400 -1.440          0.960   37.400  5.48e+00  438.0
10    10   0.8510 -1.450          0.970   27.700  3.03e+00  376.0
11    11   0.8580 -1.460          0.980   21.000  1.71e+00  326.0
12    12   0.8540 -1.480          0.977   16.300  9.93e-01  285.0
13    13   0.8580 -1.500          0.979   12.900  5.91e-01  251.0
14    14   0.8620 -1.500          0.982   10.400  3.59e-01  222.0
15    15  

“'dlpfc/SZD' already exists”


 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
pickSoftThreshold: will use block size 1680.
 pickSoftThreshold: calculating connectivity for given powers... 
   Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k. max.k.
1      1   0.0618 -0.579          0.921 4300.000  4.24e+03 7710.0
2      2   0.5260 -1.450          0.877 1180.000  1.04e+03 3380.0
3      3   0.6270 -1.630          0.825  442.000  3.20e+02 1800.0
4      4   0.7830 -1.400          0.867  203.000  1.12e+02 1080.0
5      5   0.8660 -1.440          0.945  109.000  4.36e+01  797.0
6      6   0.8900 -1.480          0.972   64.500  1.83e+01  637.0
7      7   0.8910 -1.490          0.974   41.400  8.15e+00  522.0
8      8   0.8830 -1.500          0.967   28.200  3.81e+00  436.0
9      9   

Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.061842,-0.5785079,0.9212189,4300.3922326,4238.567,7710.12375
2,0.5261217,-1.4453429,0.8773562,1182.5238216,1043.235,3378.71318
3,0.6266595,-1.6253748,0.8245892,441.9163117,319.7746,1799.72854
4,0.7834434,-1.4035801,0.8669203,203.448717,111.9642,1075.17572
5,0.8658729,-1.4441464,0.945279,108.6078687,43.5511,797.00441
6,0.8898753,-1.4768894,0.9716601,64.5114976,18.28113,637.20859
7,0.8912745,-1.4926775,0.9741285,41.4320173,8.1546,522.20536
8,0.8831012,-1.5019997,0.9666074,28.2032897,3.811398,435.63131
9,0.8868863,-1.4919976,0.9703931,20.0667718,1.876862,368.44942
10,0.8705646,-1.4969517,0.9589633,14.7773052,0.9534014,315.13822


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.004660991,0.2331819,0.9000312,4827.0899261,4838.323,7393.34864
2,0.322814314,-1.0173684,0.9141093,1427.0093516,1339.401,3198.09867
3,0.758168875,-1.4145888,0.9770816,555.4594181,458.3097,1865.90468
4,0.887359078,-1.6417496,0.9775873,261.2781262,179.1785,1346.14968
5,0.927048267,-1.6777043,0.9708589,141.1736451,77.36501,1045.95461
6,0.934607879,-1.6554652,0.9646767,84.5921187,35.81924,849.38314
7,0.922250915,-1.6226011,0.9485534,54.7897353,17.57525,705.52881
8,0.922720718,-1.5751487,0.9501887,37.64422,9.043572,595.67561
9,0.921775917,-1.5333439,0.9512377,27.0624045,4.839372,509.3082
10,0.92119626,-1.4956875,0.9535974,20.1538545,2.670832,439.92526


   Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k. max.k.
1      1  0.00466  0.233          0.900 4830.000  4.84e+03 7390.0
2      2  0.32300 -1.020          0.914 1430.000  1.34e+03 3200.0
3      3  0.75800 -1.410          0.977  555.000  4.58e+02 1870.0
4      4  0.88700 -1.640          0.978  261.000  1.79e+02 1350.0
5      5  0.92700 -1.680          0.971  141.000  7.74e+01 1050.0
6      6  0.93500 -1.660          0.965   84.600  3.58e+01  849.0
7      7  0.92200 -1.620          0.949   54.800  1.76e+01  706.0
8      8  0.92300 -1.580          0.950   37.600  9.04e+00  596.0
9      9  0.92200 -1.530          0.951   27.100  4.84e+00  509.0
10    10  0.92100 -1.500          0.954   20.200  2.67e+00  440.0
11    11  0.91500 -1.470          0.953   15.400  1.52e+00  383.0
12    12  0.91600 -1.440          0.956   12.100  8.85e-01  336.0
13    13  0.88800 -1.450          0.937    9.650  5.29e-01  297.0
14    14  0.88500 -1.440          0.938    7.820  3.22e-01  264.0
15    15  

“'dlpfc/CTL_SZD' already exists”


 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
 Flagging genes and samples with too many missing values...
  ..step 1
pickSoftThreshold: will use block size 1680.
 pickSoftThreshold: calculating connectivity for given powers... 
   Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k. max.k.
1      1   0.0305 -0.302          0.901 3910.000  3.76e+03 7580.0
2      2   0.5860 -1.270          0.832 1050.000  8.35e+02 3350.0
3      3   0.6530 -1.420          0.760  402.000  2.35e+02 1820.0
4      4   0.8620 -1.200          0.887  194.000  7.67e+01 1130.0
5      5   0.8640 -1.310          0.928  108.000  2.79e+01  890.0
6      6   0.8830 -1.330          0.958   67.200  1.11e+01  721.0
7      7   0.8800 -1.350          0.965   44.800  4.72e+00  597.0
8      8   0.8880 -1.350          0.973   31.400  2.13e+00  502.0
9      9   

Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.03047936,-0.3016302,0.9014501,3906.0684497,3760.008,7581.18358
2,0.58590224,-1.2699595,0.8324474,1050.018893,834.9647,3354.22326
3,0.65271084,-1.4220954,0.7595869,402.1162883,234.6538,1820.50292
4,0.86184254,-1.1989487,0.8868867,193.5789941,76.65791,1132.11518
5,0.86368145,-1.3082959,0.9281571,108.3874791,27.89131,889.51475
6,0.88295808,-1.3340869,0.9577294,67.2188709,11.11324,720.85044
7,0.88006477,-1.352031,0.9651765,44.7767403,4.715439,596.69217
8,0.88778697,-1.3513821,0.9727314,31.4150278,2.131939,501.88529
9,0.89189733,-1.3576999,0.9778847,22.9161592,0.9950253,427.55846
10,0.88054165,-1.3718907,0.9714909,17.2289501,0.4889138,368.09065


Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.04675225,-0.4476371,0.9205575,4370.4399339,4315.119,7957.79238
2,0.5324931,-1.2535674,0.8799631,1240.0759427,1081.469,3522.83477
3,0.64699252,-1.4010752,0.819474,480.6678149,338.0529,1892.41218
4,0.83536461,-1.2675615,0.9063818,228.975927,122.2293,1170.72093
5,0.85928134,-1.413992,0.9482732,125.7492711,48.60109,911.52696
6,0.88322972,-1.4484175,0.9700858,76.3657669,20.86091,731.66201
7,0.89390735,-1.4526974,0.9794449,49.8819437,9.562496,600.1931
8,0.88895651,-1.4582173,0.9749824,34.3977044,4.577396,500.62414
9,0.88884809,-1.4538698,0.9727246,24.7221949,2.281323,423.18229
10,0.89436911,-1.4444438,0.9750865,18.3529948,1.176233,361.67319


   Power SFT.R.sq  slope truncated.R.sq  mean.k. median.k. max.k.
1      1   0.0468 -0.448          0.921 4370.000  4.32e+03 7960.0
2      2   0.5320 -1.250          0.880 1240.000  1.08e+03 3520.0
3      3   0.6470 -1.400          0.819  481.000  3.38e+02 1890.0
4      4   0.8350 -1.270          0.906  229.000  1.22e+02 1170.0
5      5   0.8590 -1.410          0.948  126.000  4.86e+01  912.0
6      6   0.8830 -1.450          0.970   76.400  2.09e+01  732.0
7      7   0.8940 -1.450          0.979   49.900  9.56e+00  600.0
8      8   0.8890 -1.460          0.975   34.400  4.58e+00  501.0
9      9   0.8890 -1.450          0.973   24.700  2.28e+00  423.0
10    10   0.8940 -1.440          0.975   18.400  1.18e+00  362.0
11    11   0.8920 -1.440          0.974   14.000  6.31e-01  312.0
12    12   0.8920 -1.440          0.973   10.900  3.46e-01  271.0
13    13   0.8830 -1.450          0.965    8.620  1.94e-01  237.0
14    14   0.8820 -1.440          0.965    6.930  1.12e-01  209.0
15    15  

In [11]:
# Display the DLPFC scale-free topology fit table held in scale_free_df2
# (the rendered output lists one Power == 15 row per sex/status group).
scale_free_df2

Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.,brain_region,sex,status
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
15,0.8862898,-1.398608,0.9767316,5.513155,0.02037328,194.7508,dlpfc,male,CTL_SZD
15,0.8889554,-1.437447,0.9697805,5.639542,0.0659227,185.1673,dlpfc,female,CTL_SZD
15,0.8676044,-1.488453,0.9595083,4.426547,0.04846326,162.3829,dlpfc,male,SZD
15,0.8794784,-1.428097,0.9370866,6.418598,0.1999023,235.2397,dlpfc,female,SZD
15,0.9020769,-1.344196,0.9887057,7.865503,0.04420381,233.7331,dlpfc,male,CTL
15,0.8616363,-1.504724,0.9823709,8.42959,0.22049679,198.1165,dlpfc,female,CTL


# Main Results

In [12]:
# After removing three DLPFC samples (R3555, R12351, R12371), the DLPFC data
# also reached a scale-free network at power 15 (R^2 >= 0.85).
# Show the key metadata fields for those three samples.

metadata %>%
  filter(RNum %in% c('R3555', 'R12351', 'R12371')) %>%
  select(RNum, Sex, Dx, RIN, Region)

Unnamed: 0_level_0,RNum,Sex,Dx,RIN,Region
Unnamed: 0_level_1,<chr>,<fct>,<fct>,<dbl>,<chr>
R12351,R12351,F,Control,6.5,DLPFC
R12371,R12371,F,SCZD,6.2,DLPFC
R3555,R3555,M,SCZD,9.0,DLPFC


In [13]:
# Raise the notebook table display limits to 100 rows / 100 columns so the
# wide metadata table below is not truncated.
options(repr.matrix.max.rows = 100, repr.matrix.max.cols = 100)

# Full metadata records for the three removed DLPFC samples.
metadata %>%
  filter(RNum %in% c('R3555', 'R12351', 'R12371'))

Unnamed: 0_level_0,group,lib.size,norm.factors,SAMPLE_ID,RNum,Region,Dataset,Protocol,RIN,BrNum,Dx,Race,Sex,Age,PMI,MoD,Mapping_Rate,Base_Mismatch,ExprProfEff,Exonic_Rate,Intronic_Rate,Intergenic_Rate,totalAssignedGene,Ambiguous_Alignment_Rate,rRNA_rate,End_1_Sense_Rate,End_2_Sense_Rate,Chimeric_Alignment_Rate,Low_Mapping_Quality,Low_Quality_Reads,Genes_Detected,Mean3Bias,numReads,totalMapped,Mapped_Unique_Reads,Intergenic_Reads,gene_Assigned,Ambiguous_Reads,Intronic_Reads,Read_Length,NonGlobin_Reads,MedianAvgTxCov,Mito_mapped,mitoRate,globinRate,IID,SOL,snpPC1,snpPC2,snpPC3,snpPC4,snpPC5,snpPC6,snpPC7,snpPC8,snpPC9,snpPC10,New_Dx,antipsychotics,lifetime_antipsych
Unnamed: 0_level_1,<fct>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>,<fct>,<chr>,<fct>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<fct>,<int>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<lgl>,<lgl>
R12351,1,54780010,0.9441608,R12351_C5AEJACXX,R12351,DLPFC,BrainSeq_Phase2_DLPFC,RiboZeroGold,6.5,Br1289,Control,AA,F,17.27,15.5,Homicide,0.980549,0.00225312,0.348979,0.355901,0.558159,0.0461803,0.914061,0.0397591,0.00853538,0.0640477,0.933025,0.00545214,19508567,22497212,29553,0.530782,247598326,158487807,158487807,7319007,144867462,6301338,88461448,100,158485859,3.39481,2445098,0.009875261,7.867581e-06,4040296051_A,0,0.0775027,-0.00377709,-0.00016476,0.00130565,0.00150321,-0.000443292,0.000676359,0.000179677,-0.000669624,-0.000142894,Control,False,False
R12371,1,46147434,1.0431421,R12371_C5ADGACXX,R12371,DLPFC,BrainSeq_Phase2_DLPFC,RiboZeroGold,6.2,Br1420,SCZD,AA,F,80.2,17.5,Natural,0.979922,0.00207988,0.325075,0.331735,0.563556,0.0637595,0.895292,0.0409487,0.00607702,0.0482978,0.950043,0.00584187,17133187,18712553,29836,0.540818,215833337,126977892,126977892,8096053,113682258,5199581,71559197,100,126973167,3.37159,1501382,0.00695621,2.189189e-05,4572348703_R01C01,0,0.0673397,-0.00459136,0.00130164,-0.00139981,0.000280948,-0.000426639,-0.00139062,-0.00232595,-0.00264288,-0.000611504,Schizo_noAP,False,True
R3555,1,46409298,1.7344757,R3555_C4KHUACXX,R3555,DLPFC,BrainSeq_Phase2_DLPFC,RiboZeroGold,9.0,Br1427,SCZD,CAUC,M,66.81,24.5,Natural,0.951226,0.00293084,0.421507,0.44312,0.472219,0.050219,0.915339,0.0344419,0.00901134,0.0526278,0.945205,0.00800872,12140717,13390776,29384,0.599414,139795817,99833469,99833469,5013540,91381474,3438455,47143277,100,99831003,2.98411,1825485,0.013058223,1.764001e-05,4584656109_R01C02,0,-0.0222989,-0.00267137,7.06334e-05,0.00703864,-0.0115382,0.00770143,-0.00229756,0.0020686,0.00121966,-0.00103986,Schizo_AP,True,True


### Scale-free topology index: final results at power 15

In [14]:
# Caudate & hippocampus: keep only the power-15 rows of the scale-free
# topology fit table, excluding the DLPFC rows (shown separately).
scale_free_df %>%
  filter(Power == 15, brain_region != 'dlpfc')

Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.,brain_region,sex,status
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
15,0.8603353,-1.649639,0.9721536,1.2683011,0.0356344068,43.80547,hippocampus,male,CTL_SZD
15,0.912947,-1.55558,0.9899227,2.8848024,0.0712492655,114.24422,hippocampus,female,CTL_SZD
15,0.8998215,-1.720211,0.9895271,1.6997666,0.0819433563,72.02038,hippocampus,male,SZD
15,0.8667655,-1.800413,0.9775726,3.3379508,0.257240598,133.03247,hippocampus,female,SZD
15,0.8503683,-1.75064,0.9813398,1.5508506,0.055742711,51.48229,hippocampus,male,CTL
15,0.8814454,-1.569738,0.9728334,4.7189026,0.1549152156,151.23476,hippocampus,female,CTL
15,0.9073596,-1.620669,0.9797628,0.1047947,0.0003920376,12.71824,caudate,male,CTL_SZD
15,0.9603098,-1.573757,0.985921,0.123504,0.0022253068,17.47407,caudate,female,CTL_SZD
15,0.9423368,-1.624786,0.9928773,0.1490048,0.0029376827,12.28523,caudate,male,SZD
15,0.9617629,-1.7982,0.9906883,0.3033478,0.0333957144,24.82404,caudate,female,SZD


In [15]:
# DLPFC: display the scale-free topology fit table stored in scale_free_df2
# (the rendered output lists one Power == 15 row per sex/status group).
scale_free_df2

Power,SFT.R.sq,slope,truncated.R.sq,mean.k.,median.k.,max.k.,brain_region,sex,status
<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>
15,0.8862898,-1.398608,0.9767316,5.513155,0.02037328,194.7508,dlpfc,male,CTL_SZD
15,0.8889554,-1.437447,0.9697805,5.639542,0.0659227,185.1673,dlpfc,female,CTL_SZD
15,0.8676044,-1.488453,0.9595083,4.426547,0.04846326,162.3829,dlpfc,male,SZD
15,0.8794784,-1.428097,0.9370866,6.418598,0.1999023,235.2397,dlpfc,female,SZD
15,0.9020769,-1.344196,0.9887057,7.865503,0.04420381,233.7331,dlpfc,male,CTL
15,0.8616363,-1.504724,0.9823709,8.42959,0.22049679,198.1165,dlpfc,female,CTL


In [16]:
# Record the R version, platform, locale, and attached package versions
# used for this run, for reproducibility.
sessionInfo()

R version 4.2.2 (2022-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Arch Linux

Matrix products: default
BLAS:   /usr/lib/libopenblas.so.0.3
LAPACK: /usr/lib/liblapack.so.3.11.0

locale:
 [1] LC_CTYPE=C.UTF-8    LC_NUMERIC=C        LC_TIME=C          
 [4] LC_COLLATE=C        LC_MONETARY=C       LC_MESSAGES=C      
 [7] LC_PAPER=C          LC_NAME=C           LC_ADDRESS=C       
[10] LC_TELEPHONE=C      LC_MEASUREMENT=C    LC_IDENTIFICATION=C

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] limma_3.54.2          IRdisplay_1.1         gridExtra_2.3        
 [4] latticeExtra_0.6-30   lattice_0.20-45       flashClust_1.01-2    
 [7] WGCNA_1.72-1          fastcluster_1.2.3     dynamicTreeCut_1.63-1
[10] data.table_1.14.8     lubridate_1.9.2       forcats_1.0.0        
[13] stringr_1.5.0         dplyr_1.1.0           purrr_1.0.1          
[16] readr_2.1.4           tidyr_1.3.0      