In [3]:
# For Optimus table. Load and inspect the features table / Change the name in the code below if needed.
# (Optional) change the features_quantification_matrix.csv to the name of your .csv file.
# Make sure to add the value of bioactivity in the second columns
tab <- read.csv("TEMPLATE_FILES/INPUT_FILE/features_quantification_matrix_edited_bioactivity_Optimus.csv", stringsAsFactor=FALSE, check.names=FALSE)
dim(tab)                  
tab[1:5,1:5]                                                       


Sample name,BioactivityCHIKV,270.279 1698 1 (ID: 9),271.283 1699 1 (ID: 355),279.174 1413 1 (ID: 511)
Extract,68,5690331,569378.1,3172924
F_5,1,246596026,40396489.5,0
F_6,4,186949612,30710029.2,0
F_7,1,362958584,63895441.0,0
F_8,3,160968307,26317669.9,0


In [4]:
# Take out blank rows in the table
tab <- tab[!is.na(tab[,2]),]

In [5]:
# Add 1 to all to help scaling feature intensities and Normalize the features by TIC  
# 
tab2 <- tab
tab2[,-c(1:2)] <- t(apply(tab2[,-c(1:2)], 1, function(x) (x+1)))
tab2[,-c(1:2)] <- t(apply(tab2[,-c(1:2)], 1, function(x) (x+1)/sum((x+1))))

In [6]:
# Calculate the correlation coefficient between a single feature and the bioactivity.
# Scale should help correlation - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1534033/
cor.test(scale(tab2[,2])[,1], scale(tab2[,3])[,1])[c("estimate", "p.value")]

In [7]:
# How to do for all features
ct <- t(sapply(3:ncol(tab2), function(x) unlist(cor.test(scale(tab2[,2])[,1], scale(tab2[,x])[,1])[c("estimate", "p.value")])))

In [8]:
# Show the dimensions of the features_quantificaton_matrix.csv
dim(tab2)
dim(ct)


In [9]:
# Create an output table with correlation coefficient value and p-value for every features

ct <- rbind(c("cor"," p_value"), c(0,0), ct)

tab3 <- rbind(t(ct),  as.matrix(tab2))
rownames(tab3) <- NULL
tab3[1:5, 1:5]
write.csv(tab3, "features_quantification_matrix_edited_with_correlation.csv", row.names=FALSE)

Sample name,BioactivityCHIKV,270.279 1698 1 (ID: 9),271.283 1699 1 (ID: 355),279.174 1413 1 (ID: 511)
cor,0.0,-0.324377626535573,-0.318374670042615,0.113449595004765
p_value,0.0,0.257849325961648,0.267271167784958,0.699375539656519
Extract,68.0,0.0001391824,1.392673e-05,7.760805e-05
F_5,1.0,0.1594079051,0.02611364,1.292867e-09
F_6,4.0,0.0228397299,0.00375186,2.44341e-10


In [10]:
# Tranpose the table for molecular networking mapping in Cytoscape
new = t(tab3)
colnames(new) = new[1,]
new = new[-1,]
new = cbind(0:(nrow(new)-1), rownames(new), new)
rownames(new) <- NULL
colnames(new)[1:2] <- c("shared name", "IDs")
new[1,1] <- ""
new[1:5,1:5]
write.csv(new, "features_quantification_matrix_transposed_with_correlation.csv", row.names=FALSE)

shared name,IDs,cor,p_value,Extract
,BioactivityCHIKV,0.0,0.0,68.0
1.0,270.279 1698 1 (ID: 9),-0.324377626535573,0.257849325961648,0.0001391824
2.0,271.283 1699 1 (ID: 355),-0.318374670042615,0.267271167784958,1.392673e-05
3.0,279.174 1413 1 (ID: 511),0.113449595004765,0.699375539656519,7.760805e-05
4.0,280.264 1420 1 (ID: 1029),-0.200702963314996,0.491453263790403,4.456232e-05


In [11]:
# Get the significant correlation coefficients for both cases (>0.05)
which(as.numeric(ct[-c(1,2),2])<0.05)

In [13]:
# Show the features ID with correlation coefficient
nm <- colnames(tab)
nm[-c(1:2)][as.numeric(ct[-c(1,2),2])<0.05]

In [14]:
which(as.numeric(ct[-c(1,2),2])<0.05)

In [15]:
# Table with significant correlation coefficients. Using Bonferroni method.
ct[-c(1,2),][which(p.adjust(as.numeric(ct[-c(1:2),2]), method = "bonferroni")<0.05),]

estimate.cor,p.value
0.911533890013771,5.70642977651841e-06
0.878799703046329,3.50720126932845e-05
0.907554634997394,7.36503454823261e-06
0.905633362410499,8.2969617343469e-06
0.919233347913614,3.3612895078753e-06
0.87339713251949,4.50107957344833e-05
0.911780542044347,5.61470514533595e-06
0.910238246274744,6.20871293458314e-06
0.913168007523286,5.12101690943807e-06
0.924118662302627,2.33675628586297e-06


In [16]:
# Call the ID
which(p.adjust(as.numeric(ct[-c(1:2),2]), method = "bonferroni")<0.05)

In [17]:
# Features passing Bonferronii method
nm[-c(1:2)][which(p.adjust(as.numeric(ct[-c(1:2),2]), method = "bonferroni")<0.05)]


In [18]:
# Prepare the new table
new <- cbind(new[,1:5], c(0, p.adjust(as.numeric(ct[-c(1:2),2]), method = "bonferroni")), new[,-c(1:5)])
colnames(new)[6] <- "p_value_corrected"
new[,1:10]

shared name,IDs,cor,p_value,Extract,p_value_corrected,F_5,F_6,F_7,F_8
,BioactivityCHIKV,0,0,68.0,0,1.0,4.0,1.0,3.0
1,270.279 1698 1 (ID: 9),-0.324377626535573,0.257849325961648,0.0001391824,1,0.1594079051,0.0228397299,0.3139760481,0.0551088775
2,271.283 1699 1 (ID: 355),-0.318374670042615,0.267271167784958,1.392673e-05,1,2.611364e-02,3.751860e-03,5.527253e-02,9.010080e-03
3,279.174 1413 1 (ID: 511),0.113449595004765,0.699375539656519,7.760805e-05,1,1.292867e-09,2.443410e-10,1.730093e-09,6.847171e-10
4,280.264 1420 1 (ID: 1029),-0.200702963314996,0.491453263790403,4.456232e-05,1,6.101909e-02,8.258905e-05,8.404977e-05,5.121927e-05
5,281.190 1031 1 (ID: 1070),-0.201330441235917,0.490071208530791,4.369355e-05,1,1.292867e-09,1.194905e-04,2.992649e-02,8.518262e-05
6,282.279 1660 1 (ID: 345),-0.201928970569719,0.488754608710009,4.563084e-04,1,5.618966e-01,1.014960e-03,2.860098e-03,9.171838e-04
7,297.185 1652 1 (ID: 1035),-0.271213894966513,0.348283658813002,6.741466e-05,1,1.292867e-09,1.150745e-02,8.201226e-03,7.540433e-05
8,297.185 1556 1 (ID: 357),-0.214874897759919,0.460688195057373,3.286022e-04,1,1.292867e-09,4.233761e-03,5.458203e-02,4.329829e-04
9,298.311 1968 1 (ID: 132),-0.29664294602858,0.303069789505169,2.040304e-05,1,2.699246e-02,6.353463e-03,7.801125e-02,7.468283e-03


In [19]:
# Write the final table with corrected p_value
write.csv(new, "features_quantification_matrix_transposed_with_significant_correlation_pvalue_corrected.csv", row.names=FALSE)