In [17]:
# Load and inspect the features table.
# Set `input_file` to the name of your own .csv file if it differs. The file
# is expected to have the bioactivity values in its second column, with the
# feature columns starting at the third column.
input_file <- "features_quantification_matrix_edited_bioactivity.csv"

# check.names = FALSE keeps the header text exactly as written in the file
# (spaces, parentheses, leading digits). The file is therefore read only once,
# and the header is never parsed as a data row, where type conversion could
# silently alter names such as "007", "T" or "NA".
tab <- read.csv(input_file, check.names = FALSE)

# Original column names; later cells use nm[-c(1:2)] as the feature IDs.
nm <- colnames(tab)

dim(tab)
tab[1:5,1:5]


Sample name,BioactivityCHIKV,270.279 1698 1 (ID: 9),271.283 1699 1 (ID: 355),279.174 1413 1 (ID: 511)
Extract,68,5690331,569378.1,3172924
F_5,1,246596026,40396489.5,0
F_6,4,186949612,30710029.2,0
F_7,1,362958584,63895441.0,0
F_8,3,160968307,26317669.9,0


In [18]:
# Drop the rows that have no bioactivity value (NA in the second column),
# such as blank rows in the input table.
has_bioactivity <- !is.na(tab[, 2])
tab <- tab[has_bioactivity, ]

In [19]:
# Normalize every sample (row) by TIC, i.e. divide each feature intensity by
# the sum of that row's feature intensities. 1 is added to all feature
# intensities first to help scaling.
tic_normalize <- function(intensities) {
  shifted <- intensities + 1
  shifted / sum(shifted)
}

tab2 <- tab
# The first two columns are left untouched. apply() over rows returns one
# column per row, hence the transpose back to the original orientation.
tab2[, -c(1:2)] <- t(apply(tab2[, -c(1:2)], 1, tic_normalize))

In [20]:
# Correlation test between the bioactivity (column 2) and one single feature
# (column 3); only the coefficient estimate and the p-value are shown.
# Both vectors are scaled first, which should help the correlation -
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1534033/
bioactivity_z <- scale(tab2[, 2])[, 1]
first_feature_z <- scale(tab2[, 3])[, 1]
single_test <- cor.test(bioactivity_z, first_feature_z)
single_test[c("estimate", "p.value")]

In [21]:
# Run the same correlation test for every feature (columns 3 onward of tab2).
# ct ends up with one row per feature and two columns: the correlation
# estimate and its p-value.
bioactivity_scaled <- scale(tab2[, 2])[, 1]

test_feature <- function(col_idx) {
  result <- cor.test(bioactivity_scaled, scale(tab2[, col_idx])[, 1])
  unlist(result[c("estimate", "p.value")])
}

ct <- t(sapply(3:ncol(tab2), test_feature))

In [22]:
# Show the dimensions of the normalized feature table (tab2) and of the
# correlation results (ct). At this point ct holds one row per column of tab2
# from the third column onward, so nrow(ct) should equal ncol(tab2) - 2.
dim(tab2)
dim(ct)


In [23]:
# Create an output table: the normalized feature table with two extra rows on
# top that hold the correlation coefficient and the p-value of every feature.

# Prepend the two extra rows to ct only while ct is still the numeric matrix
# of test results. After the rbind ct is a character matrix, so re-running
# this cell does not stack a second pair of rows (later cells drop exactly
# two leading rows with ct[-c(1,2), ]).
if (is.numeric(ct)) {
  # Row 1 carries the labels that end up in the first (sample name) column;
  # row 2 is the 0 placeholder that ends up in the bioactivity column.
  # The label is "p_value" without a leading space, so that the column header
  # derived from it in the transposed table is clean.
  ct <- rbind(c("cor", "p_value"), c(0, 0), ct)
}

tab3 <- rbind(t(ct), as.matrix(tab2))
rownames(tab3) <- NULL
tab3[1:5, 1:5]
write.csv(tab3, "features_quantification_matrix_edited_with_correlation.csv", row.names=FALSE)

Sample name,BioactivityCHIKV,270.279 1698 1 (ID: 9),271.283 1699 1 (ID: 355),279.174 1413 1 (ID: 511)
cor,0.0,-0.324377606725705,-0.318374649942669,0.11344978411662
p_value,0.0,0.257849356719369,0.267271199671765,0.699375059832539
Extract,68.0,0.0001391824,1.39267e-05,7.760803e-05
F_5,1.0,0.159407965,0.02611365,6.464336e-10
F_6,4.0,0.0228397314,0.0037518603,1.221705e-10


In [24]:
# Transpose the table (one row per feature) for molecular networking mapping
# in Cytoscape.
transposed <- t(tab3)

# The first row of the transpose is the former first column of tab3 (the
# correlation / p-value labels followed by the sample names); use it as header.
colnames(transposed) <- transposed[1, ]
transposed <- transposed[-1, ]

# Prepend a running index starting at 0 and the former column names (now the
# row names) as the first two columns.
row_index <- 0:(nrow(transposed) - 1)
new <- cbind(row_index, rownames(transposed), transposed)
rownames(new) <- NULL
colnames(new)[1:2] <- c("shared name", "IDs")

# The first row is the bioactivity row rather than a feature: blank its index.
new[1, 1] <- ""

new[1:5,1:5]
write.csv(new, "features_quantification_matrix_transposed_with_correlation.csv", row.names=FALSE)

shared name,IDs,cor,p_value,Extract
,BioactivityCHIKV,0.0,0.0,68.0
1.0,270.279 1698 1 (ID: 9),-0.324377606725705,0.257849356719369,0.0001391824
2.0,271.283 1699 1 (ID: 355),-0.318374649942669,0.267271199671765,1.39267e-05
3.0,279.174 1413 1 (ID: 511),0.11344978411662,0.699375059832539,7.760803e-05
4.0,280.264 1420 1 (ID: 1029),-0.200702959079237,0.49145327312603,4.45623e-05


In [25]:
# Positions of the features whose uncorrected p-value is below 0.05
# (position 1 is the first feature, i.e. the third column of the table).
# The first two rows of ct are not test results and are skipped.
raw_p_values <- as.numeric(ct[-c(1, 2), 2])
which(raw_p_values < 0.05)

In [26]:
# Show the IDs (original column names) of the features whose uncorrected
# p-value is below 0.05.
feature_ids <- nm[-c(1:2)]
is_significant <- as.numeric(ct[-c(1, 2), 2]) < 0.05
feature_ids[is_significant]

In [27]:
# Same lookup as two cells above: positions of the features with an
# uncorrected p-value below 0.05.
uncorrected_p <- as.numeric(ct[-c(1, 2), 2])
which(uncorrected_p < 0.05)

In [28]:
# Correlation estimate and uncorrected p-value of the features that remain
# significant (< 0.05) after Bonferroni correction of the p-values.
bonferroni_p <- p.adjust(as.numeric(ct[-c(1:2), 2]), method = "bonferroni")
passing_rows <- which(bonferroni_p < 0.05)
ct[-c(1, 2), ][passing_rows, ]

estimate.cor,p.value
0.911533897243834,5.70642706988177e-06
0.878799706808992,3.50720064572382e-05
0.907554668668843,7.36501900414158e-06
0.905633511042207,8.29688606983179e-06
0.919233388225548,3.36127974167346e-06
0.873397143507255,4.50107734106115e-05
0.911780588059994,5.61468814684507e-06
0.910238252830603,6.20871030421146e-06
0.913168025609065,5.12101071510364e-06
0.924118681813123,2.33675278160981e-06


In [29]:
# Positions of the features whose Bonferroni-corrected p-value is below 0.05
# (position 1 is the first feature, i.e. the third column of the table).
corrected_p <- p.adjust(as.numeric(ct[-c(1:2), 2]), method = "bonferroni")
which(corrected_p < 0.05)

In [30]:
# IDs (original column names) of the features passing the Bonferroni
# correction at the 0.05 level.
p_bonferroni <- p.adjust(as.numeric(ct[-c(1:2), 2]), method = "bonferroni")
passing <- which(p_bonferroni < 0.05)
nm[-c(1:2)][passing]


In [31]:
# Prepare the new table: add the Bonferroni-corrected p-values to the
# transposed table as a sixth column named "p_value_corrected".
#
# The insertion is skipped when the column already exists, so re-running this
# cell does not add a duplicate column and mislabel the existing one.
#
# NOTE(review): the column is inserted after the fifth column, which in the
# example data is the first sample column ("Extract"), not directly after the
# p-value column - confirm this placement is intended.
if (!("p_value_corrected" %in% colnames(new))) {
  # The leading 0 fills the first row of `new` (the bioactivity row), which
  # has no test result; the rest follows the feature order of ct.
  p_corrected <- c(0, p.adjust(as.numeric(ct[-c(1:2), 2]), method = "bonferroni"))
  new <- cbind(new[, 1:5], p_corrected, new[, -c(1:5)])
  colnames(new)[6] <- "p_value_corrected"
}
new[,1:10]

shared name,IDs,cor,p_value,Extract,p_value_corrected,F_5,F_6,F_7,F_8
,BioactivityCHIKV,0,0,68.0,0,1.0,4.0,1.0,3.0
1,270.279 1698 1 (ID: 9),-0.324377606725705,0.257849356719369,0.0001391824,1,0.1594079650,0.0228397314,0.3139762067,0.0551088883
2,271.283 1699 1 (ID: 355),-0.318374649942669,0.267271199671765,0.0000139267,1,0.0261136500,0.0037518603,0.0552725554,0.0090100816
3,279.174 1413 1 (ID: 511),0.11344978411662,0.699375059832539,7.760803e-05,1,6.464336e-10,1.221705e-10,8.650469e-10,3.423586e-10
4,280.264 1420 1 (ID: 1029),-0.200702959079237,0.49145327312603,4.456230e-05,1,6.101911e-02,8.258893e-05,8.404894e-05,5.121893e-05
5,281.190 1031 1 (ID: 1070),-0.201330433729144,0.490071225054009,4.369353e-05,1,6.464336e-10,1.194904e-04,2.992650e-02,8.518229e-05
6,282.279 1660 1 (ID: 345),-0.201928970163321,0.48875460960341,4.563084e-04,1,5.618968e-01,1.014960e-03,2.860098e-03,9.171837e-04
7,297.185 1652 1 (ID: 1035),-0.27121389090935,0.348283666308653,6.741464e-05,1,6.464336e-10,1.150745e-02,8.201229e-03,7.540400e-05
8,297.185 1556 1 (ID: 357),-0.214874888224598,0.460688215436271,3.286022e-04,1,6.464336e-10,4.233761e-03,5.458206e-02,4.329827e-04
9,298.311 1968 1 (ID: 132),-0.296642926666024,0.3030698225754,2.040302e-05,1,2.699247e-02,6.353464e-03,7.801129e-02,7.468284e-03


In [32]:
# Save the final transposed table, including the corrected p-value column,
# as a .csv file without row names.
write.csv(
  x = new,
  file = "features_quantification_matrix_transposed_with_significant_correlation_pvalue_corrected.csv",
  row.names = FALSE
)