<a href="https://colab.research.google.com/github/Saherpathan/biomarker-detection/blob/main/Potential_biomarkers.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Loading libraries
library(readr)


# Read the differential-expression table into a data frame.
data <- read.csv("/content/Diff_genes_heatmap_NSIP-CHP.csv")

# read.csv() sanitises headers by default (check.names = TRUE), so a header
# containing spaces is stored with dots. The name kept here must be the stored
# form, otherwise data[[log2_fold_change_col]] would select nothing.
log2_fold_change_col <- "log2.Fold.Change"
adj_pval_col <- "Adj.Pval"

# Defining the p-value cutoff
p_cutoff <- 0.05

# Keep the rows whose adjusted p-value is below the cutoff.
# which() discards NA comparisons; indexing with the bare logical vector would
# instead emit one all-NA row for every missing adjusted p-value.
filtered_data <- data[which(data[[adj_pval_col]] < p_cutoff), ]



In [None]:
# Display every row that passed the adjusted p-value filter.
print(x = filtered_data)

    Regulation      Ensembl.ID log2.Fold.Change Adj.Pval
1           Up ENSG00000229807         8.847953 1.30e-10
2           Up ENSG00000169894         6.917722 3.08e-02
3           Up ENSG00000270641         6.784331 1.63e-03
5           Up ENSG00000211660         6.093486 2.69e-02
6           Up ENSG00000282282         5.894834 4.76e-02
8           Up ENSG00000144481         5.856526 3.33e-02
9           Up ENSG00000169876         5.780382 2.08e-02
11          Up      AL157371.2         5.705047 7.79e-03
13          Up ENSG00000167612         5.680745 3.08e-02
14          Up ENSG00000196946         5.626413 3.00e-02
15          Up ENSG00000224187         5.611695 7.71e-03
16          Up      AC123912.2         5.594945 1.70e-03
18          Up ENSG00000105509         5.398978 9.70e-03
19          Up ENSG00000168903         5.348163 4.08e-02
23          Up ENSG00000253314         5.322495 4.84e-02
24          Up      AC010980.1         5.309947 1.02e-02
26          Up ENSG00000252712 

In [None]:
# Write the filtered table to CSV; row names are left out of the file.
write.csv(
  x = filtered_data,
  file = "/content/filtered_data.csv",
  row.names = FALSE
)

***Data Analysis***

In [None]:
# Select the gene with the smallest adjusted p-value rather than assuming the
# input file is already sorted by significance.
# order() places NA last and keeps the first row among ties; taking element 1
# always yields exactly one row (an all-NA row when filtered_data is empty),
# so the result has the same one-row shape as filtered_data[1, ].
most_significant_gene_row <- filtered_data[order(filtered_data$Adj.Pval)[1], ]


In [None]:
# Report how many genes passed the filter (first dimension = row count).
print(dim(filtered_data)[1L])

[1] 169


In [None]:
# Show per-column summary statistics for the filtered table.
print(summary(object = filtered_data))

  Regulation         Ensembl.ID        log2.Fold.Change    Adj.Pval      
 Length:169         Length:169         Min.   :-3.017   Min.   :0.00000  
 Class :character   Class :character   1st Qu.: 2.535   1st Qu.:0.01200  
 Mode  :character   Mode  :character   Median : 3.259   Median :0.02300  
                                       Mean   : 3.326   Mean   :0.02377  
                                       3rd Qu.: 4.124   3rd Qu.:0.03480  
                                       Max.   : 8.848   Max.   :0.04930  
    Symbol              Chr                Type              NSIP_73      
 Length:169         Length:169         Length:169         Min.   : 2.000  
 Class :character   Class :character   Class :character   1st Qu.: 3.932  
 Mode  :character   Mode  :character   Mode  :character   Median : 5.239  
                                                          Mean   : 5.951  
                                                          3rd Qu.: 7.777  
                                

In [None]:
# Extract the fields to report. [[ ]] matches column names exactly, whereas $
# on a data frame allows partial name matching.
log2_fold_change <- most_significant_gene_row[["log2.Fold.Change"]]
adjusted_pval <- most_significant_gene_row[["Adj.Pval"]]
gene_symbol <- most_significant_gene_row[["Symbol"]]

# Checking for missing values and handling accordingly.
# if() needs a single TRUE/FALSE; anyNA() always returns one, even for
# zero-length input, where the elementwise `|` would make if() fail.
if (anyNA(c(log2_fold_change, adjusted_pval))) {
  cat("Warning: Missing values encountered in the most significant gene.\n")
}

# The report itself is the same with or without the warning, so it is printed
# once; NA values are printed as-is.
cat("Potential Biomarker:", gene_symbol, "\n")
cat("Log2 Fold Change:", log2_fold_change, "\n")
cat("Adjusted P-value:", adjusted_pval, "\n")

Potential Biomarker: XIST 
Log2 Fold Change: 8.847953 
Adjusted P-value: 1.3e-10 


***Validation***

In [None]:
# Install pROC only when it is not already available, so re-running the
# notebook does not download and rebuild the package every time.
if (!requireNamespace("pROC", quietly = TRUE)) {
  install.packages("pROC")
}

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘plyr’, ‘Rcpp’




In [None]:
library(pROC)

Type 'citation("pROC")' for a citation.


Attaching package: ‘pROC’


The following objects are masked from ‘package:stats’:

    cov, smooth, var




In [None]:
library(pROC)

# ROC analysis of the log2 fold change against the Up/Down regulation label.
# levels (control, case) and direction are pinned to the values pROC selected
# automatically in the recorded run, so the result no longer depends on
# auto-detection (automatic direction choice picks whichever side favours the
# AUC, which can bias it upward).
# NOTE(review): Regulation looks like it is derived from the sign of the fold
# change; if so, AUC = 1 is expected by construction and is not an independent
# validation of the biomarker -- confirm against per-sample data.
auc <- roc(
  response = filtered_data$Regulation,
  predictor = filtered_data$log2.Fold.Change,
  levels = c("Down", "Up"),
  direction = "<"
)$auc

# Print AUC
print(paste("AUC:", auc))


Setting levels: control = Down, case = Up

Setting direction: controls < cases



[1] "AUC: 1"
