<a href="https://colab.research.google.com/github/Intertangler/ML4biotech/blob/main/kerasdeep_neural_networks_exercise_R.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## import the data
This artificial data represents the (already normalized) expression levels of two separate genes. The class label associated with each data point indicates the presence (1) or absence (0) of a particular downstream phenotype influenced by the genes. Our goal here is to detect a nonlinear relationship between the two gene expression levels that strongly correlates with the downstream phenotype.

In [1]:
%load_ext rpy2.ipython

In [None]:
%%R
install.packages("keras")


In [None]:
%%R
install.packages("caTools")
install.packages("pROC")

In [None]:
%%R
library(readr)
library(dplyr)
# Load required packages
library(caTools)
library(pROC)
# Plotting
# This part would go into a separate block if you are running in R Studio or similar
library(ggplot2)

In [None]:
%%R

# Load the gene-expression table: every column except the last is treated as a
# feature, and the last column is treated as the class label.
url <- "https://raw.githubusercontent.com/Intertangler/ML4biotech/main/gene_expression_XOR.csv"
df <- read_csv(url)

X <- df %>% select(-last_col())
y <- df %>% select(last_col())

# Convert data frames to matrices or vectors as needed
X <- as.matrix(X)
y <- as.vector(y[[1]])

# Seed the random number generator BEFORE the first random call, so that both
# the shuffle and the train/test split below are reproducible between runs.
set.seed(428)

# Shuffle data and labels with the same permutation so rows stay paired
shuffle_idx <- sample(seq_len(nrow(X)), replace = FALSE)
X <- X[shuffle_idx, , drop = FALSE]
y <- y[shuffle_idx]

# Split data to training/testing subsets (SplitRatio = 0.8 goes to training)
split <- sample.split(y, SplitRatio = 0.8)
# drop = FALSE keeps the subsets as matrices even when a single row or column
# is selected, matching the shuffle step above.
X_train <- X[split, , drop = FALSE]
y_train <- y[split]
X_test <- X[!split, , drop = FALSE]
y_test <- y[!split]

In [None]:
%%R
# Prepare the data frame for ggplot: the first two feature columns of the
# training split plus the class label as a factor (needed for a discrete
# colour scale).
data_plot <- data.frame(X1 = X_train[, 1], X2 = X_train[, 2], Class = as.factor(y_train))

# Scatter plot of the training points, coloured by class label
ggplot(data = data_plot, aes(x = X1, y = X2, color = Class)) +
  geom_point() +
  xlab("Gene Expression 1") +
  ylab("Gene Expression 2") +
  # Legend title spelling corrected ("Phenotype")
  scale_color_manual(name = "Phenotype", values = c("0" = "red", "1" = "blue")) +
  theme_minimal() +
  theme(legend.position = "bottom")


## exercise
Complete the missing lines.

In [None]:
%%R
# Exercise scaffold: define and compile a Keras model in the placeholder lines,
# then train it on the training split created earlier in the notebook.
library(keras)
library(caTools)
library(tensorflow)


# The four placeholders below must leave a compiled model in a variable named
# `model`, because the training call further down pipes `model` into fit().
#🌟🌟🌟🌟 YOUR CODE HERE 🌟🌟🌟🌟#  # Initialize the model
#🌟🌟🌟🌟 YOUR CODE HERE 🌟🌟🌟🌟# # Add one or more hidden layers with a certain number of nodes
#🌟🌟🌟🌟 YOUR CODE HERE 🌟🌟🌟🌟# # Add the output layer with a sigmoid activation
#🌟🌟🌟🌟 YOUR CODE HERE 🌟🌟🌟🌟# # Compile the model with binary cross-entropy loss and adaptive moment estimation



# Number of rows (samples) in the training matrix
n_samples <- dim(X_train)[1]

# Train
# batch_size = n_samples means every epoch processes the whole training set as
# a single batch; the held-out test split is passed as validation data so its
# metrics are recorded in `history` alongside the training metrics.
history <- model %>% fit(
  X_train, y_train,
  epochs = 300,
  batch_size = n_samples,
  validation_data = list(X_test, y_test)
)




In [None]:
%%R
# Compare the trained network against a logistic-regression baseline on the
# test split by plotting both ROC curves and reporting both AUC values.

# Train a logistic regression model using glm
logreg_model <- glm(y_train ~ ., data = data.frame(y_train, X_train), family = binomial)

# Make probability predictions
logreg_prob <- predict(logreg_model, newdata = data.frame(X_test), type = "response")

# Generate ROC data for logistic regression.
# pROC reports sensitivity (= true positive rate) and specificity
# (= 1 - false positive rate), so the FPR must be derived as 1 - specificity.
roc_data <- roc(y_test, logreg_prob)
logreg_fpr <- 1 - roc_data$specificities
logreg_tpr <- roc_data$sensitivities
logreg_auc <- auc(roc_data)



# Evaluate the model
model %>% evaluate(X_test, y_test)

# Make predictions
predictions <- model %>% predict(X_test)

# ROC data for the neural network. The predictions are flattened with
# as.numeric() because roc() expects a plain numeric vector
# (NOTE(review): Keras predict() presumably returns a one-column matrix here).
roc_data_custom <- roc(y_test, as.numeric(predictions))
custom_mlp_fpr <- 1 - roc_data_custom$specificities
custom_mlp_tpr <- roc_data_custom$sensitivities
custom_mlp_auc <- auc(roc_data_custom)

# Create a data frame for ggplot.
# The two curves can have different numbers of threshold points, so each model
# label is repeated by the length of its own curve.
roc_df <- data.frame(
  FPR = c(logreg_fpr, custom_mlp_fpr),
  TPR = c(logreg_tpr, custom_mlp_tpr),
  Model = factor(rep(
    c("Logistic Regression", "Custom MLP"),
    times = c(length(logreg_fpr), length(custom_mlp_fpr))
  ))
)

# Create the ROC plot using ggplot.
# geom_path() connects points in data (threshold) order; geom_line() would
# re-sort by x and can zig-zag along the vertical segments of an ROC curve.
ggplot(roc_df, aes(x = FPR, y = TPR, color = Model)) +
  geom_path() +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  xlab("False Positive Rate") +
  ylab("True Positive Rate") +
  ggtitle(sprintf(
    "ROC Curve\nLogistic AUC = %0.2f, MLP AUC = %0.2f",
    as.numeric(logreg_auc), as.numeric(custom_mlp_auc)
  ))