<a href="https://colab.research.google.com/github/SumiranRai/MDSC-Lab/blob/main/MDSC-201-Statistical-Methods-Using-R/R_Assignment_6.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Logistic-regression classifier for the WDBC breast-cancer data set:
# download the data, fit a GLM on a stratified 70/30 split, then report a
# confusion matrix, the AUC and a ROC plot for the held-out test rows.

# Setup ----

# Install only the packages that are not already available, then attach them
required_pkgs <- c("caret", "pROC", "ggplot2")
missing_pkgs <- required_pkgs[
  !vapply(required_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(caret)
library(pROC)
library(ggplot2)

# Load data ----

# Define the raw GitHub URL
url <- paste0(
  "https://raw.githubusercontent.com/SumiranRai/MDSC-Lab/main/",
  "MDSC-201-Statistical-Methods-Using-R/wdbc.data"
)

# Read data from GitHub (the file has no header row)
data <- read.csv(url, header = FALSE)

# Define column names
col_names <- c("ID", "Diagnosis", "radius1", "texture1", "perimeter1", "area1",
               "smoothness1", "compactness1", "concavity1", "concave_points1",
               "symmetry1", "fractal_dimension1", "radius2", "texture2", "perimeter2",
               "area2", "smoothness2", "compactness2", "concavity2", "concave_points2",
               "symmetry2", "fractal_dimension2", "radius3", "texture3", "perimeter3",
               "area3", "smoothness3", "compactness3", "concavity3", "concave_points3",
               "symmetry3", "fractal_dimension3")

# Fail early if the downloaded file does not have the expected layout
stopifnot(ncol(data) == length(col_names))
colnames(data) <- col_names

# Convert Diagnosis to a binary factor (M = 1, B = 0) with fixed level order
data$Diagnosis <- factor(ifelse(data$Diagnosis == "M", 1, 0), levels = c(0, 1))

# Remove ID column (dropped by name so the step does not depend on position)
data <- data[, setdiff(names(data), "ID")]

# Save transformed data to CSV
write.csv(data, "transformed_wdbc.csv", row.names = FALSE)

# Train / test split ----

# Split dataset into train (70%) and test (30%); the seed makes the
# partition reproducible
set.seed(123)
trainIndex <- createDataPartition(data$Diagnosis, p = 0.7, list = FALSE)
train_data <- data[trainIndex, ]
test_data <- data[-trainIndex, ]

# Model ----

# Train logistic regression model on all remaining columns
# NOTE(review): with this many predictors glm() may warn about fitted
# probabilities of 0 or 1 -- confirm whether a reduced model is wanted.
model <- glm(Diagnosis ~ ., data = train_data, family = binomial)

# Make predictions: probabilities, then labels using a 0.5 cut-off
pred_probs <- predict(model, newdata = test_data, type = "response")

# Reuse the reference levels so both classes are always present, even if
# every prediction falls on one side of the threshold
pred_labels <- factor(
  ifelse(pred_probs > 0.5, 1, 0),
  levels = levels(test_data$Diagnosis)
)

# Evaluation ----

# Compute confusion matrix; "1" (malignant) is the positive class, otherwise
# caret would treat the first level ("0") as positive
conf_matrix <- confusionMatrix(pred_labels, test_data$Diagnosis, positive = "1")
print(conf_matrix)

# Calculate AUC; levels and direction are pinned so pROC does not guess them
roc_curve <- roc(
  response = test_data$Diagnosis,
  predictor = pred_probs,
  levels = c("0", "1"),
  direction = "<"
)
auc_value <- auc(roc_curve)
print(paste("AUC:", round(as.numeric(auc_value), 4)))

# Plot ROC curve ----

# The x axis is the false-positive rate (1 - specificity) so that the curve
# matches the axis label and the dashed y = x chance line
roc_df <- data.frame(
  fpr = 1 - roc_curve$specificities,
  tpr = roc_curve$sensitivities
)
# geom_path() draws points in row order, so sort them along the curve
roc_df <- roc_df[order(roc_df$fpr, roc_df$tpr), ]

ggplot(roc_df, aes(x = fpr, y = tpr)) +
  geom_path(color = "blue") +
  geom_abline(linetype = "dashed", color = "red") +
  ggtitle(paste("ROC Curve (AUC =", round(as.numeric(auc_value), 4), ")")) +
  xlab("1 - Specificity") +
  ylab("Sensitivity") +
  theme_minimal()


Installing packages into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)

also installing the dependencies ‘listenv’, ‘parallelly’, ‘future’, ‘future.apply’, ‘diagram’, ‘lava’, ‘prodlim’, ‘clock’, ‘gower’, ‘hardhat’, ‘ipred’, ‘timeDate’, ‘e1071’, ‘foreach’, ‘ModelMetrics’, ‘plyr’, ‘recipes’, ‘reshape2’


