# Machine Learning Analysis - Cybersecurity Attacks Dataset (R)

## Overview
This notebook trains and evaluates Random Forest and Decision Tree classifiers that predict the attack category of cybersecurity events.


In [None]:
# Load required libraries
library(data.table)
library(dplyr)
library(caret)
library(randomForest)
library(e1071)
library(rpart)
library(rpart.plot)
library(xgboost)

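# Load and prepare data
# TODO: insert the data loading code from the EDA notebook here. The cells
# below expect it to create a data frame named `df` containing at least the
# columns Source.Port, Destination.Port, Hour, Month and Attack.category.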


## 1. Feature Engineering


In [None]:
# Prepare features
# Builds the predictor list (`features`), the response column name (`target`)
# and the modelling table (`df_clean`) from the already-loaded `df`.
features <- c("Source.Port", "Destination.Port", "Hour", "Month")
if ("Time_Duration" %in% colnames(df)) {
  features <- c(features, "Time_Duration")
}

# Encode categorical variables
# NOTE(review): as.numeric(as.factor(.)) assigns integer codes in factor-level
# order, which imposes an arbitrary ordering on a nominal variable. Kept as-is
# for compatibility; confirm this is acceptable for the models used.
if ("Protocol" %in% colnames(df)) {
  df$Protocol_encoded <- as.numeric(as.factor(df$Protocol))
  features <- c(features, "Protocol_encoded")
}

# Fail early, naming the missing columns, instead of hitting an opaque
# "undefined columns selected" error in the subsetting step below.
missing_features <- setdiff(features, colnames(df))
if (length(missing_features) > 0) {
  stop(
    "Missing required feature columns: ",
    paste(missing_features, collapse = ", "),
    call. = FALSE
  )
}

# Target variable
# The target is mandatory: without it `target` would stay undefined (or keep a
# stale value from an earlier run) and the training cell would fail later.
if (!("Attack.category" %in% colnames(df))) {
  stop("Column 'Attack.category' not found in df.", call. = FALSE)
}
df$Attack_category_factor <- as.factor(df$Attack.category)
target <- "Attack_category_factor"

# Remove rows with missing values
# Check the target as well as the features, since the classifiers cannot be
# fitted on rows with a missing response. as.data.frame(df)[cols] selects
# columns the same way whether `df` is a data.frame or a data.table, and never
# collapses to a vector when a single column is selected.
model_columns <- c(features, target)
keep_rows <- complete.cases(as.data.frame(df)[model_columns])
df_clean <- df[keep_rows, ]

# Dropping rows can leave classes with zero observations; remove those empty
# factor levels so the classifiers do not see (and reject) empty classes.
df_clean[[target]] <- droplevels(df_clean[[target]])


## 2. Model Training


In [None]:
# Split data
# Stratified 80/20 hold-out split on the target column. The seed is set once,
# immediately before the first random operation, so the partition and the
# models fitted afterwards are repeatable.
set.seed(42)
train_index <- createDataPartition(
  y = df_clean[[target]],
  p = 0.8,
  list = FALSE
)
train_data <- df_clean[train_index, ]
test_data <- df_clean[-train_index, ]

# Model formula of the form `target ~ feature1 + feature2 + ...`,
# shared by both classifiers below.
formula <- reformulate(features, response = target)

# Train Random Forest
# 100 trees; importance = TRUE stores the variable-importance measures.
rf_model <- randomForest(
  formula,
  data = train_data,
  ntree = 100,
  importance = TRUE
)
rf_pred <- predict(rf_model, newdata = test_data)
# Accuracy = share of held-out rows whose predicted class equals the label.
rf_accuracy <- mean(rf_pred == test_data[[target]])
cat("Random Forest Accuracy:", rf_accuracy, "\n")

# Train Decision Tree
# method = "class" fits a classification tree; type = "class" makes predict()
# return class labels rather than per-class probabilities.
dt_model <- rpart(formula, data = train_data, method = "class")
dt_pred <- predict(dt_model, newdata = test_data, type = "class")
dt_accuracy <- mean(dt_pred == test_data[[target]])
cat("Decision Tree Accuracy:", dt_accuracy, "\n")


## 3. Model Evaluation


In [None]:
# Confusion matrix
# Compare the Random Forest predictions with the held-out labels; the result
# is displayed by evaluating the object at top level.
rf_confusion <- confusionMatrix(
  data = rf_pred,
  reference = test_data[[target]]
)
rf_confusion

# Feature importance
# Show the importance table of the fitted forest, then plot it.
rf_importance <- importance(rf_model)
rf_importance
varImpPlot(rf_model)
