# Univariate, Bivariate, and Multivariate Analysis (R)

## Overview
This notebook analyzes the dataset at three levels: individual variables (univariate), relationships between pairs of variables (bivariate), and patterns across several variables at once (multivariate).


In [None]:
# Load required libraries
library(data.table)
library(dplyr)
library(ggplot2)
library(corrplot)

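# Load and prepare data
# TODO: insert the data loading code from the EDA notebook here. Every cell
# below reads the dataset from a data frame named `df`; until one is defined,
# `df` resolves to the stats::df() function and all column checks are FALSE.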


## 1. Univariate Analysis


In [None]:
# Univariate analysis for a numerical variable (Source.Port): histogram,
# box plot, and summary statistics. The cell reports why it was skipped
# instead of silently doing nothing.
if (!("Source.Port" %in% colnames(df))) {
  message("Skipping univariate analysis: ",
          "column 'Source.Port' not found in df.")
} else if (!is.numeric(df[["Source.Port"]])) {
  # hist() and boxplot() stop with an error on non-numeric input.
  message("Skipping univariate analysis: ",
          "column 'Source.Port' is not numeric.")
} else {
  # [[ ]] extracts the column by exact name (no partial matching).

  # Histogram
  hist(df[["Source.Port"]],
       main = "Source Port Distribution",
       xlab = "Source Port")

  # Box plot
  boxplot(df[["Source.Port"]], main = "Source Port Box Plot")

  # Summary statistics; print() is explicit so the table is shown even when
  # the code is run through source(), where autoprinting is off.
  print(summary(df[["Source.Port"]]))
}


## 2. Bivariate Analysis


In [None]:
# Bivariate view of the two port columns: a scatter plot followed by their
# correlation coefficient. Runs only when df has both columns.
if (all(c("Source.Port", "Destination.Port") %in% colnames(df))) {
  # Scatter plot of destination port against source port
  plot(
    x = df[["Source.Port"]],
    y = df[["Destination.Port"]],
    xlab = "Source Port",
    ylab = "Destination Port",
    main = "Source Port vs Destination Port"
  )

  # Correlation, computed over the complete observations only
  cor_result <- cor(
    x = df[["Source.Port"]],
    y = df[["Destination.Port"]],
    use = "complete.obs"
  )
  cat("Correlation:", cor_result, "\n")
}

# Grouped box plots: one box of Destination.Port values per level of
# Attack.category. Skipped unless df has both columns.
if (all(c("Attack.category", "Destination.Port") %in% colnames(df))) {
  boxplot(
    Destination.Port ~ Attack.category,
    data = df,
    xlab = "Attack Category",
    ylab = "Destination Port",
    main = "Destination Port by Attack Category"
  )
}


## 3. Multivariate Analysis


In [None]:
# Correlation matrix across the numerical columns that are available.
# Candidates are kept only if they exist in df and are numeric, because
# cor() stops with an error on non-numeric input.
numerical_cols <- c("Source.Port", "Destination.Port", "Time_Duration", "Hour")
numerical_cols <- numerical_cols[numerical_cols %in% colnames(df)]
numerical_cols <- numerical_cols[
  vapply(numerical_cols, function(col) is.numeric(df[[col]]), logical(1))
]

if (length(numerical_cols) > 1) {
  # as.data.frame() makes selection by a character vector work for a
  # data.table as well; there, df[, numerical_cols] is treated as a request
  # for a column literally named "numerical_cols" and fails.
  cor_matrix <- cor(
    as.data.frame(df)[, numerical_cols, drop = FALSE],
    use = "complete.obs"
  )

  # A constant column yields NA correlations, which the hierarchical
  # clustering behind order = "hclust" cannot handle; keep the original
  # column order in that case.
  corrplot(cor_matrix, method = "color", type = "upper",
           order = if (anyNA(cor_matrix)) "original" else "hclust",
           tl.cex = 0.8, tl.col = "black")
} else {
  message("Skipping correlation matrix: ",
          "fewer than two numeric columns found in df.")
}
