# Statistical Analysis - Cybersecurity Attacks Dataset (R)

## Overview
This notebook performs a statistical analysis of the cybersecurity attacks dataset in three parts: descriptive statistics, hypothesis testing (chi-square test and ANOVA), and correlation analysis.


In [None]:
# Load required libraries
library(data.table)
library(dplyr)
library(ggplot2)
library(corrplot)
library(car)
library(psych)

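# Load and prepare data (same as EDA notebook)
# TODO: insert the data loading code from the EDA notebook here. Every cell
# below expects the prepared dataset to be bound to the name `df`. If `df` is
# left undefined, the name resolves to the function stats::df and the
# column-existence checks below are skipped without any error.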


## 1. Descriptive Statistics


In [None]:
# Descriptive statistics
# Column-by-column overview of the whole dataset.
summary(df)

# For numerical variables
# Each port column is checked and described independently, so one missing
# column does not suppress (or break) the summary of the other.
for (port_col in c("Source.Port", "Destination.Port")) {
  if (port_col %in% colnames(df)) {
    # Print explicitly: inside a braced block only the value of the last
    # expression is auto-printed, so an unprinted describe() call that is not
    # last would produce no output at all.
    cat("\n", port_col, ":\n", sep = "")
    print(describe(df[[port_col]]))
  }
}


## 2. Hypothesis Testing


In [None]:
# Chi-square test
# Tests for association between attack category and protocol. The test is
# skipped unless both columns are present in `df`.
if (all(c("Attack.category", "Protocol") %in% colnames(df))) {
  # Cross-tabulate the two columns: rows are attack categories, columns are
  # protocols.
  contingency_table <- table(
    df[["Attack.category"]],
    df[["Protocol"]]
  )
  chi_test <- chisq.test(contingency_table)
  print(chi_test)
}

# ANOVA test
# Models Destination.Port as the response with Attack.category as the single
# explanatory term. The test is skipped unless both columns are present in `df`.
if (all(c("Attack.category", "Destination.Port") %in% colnames(df))) {
  aov_result <- aov(
    formula = Destination.Port ~ Attack.category,
    data = df
  )
  # summary() builds the ANOVA table; print it explicitly because it is
  # produced inside a braced block.
  print(summary(aov_result))
}


## 3. Correlation Analysis


In [None]:
# Correlation matrix
# Candidate columns; only those actually present in `df` are kept.
numerical_cols <- c("Source.Port", "Destination.Port", "Time_Duration", "Hour")
numerical_cols <- numerical_cols[numerical_cols %in% colnames(df)]

# cor() stops with an error on non-numeric input, so keep only the columns
# that are stored as numbers and report any that are skipped.
is_numeric_col <- vapply(
  numerical_cols,
  function(col) is.numeric(df[[col]]),
  logical(1)
)
if (!all(is_numeric_col)) {
  warning(
    "Skipping non-numeric columns in correlation analysis: ",
    paste(numerical_cols[!is_numeric_col], collapse = ", "),
    call. = FALSE
  )
}
numerical_cols <- numerical_cols[is_numeric_col]

# At least two columns are needed for a meaningful correlation matrix.
if (length(numerical_cols) > 1) {
  # Convert to a plain data.frame before selecting: on a data.table,
  # `df[, numerical_cols]` does not select the columns named in the vector.
  numeric_data <- as.data.frame(df)[, numerical_cols, drop = FALSE]

  # "complete.obs" drops every row that has an NA in any selected column.
  cor_matrix <- cor(numeric_data, use = "complete.obs")
  corrplot(cor_matrix, method = "color", type = "upper", order = "hclust")
}
