# Univariate, Bivariate, and Multivariate Analysis (R)


In [None]:
# Load libraries and data
library(tidyverse)
library(ggplot2)
library(corrplot)

# Read the fraud dataset, keeping text columns as character vectors, then
# report its dimensions (rows, columns).
fraud_csv_path <- "../../data/fraud_data.csv"
df <- read.csv(file = fraud_csv_path, stringsAsFactors = FALSE)
cat("Data loaded:", nrow(df), ncol(df), "\n")


In [None]:
# Univariate analysis
# Prints the mean, median and standard deviation of TransactionAmt (missing
# values ignored) and saves a 50-bin histogram of the column as a PNG.
# The whole step is skipped when the column is absent from `df`.
if ("TransactionAmt" %in% colnames(df)) {
  # `[[` avoids the partial name matching that `$` performs on data frames.
  transaction_amt <- df[["TransactionAmt"]]

  cat("Univariate Analysis: TransactionAmt\n")
  cat("Mean:", mean(transaction_amt, na.rm = TRUE), "\n")
  cat("Median:", median(transaction_amt, na.rm = TRUE), "\n")
  cat("Std:", sd(transaction_amt, na.rm = TRUE), "\n")

  # Histogram
  amt_hist_plot <- ggplot(df, aes(x = TransactionAmt)) +
    geom_histogram(bins = 50, fill = "steelblue", alpha = 0.7) +
    labs(
      title = "Transaction Amount Distribution",
      x = "Transaction Amount",
      y = "Frequency"
    ) +
    theme_minimal()

  # Create the output folder up front: saving fails (or prompts, depending on
  # the ggplot2 version) when the target directory does not exist.
  univariate_fig_path <- file.path(
    "../../outputs/figures",
    "univariate_transactionamt_r.png"
  )
  dir.create(
    dirname(univariate_fig_path),
    recursive = TRUE,
    showWarnings = FALSE
  )

  # Hand the plot to ggsave() explicitly instead of relying on last_plot().
  ggsave(
    univariate_fig_path,
    plot = amt_hist_plot,
    width = 10,
    height = 6,
    dpi = 300
  )
}


In [None]:
# Bivariate analysis
# Relates TransactionAmt to isFraud: saves a box plot of the amount per fraud
# status (log10 y axis) as a PNG and prints the correlation between the two
# columns, computed on rows where both values are present.
# The step needs BOTH columns; it is skipped when either one is missing.
if (all(c("TransactionAmt", "isFraud") %in% colnames(df))) {
  # Box plot
  # NOTE(review): with a log10 axis, non-positive amounts cannot be drawn;
  # ggplot2 is expected to drop them with a warning -- confirm on real data.
  fraud_box_plot <- ggplot(
    df,
    aes(x = as.factor(isFraud), y = TransactionAmt)
  ) +
    geom_boxplot() +
    scale_y_log10() +
    labs(
      title = "Transaction Amount by Fraud Status",
      x = "Fraud Status",
      y = "Transaction Amount"
    ) +
    theme_minimal()

  # Create the output folder up front: saving fails (or prompts, depending on
  # the ggplot2 version) when the target directory does not exist.
  bivariate_fig_path <- file.path(
    "../../outputs/figures",
    "bivariate_transaction_fraud_r.png"
  )
  dir.create(
    dirname(bivariate_fig_path),
    recursive = TRUE,
    showWarnings = FALSE
  )

  # Hand the plot to ggsave() explicitly instead of relying on last_plot().
  ggsave(
    bivariate_fig_path,
    plot = fraud_box_plot,
    width = 10,
    height = 6,
    dpi = 300
  )

  # Correlation
  # `[[` avoids the partial name matching that `$` performs on data frames.
  corr <- cor(
    df[["TransactionAmt"]],
    df[["isFraud"]],
    use = "complete.obs"
  )
  cat("Correlation between TransactionAmt and isFraud:", corr, "\n")
}


In [None]:
# Multivariate analysis
# Builds a correlation matrix over the key features that exist in `df` and
# draws it as a colour-coded upper-triangle corrplot. Needs at least two
# usable columns; otherwise the step is skipped.
key_features <- c(
  "TransactionAmt", "card1", "card2", "card3", "card5", "isFraud"
)
key_features <- key_features[key_features %in% colnames(df)]

# cor() rejects non-numeric input, so keep only numeric/logical columns
# instead of letting one stray character column abort the whole step.
key_features <- key_features[
  vapply(
    df[key_features],
    function(column) is.numeric(column) || is.logical(column),
    logical(1)
  )
]

if (length(key_features) > 1) {
  corr_matrix <- cor(df[key_features], use = "complete.obs")

  # A constant column yields NA correlations, and clustering-based ordering
  # cannot work on a matrix containing NA; fall back to the input order then.
  corr_order <- if (anyNA(corr_matrix)) "original" else "hclust"

  corrplot(
    corr_matrix,
    method = "color",
    type = "upper",
    order = corr_order,
    tl.cex = 0.8
  )
  cat("Multivariate correlation analysis complete!\n")
}
