In [None]:
# Evaluate the project's setup script first (its contents are not visible in
# this file, so whatever it defines is assumed to be needed below — TODO confirm).
source("setup.r")

In [15]:
# Load necessary libraries
library(readxl)
library(ggplot2)
library(dplyr)
library(gridExtra)

# Workbook holding the consolidated results that the plotting code reads.
excel_file <- "Output Path/consolidated_output.xlsx"

# The PNG output folder is the folder that contains the workbook; derive it
# from the workbook path rather than hard-coding it a second time.
output_dir <- dirname(excel_file)

# Create the folder (and any missing parents) only when it is not there yet.
if (isFALSE(dir.exists(output_dir))) {
  dir.create(output_dir, recursive = TRUE)
}

# Anchor colours for the plot palettes. The order matters: the palette
# generator interpolates between neighbouring entries, so every entry must be
# a distinct colour and every name must be unique.
#
# The former second "dark_purple" entry was removed: it reused both an existing
# name and the exact colour of "bright_purple" (#6A3FA2), which produced a flat
# stretch in the gradient and could hand two source flags the same colour.
custom_colors <- c(
  "dark_purple" = "#322564",
  "dark_gray" = "#4A4A4A",
  "bright_pink" = "#9C006F",
  "pink" = "#D130A1",
  "dark_pink" = "#9C1B5E",
  "bright_purple" = "#6A3FA2",
  "sky_blue" = "#0A56B1",
  "dark_sky_blue" = "#2F4F80",
  "green" = "#38A36F"
)



# Build a palette of `n` colours by interpolating across the anchor colours
# stored in `custom_colors`.
#   n: number of colours wanted (callers pass the number of source flags).
# Returns a character vector of `n` hex colour strings.
generate_dynamic_palette <- function(n) {
  color_ramp <- colorRampPalette(custom_colors)
  color_ramp(n)
}

# Map a single carton name to its reporting group.
#
#   carton_name: one carton name (length-one value); matching is a
#                case-sensitive literal substring search.
#
# Returns exactly one string, always:
#   "Small Boxes" / "Medium Boxes" / "Large Boxes"  for names containing "Box"
#                                                   plus the matching size word,
#   "Mailers"                                       for non-Box names containing
#                                                   "Mailer",
#   "Other"                                         for everything else.
#
# A "Box" name without a recognised size used to fall through and return NULL,
# which turned the caller's sapply() result into a list; it now gets "Other".
group_carton_types <- function(carton_name) {
  contains <- function(token) {
    grepl(token, carton_name, fixed = TRUE)
  }

  if (contains("Box")) {
    # Size words are checked in this order; the first hit wins.
    if (contains("Small")) {
      return("Small Boxes")
    }
    if (contains("Medium")) {
      return("Medium Boxes")
    }
    if (contains("Large")) {
      return("Large Boxes")
    }
    # Box without a recognised size: use the catch-all instead of NULL.
    return("Other")
  }

  if (contains("Mailer")) {
    return("Mailers")
  }

  "Other"
}

# Loop through each sheet in the Excel file and generate plots
sheets <- excel_sheets(excel_file)

# Columns the cleaning steps below read unconditionally. A sheet that lacks any
# of them is skipped with a warning instead of aborting the whole run.
required_cols <- c(
  "source_flag", "surface_area", "Carton_volume", "total_weight",
  "Billed_Weight", "dim_weight", "Price", "base_cost", "Order_volume"
)

# With several sheets, each PNG is prefixed with its sheet name so a later
# sheet no longer overwrites the plots of an earlier one. A single-sheet
# workbook keeps the original, un-prefixed file names.
prefix_with_sheet <- length(sheets) > 1

# One entry per bar chart.
#   needs : columns that must exist for the chart to be drawn
#   x, y  : columns mapped to the axes (fill is always source_flag)
#   agg   : function that collapses rows sharing the same x / source_flag into
#           a single bar, or NULL to plot the rows as they are
# NOTE(review): charts 1 and 2 both plot total_weight although their titles
# mention a second measure; charts 6 and 8 are labelled as counts but plot the
# raw column. Both are kept as found — confirm the intended metrics.
bar_specs <- list(
  # 1: Actual_weight / Dim_weight (SUM)
  list(
    needs = c("total_weight", "dim_weight"), x = "name", y = "total_weight",
    agg = sum, title = "Actual Weight / Dim Weight",
    x_lab = "Carton Type", y_lab = "Sum of Total Weight",
    file = "actual_vs_dim_weight.png"
  ),
  # 2: Actual_weight / Billed_Weight (SUM)
  list(
    needs = c("total_weight", "Billed_Weight"), x = "name", y = "total_weight",
    agg = sum, title = "Actual Weight / Billed Weight",
    x_lab = "Carton Type", y_lab = "Sum of Total Weight",
    file = "actual_vs_billed_weight.png"
  ),
  # 3: Dim_weight / Billed_Weight (SUM)
  list(
    needs = c("dim_weight", "Billed_Weight"), x = "name", y = "dim_weight",
    agg = sum, title = "Dim Weight / Billed Weight",
    x_lab = "Carton Type", y_lab = "Sum of Dim Weight",
    file = "dim_weight_vs_billed_weight.png"
  ),
  # 4: Volume_utilization / Carton_type(name) (AVG)
  list(
    needs = c("volume_utilization", "name"), x = "name",
    y = "volume_utilization", agg = mean,
    title = "Volume Utilization / Carton Type",
    x_lab = "Carton Type", y_lab = "Average Volume Utilization",
    file = "volume_utilization_by_carton_type.png"
  ),
  # 5: Price / Carton_type(name) (AVG)
  list(
    needs = c("Price", "name"), x = "name", y = "Price",
    agg = mean, title = "Price / Carton Type",
    x_lab = "Carton Type", y_lab = "Average Price",
    file = "price_by_carton_type.png"
  ),
  # 6: Counts of Cartons Billed at Actual(dimmed) vs. Dim Weight (COUNT)
  list(
    needs = c("Dimmed", "dim_weight"), x = "Dimmed", y = "dim_weight",
    agg = NULL, title = "Counts of Cartons Billed at Dimmed vs. Dim Weight",
    x_lab = "Dimmed Count", y_lab = "Dim Weight Count",
    file = "dimmed_vs_dim_weight_counts.png"
  ),
  # 8: Orders Billed Over Actual Weight (COUNT)
  list(
    needs = c("Billed_over_Actual", "total_weight"), x = "name",
    y = "Billed_over_Actual", agg = NULL,
    title = "Orders Billed Over Actual Weight",
    x_lab = "Carton Type", y_lab = "Count of Billed Over Actual",
    file = "orders_billed_over_actual_weight.png"
  )
)

# Draw one dodged bar chart described by `spec` (an element of `bar_specs`)
# from `data`, coloured with `palette`, and save it as an 8 x 6 inch PNG at
# `file`. Returns the ggplot object invisibly.
save_bar_plot <- function(data, spec, palette, file) {
  bars <- if (is.null(spec$agg)) {
    geom_bar(stat = "identity", position = position_dodge(), alpha = 1)
  } else {
    # Aggregate inside ggplot so the bar height really is the sum / mean the
    # axis label promises. With one row per x / source_flag this is identical
    # to plotting the raw value.
    geom_bar(
      stat = "summary", fun = spec$agg,
      position = position_dodge(), alpha = 1
    )
  }

  p <- ggplot(
    data,
    aes(x = .data[[spec$x]], y = .data[[spec$y]], fill = source_flag)
  ) +
    bars +
    scale_fill_manual(values = palette) +
    labs(title = spec$title, x = spec$x_lab, y = spec$y_lab) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))

  ggsave(filename = file, plot = p, width = 8, height = 6)
  invisible(p)
}

# Cleaned data of the last sheet that reached the plotting stage; used for the
# summary printed after the loop.
last_df <- NULL

for (sheet in sheets) {
  df <- read_excel(excel_file, sheet = sheet)

  missing_cols <- setdiff(required_cols, names(df))
  if (length(missing_cols) > 0) {
    warning(
      "Skipping sheet '", sheet, "': missing column(s) ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
    next
  }

  # Apply grouping logic for carton types. vapply() keeps the column a plain
  # character vector even for zero rows; a NULL result from the helper is
  # stored as NA instead of turning the column into a list.
  # NOTE(review): grouped_name is not used by any plot below.
  if ("name" %in% names(df)) {
    df <- df %>%
      mutate(
        grouped_name = vapply(
          name,
          function(carton) {
            grp <- group_carton_types(carton)
            if (is.null(grp)) NA_character_ else grp
          },
          character(1),
          USE.NAMES = FALSE
        )
      )
  }

  # Keep source flags in order of first appearance and force the two measure
  # columns to numeric (non-numeric cells become NA).
  df$source_flag <- factor(df$source_flag, levels = unique(df$source_flag))
  df$surface_area <- as.numeric(df$surface_area)
  df$Carton_volume <- as.numeric(df$Carton_volume)

  # Drop rows with NA in any essential column
  df <- df %>%
    filter(
      !is.na(total_weight) & !is.na(Billed_Weight) & !is.na(dim_weight) &
        !is.na(Price) & !is.na(base_cost) & !is.na(Order_volume) &
        !is.na(Carton_volume)
    )
  last_df <- df

  if (nrow(df) == 0) {
    warning(
      "Skipping sheet '", sheet, "': no complete rows left after NA filtering",
      call. = FALSE
    )
    next
  }

  # One colour per distinct source flag present in the cleaned data
  source_flags_count <- length(unique(df$source_flag))
  dynamic_palette <- generate_dynamic_palette(source_flags_count)

  # File-name prefix: sheet name reduced to filesystem-safe characters
  file_prefix <- if (prefix_with_sheet) {
    paste0(gsub("[^A-Za-z0-9_-]+", "_", sheet), "_")
  } else {
    ""
  }

  # Bar charts 1-6 and 8
  for (spec in bar_specs) {
    wanted <- unique(c(spec$needs, spec$x, spec$y))
    if (!all(wanted %in% names(df))) {
      next
    }
    save_bar_plot(
      df, spec, dynamic_palette,
      file.path(output_dir, paste0(file_prefix, spec$file))
    )
  }

  # 7: SA Agg Comp — share of total surface area per source flag (pie chart)
  if (all(c("surface_area", "Carton_volume") %in% names(df))) {
    df_summary <- df %>%
      group_by(source_flag) %>%
      summarise(total_surface_area = sum(surface_area, na.rm = TRUE)) %>%
      mutate(percentage = total_surface_area / sum(total_surface_area) * 100)

    pie_labels <- paste0(
      df_summary$source_flag, " (", round(df_summary$percentage, 1), "%)"
    )

    png(
      file.path(output_dir, paste0(file_prefix, "SA_agg_comp_pie_chart.png")),
      width = 8, height = 6, units = "in", res = 300
    )
    # Close the PNG device even if pie() fails, so no device is left open.
    tryCatch(
      pie(
        df_summary$total_surface_area,
        labels = pie_labels, col = dynamic_palette,
        main = "SA Agg Comp (Surface Area)", cex = 1.2
      ),
      finally = dev.off()
    )
  }
}

# NOTE(review): the message mentions grids, but no grid is produced by this
# script; the text is kept unchanged.
print("All individual plots and their respective grids have been generated and saved as PNG files.")

# Summaries describe the last sheet that was cleaned, not the whole workbook.
if (!is.null(last_df)) {
  print(summary(last_df$surface_area))
  print(summary(last_df$Carton_volume))
}

[1] "All individual plots and their respective grids have been generated and saved as PNG files."
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  545.9   663.0  1140.0  1268.4  1657.7  2532.2 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  541.9   756.0  1584.0  2127.1  2696.6  5411.4 
