In [None]:
source("setup.r")

In [114]:
# Load necessary libraries
library(readxl)
library(ggplot2)
library(dplyr)
library(gridExtra)

# Define the path to the Excel file
excel_file <- "Output Path/consolidated_output.xlsx"

# Extract the directory path dynamically
output_dir <- dirname(excel_file)

# Create the output directory if it doesn't exist
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# Custom color palette
custom_colors <- c(
  "dark_gray" = "#4A4A4A",
  "bright_pink" = "#9C006F",
  "seafoam" = "#00ff80",
  "bright_purple" = "#8100fb",
  "sky_blue" = "#0A56B1",
  "dark_sky_blue" = "#2F4F80",
  "green" = "#38A36F",
  "red" = "#ff0000",
  "orange" = "#ee6200"
)


# Function to generate a dynamic palette based on the number of unique source flags
generate_dynamic_palette <- function(n) {
  return(colorRampPalette(custom_colors)(n))  # Generate a gradient of distinct colors
}

# Function to group carton types
group_carton_types <- function(carton_name) {
  if (grepl("Box", carton_name)) {
    if (grepl("Small", carton_name)) {
      return("Small Boxes")
    } else if (grepl("Medium", carton_name)) {
      return("Medium Boxes")
    } else if (grepl("Large", carton_name)) {
      return("Large Boxes")
    }
  } else if (grepl("Mailer", carton_name)) {
    return("Mailers")
  } else {
    return("Other") # Catch-all for non-Box and non-Mailer types
  }
}

# Loop through each sheet in the Excel file and generate plots
sheets <- excel_sheets(excel_file)

# Store plot list for grid arrangement
for (sheet in sheets) {
  df <- read_excel(excel_file, sheet = sheet)
  
  # Apply grouping logic for carton types
  if ("name" %in% names(df)) {
    df <- df %>%
      mutate(grouped_name = sapply(name, group_carton_types))
  }

  # Ensure 'source_flag' is a factor with appropriate levels
  df$source_flag <- factor(df$source_flag, levels = unique(df$source_flag))
  df$surface_area <- as.numeric(df$surface_area)
  df$Carton_volume <- as.numeric(df$Carton_volume)

  # Remove NA values to avoid issues with facetting (Check for NA values in essential columns)
  df <- df %>%
    filter(!is.na(total_weight) & !is.na(Billed_Weight) & !is.na(dim_weight) & !is.na(Price) & !is.na(base_cost) & !is.na(Order_volume) & !is.na(Carton_volume))

print(names(df))
  # Filter data: baseline vs other source_flags
  baseline_df <- df %>% filter(source_flag == "baseline")
  other_df <- df %>% filter(source_flag != "baseline")

  # Generate a dynamic color palette based on the number of unique source flags
  source_flag_count <- length(unique(df$name))
  dynamic_palette <- generate_dynamic_palette(source_flag_count)

  # Initialize an empty list to store plots
  plot_list <- list()

summary_data <- df %>%
  group_by(source_flag) %>%
  summarize(
    Billed_Weight = sum(Billed_Weight, na.rm = TRUE),
    Billed_over_Actual = sum(Billed_over_Actual, na.rm = TRUE),
    dim_weight = sum(dim_weight, na.rm = TRUE),
    total_weight = sum(total_weight, na.rm = TRUE)
  )

    # 1: Actual_weight / Dim_weight (SUM) without the second axis
    if (all(c("total_weight", "dim_weight") %in% names(df))) {
      p1 <- ggplot(summary_data, aes(x = source_flag)) +
        geom_bar(aes(y = dim_weight, fill = "dim_weight"), stat = "identity", position = "dodge") +
        scale_fill_manual(values = dynamic_palette) +
        ggtitle("Combo Chart: Dim Weight vs Total Weight") +
        xlab("Source Name") +
        ylab("Dim Weight") 
    
      p1 <- p1 + geom_line(aes(y = total_weight / 2, group = 1, color = "total_weight"), size = 1.5) + 
        geom_point(aes(y = total_weight / 2, color = "total_weight"), size = 3) +
        scale_y_continuous(name = "Dim Weight", labels = scales::label_number(scale = 1), breaks = seq(0, max(summary_data$dim_weight), by = 10000)) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        guides(fill = guide_legend(title = ""), color = guide_legend(title = ""))
      
      ggsave(filename = file.path(output_dir, paste0("actual_vs_dim_weight.png")), plot = p1, width = 8, height = 6)
    }

    # 2: Actual_weight / Billed_Weight (SUM)
    if (all(c("total_weight", "Billed_Weight") %in% names(df))) {
      p2 <- ggplot(summary_data, aes(x = source_flag)) +
        geom_bar(aes(y = Billed_Weight, fill = "Billed_Weight"), stat = "identity", position = "dodge") +
        scale_fill_manual(values = dynamic_palette) +
        ggtitle("Combo Chart: Billed Weight vs Total Weight") +
        xlab("Source Name") +
        ylab("Billed Weight") 
      p2 <- p2 + geom_line(aes(y = total_weight / 2, group = 1, color = "total_weight"), size = 1.5) +
        geom_point(aes(y = total_weight / 2, color = "total_weight"), size = 3) +
        scale_y_continuous(name = "Billed Weight", labels = scales::label_number(scale = 1), breaks = seq(0, max(summary_data$Billed_Weight), by = 10000)) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        guides(fill = guide_legend(title = ""), color = guide_legend(title = ""))
      ggsave(filename = file.path(output_dir, paste0("actual_vs_billed_weight.png")), plot = p2, width = 8, height = 6)
    }

      # 3: Dim_weight / Billed_Weight (SUM)
    if (all(c("dim_weight", "Billed_Weight", "source_flag") %in% names(df))) {
      # Summarize data by source_flag
      summary_data <- df %>%
        group_by(source_flag) %>%
        summarize(
          total_dim_weight = sum(dim_weight, na.rm = TRUE),
          total_billed_weight = sum(Billed_Weight, na.rm = TRUE)
        )
      
      # Create the plot for Combo Chart: Dim Weight vs Billed Weight
      p3 <- ggplot(summary_data, aes(x = source_flag)) +
        geom_line(aes(y = total_dim_weight, group = 1, color = "Dim_Weight"), size = 1.5, alpha = 0.8) +
        geom_point(aes(y = total_dim_weight, color = "Dim_Weight"), size = 3, alpha = 0.8) +  
        geom_bar(aes(y = total_billed_weight, fill = "Billed_Weight"), stat = "identity", position = "dodge", alpha = 0.7) +
        scale_fill_manual(values = dynamic_palette) +
        scale_y_continuous(breaks = seq(0, max(summary_data$total_dim_weight, summary_data$total_billed_weight), by = 10000)) +
        labs(title = "Sum of Dim Weight and Billed Weight by Source Flag", x = "Source Flag", y = "Total Weight") +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        guides(fill = guide_legend(title = ""), color = guide_legend(title = ""))
     ggsave(filename = file.path(output_dir, paste0("dim_weight_vs_billed_weight.png")), plot = p3, width = 8, height = 6)
   }
    # 4: Volume_utilization / Carton_type(name) (AVG)
    if (all(c("volume_utilization", "name") %in% names(df))) {
      summary_data <- df %>%
        group_by(name, source_flag) %>%
        summarize(avg_volume_utilization = mean(volume_utilization, na.rm = TRUE))  
      # Create the plot
      p4 <- ggplot(summary_data, aes(x = name, y = avg_volume_utilization, fill = source_flag)) +
        geom_bar(stat = "identity", position = position_dodge(), alpha = 1, color = "white") +  # Use average volume utilization
        scale_fill_manual(values = dynamic_palette) +
        labs(title = "Average Volume Utilization / Carton Type", x = "Carton Type", y = "Average Volume Utilization") +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    ggsave(filename = file.path(output_dir, paste0("volume_utilization_by_carton_type.png")), plot = p4, width = 8, height = 6)
  }

  # 5: Price / Carton_type(name) (AVG)
    if (all(c("Price", "name") %in% names(df))) {
      
      # Calculate average price by carton type
      summary_data <- df %>%
        group_by(name, source_flag) %>%
        summarize(avg_price = mean(Price, na.rm = TRUE) / 100)  # Calculate the average price
      
      # Create the plot
      p5 <- ggplot(summary_data, aes(x = name, y = avg_price, fill = source_flag)) +
        geom_bar(stat = "identity", position = position_dodge(), alpha = 1, color = "white") + 
        scale_fill_manual(values = dynamic_palette) +
        scale_y_continuous(labels = scales::label_number(scale = 1, big.mark = ",")) +
        labs(title = "Average Price / Carton Type", x = "Carton Type", y = "Average Price") +
        theme(axis.text.x = element_text(angle = 45, hjust = 1))
    ggsave(filename = file.path(output_dir, paste0("price_by_carton_type.png")), plot = p5, width = 8, height = 6)
  }

    # 6: Counts of Dimmed (Yes/No) by Source Flag - Stacked Bar Plot
    if (all(c("Dimmed", "source_flag") %in% names(df))) {
      
      # Convert 'Dimmed' to a factor to avoid continuous values issue
      df$Dimmed <- factor(df$Dimmed, levels = c("Yes", "No"))  
      
      df_dimmed <- df %>%
        group_by(Dimmed, source_flag) %>%
        summarize(count = n())
      p6 <- ggplot(df_dimmed, aes(x = source_flag, y = count, fill = Dimmed)) +
        geom_bar(stat = "identity", position = "stack", alpha = 1) + 
        scale_fill_manual(values = dynamic_palette) +  
        labs(title = "Counts of Dimmed (Yes/No) by Source Flag", x = "Source Flag", y = "Count") +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) 
      ggsave(filename = file.path(output_dir, paste0("dimmed_vs_count.png")), plot = p6, width = 8, height = 6)
    }

    
    # 7: SA Agg Comp (Surface Area) - Area Chart
    if (all(c("surface_area", "source_flag") %in% names(df))) {
    summary_data <- df %>%
    group_by(source_flag) %>%
    summarize(total_surface_area = sum(surface_area, na.rm = TRUE))
      p7 <- ggplot(summary_data, aes(x = source_flag)) +
        geom_bar(aes(y = total_surface_area, fill = source_flag), stat = "identity", position = "dodge") +
        scale_fill_manual(values = dynamic_palette) +
        ggtitle("Surface Area") +
        xlab("Source Name") +
        ylab("Total Surface Area") +
        scale_y_continuous(labels = scales::label_number(scale = 1)) +#, breaks = seq(0, max(summary_data$total_surface_area), by = 1000)) +
        guides(fill = guide_legend(title = ""), color = guide_legend(title = ""))
      ggsave(filename = file.path(output_dir, paste0("SA_agg_comp.png")), plot = p7, width = 8, height = 6)
    }

    #8
    if (all(c("Billed_over_Actual", "orderId", "source_flag") %in% names(df))) {
      summary_data <- df %>%
        group_by(Billed_over_Actual, source_flag) %>%
        summarise(order_count = n()) 
      p8 <- ggplot(summary_data, aes(x = Billed_over_Actual, y = order_count, color = source_flag, group = source_flag)) +
        geom_line(size = 1.5, linewidth = 1, alpha = 0.4) +
        geom_point(size = 3,  alpha = 0.5) +
        scale_color_manual(values = dynamic_palette) +
        ggtitle("Combo Chart: Billed over Actual vs Count of Order ID by Source Flag") +
        xlab("Billed over Actual") +
        ylab("Count of Order ID") +
        scale_x_continuous(breaks = seq(0, max(summary_data$Billed_over_Actual), by = 5)) +
        scale_y_continuous(breaks = seq(0, max(summary_data$order_count), by = 1000)) +
        theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
        guides(color = guide_legend(title = "Source Flag"))
      ggsave(filename = file.path(output_dir, paste0("orders_billed_over_actual.png")), plot = p8, width = 8, height = 6)
    }

}

print("All individual plots and their respective grids have been generated and saved as PNG files.")
print(summary(df$surface_area))
print(summary(df$Carton_volume))


 [1] "orderId"            "refId"              "index"             
 [4] "name"               "dimensions"         "Price"             
 [7] "base_cost"          "Carton_volume"      "Order_volume"      
[10] "volume_utilization" "surface_area"       "total_weight"      
[13] "net_weight"         "tare_weight"        "weight_utilization"
[16] "dim_weight"         "item_count"         "source_flag"       
[19] "Dimmed"             "Billed_Weight"      "Billed_over_Actual"
[22] "L"                  "W"                  "H"                 
[25] "grouped_name"      


[1m[22m`summarise()` has grouped output by 'name'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'name'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Dimmed'. You can override using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'Billed_over_Actual'. You can override using the `.groups` argument.


[1] "All individual plots and their respective grids have been generated and saved as PNG files."
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  175.4   416.6   541.6   530.2   565.4  2277.6 
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  98.55  316.39  451.50  498.13  493.44 5088.75 
