In [63]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import norm
from matplotlib.backends.backend_pdf import PdfPages
from datetime import datetime


In [64]:
file_path = r"file_path .xlsx"
df = pd.read_excel(file_path)

In [65]:
measurement_columns = ["length", "width", "cuff_length", "cuff_width", "thumb_length", "thumb_width",
                       "index_length", "index_width", "middle_length", "middle_width", "ring_length", "ring_width",
                       "little_length", "little_width"]

# Filter the DataFrame to include only the existing columns in the dataset
valid_columns = [col for col in measurement_columns if col in df.columns]

# Group data by product name and size
grouped_data = df.groupby(["product_name", "size"])

pdf_filename = r"path\Histograms_data.pdf"

# fig, ax = plt.subplots(figsize=(10, 6))
# ax.axis("off")  # Hide axes

A4_WIDTH = 11.69
A4_HEIGHT = 8.27




In [66]:
def create_front_page(pdf):
    fig, ax = plt.subplots(figsize=(A4_WIDTH, A4_HEIGHT))
    ax.axis("off")  # Hide axes

    # Add a title
    title = "Product Measurement Report"
    ax.text(0.5, 0.8, title, fontsize=24, fontweight="bold", ha="center", va="center", color="darkblue")

    # Add a subtitle
    subtitle = "Detailed Analysis of Product Measurements by Size"
    ax.text(0.5, 0.7, subtitle, fontsize=16, ha="center", va="center", color="gray")

    # Add a company logo (optional)
    # logo = plt.imread("company_logo.png")  # Load your logo file
    # ax.imshow(logo, extent=[0.3, 0.7, 0.4, 0.6], aspect="auto")  # Adjust position and size

    # Add metadata
    metadata = f"Generated on : {datetime.today().strftime('%Y-%m-%d')}\n\nCreated By     : Gayanga Wijesekara"
    ax.text(0.1, 0.1, metadata, fontsize=12, ha="left", va="bottom", color="black")

    # Add decorative elements (e.g., lines or shapes)
    ax.axhline(0.75, color="darkblue", linewidth=2, xmin=0.1, xmax=0.9)  # Horizontal line
    ax.axhline(0.65, color="darkblue", linewidth=1, xmin=0.1, xmax=0.9)  # Horizontal line

    # Save the front page
    pdf.savefig(fig)
    plt.close(fig)

In [67]:
with PdfPages(pdf_filename) as pdf:
    # Add the front page
    create_front_page(pdf)

    # Process data and generate other pages
    for (product, size), data in grouped_data:
        print(f"Processing: {product} (Size: {size})")

        # --- Extract Dynamic Specifications ---
        specifications = {}
        for column in measurement_columns:
            if column in data.columns and not data[column].isna().all():
                min_val = data[column].min()
                max_val = data[column].max()
                specifications[column] = f"{min_val:.1f} - {max_val:.1f}"

        # --- PAGE 1: Size Details ---
        fig, ax = plt.subplots(figsize=(A4_WIDTH, A4_HEIGHT))
        ax.axis("off")  # Hide axes

        # Table Headers
        table_data = [["Measurement", "Specification", "Mean", "StDev"]]

        for column in measurement_columns:
            if column in data.columns and not data[column].isna().all():
                values = data[column].dropna()
                mean_value = f"{values.mean():.2f}"
                std_dev = f"{values.std():.2f}" if not np.isnan(values.std()) else "0.00"

                spec = specifications.get(column, "N/A")

                table_data.append([column.replace("_", " ").capitalize(), spec, mean_value, std_dev])

        # Define colors for the table
        header_color = "#40466e"  # Dark blue for header
        row_colors = ["#f1f1f2", "w"]  # Alternating row colors
        cell_colors = [[header_color] * len(table_data[0])] + [
            [row_colors[i % 2] for i in range(len(table_data[0]))] for _ in range(len(table_data) - 1)
        ]

        # Create table with custom styling
        table = ax.table(
            cellText=table_data,
            colLabels=None,
            cellLoc="center",
            loc="center",
            cellColours=cell_colors,
            fontsize=12,
            colWidths=[0.3, 0.3, 0.2, 0.2],  # Adjust column widths
        )

        # Customize table appearance
        table.auto_set_font_size(False)
        table.set_fontsize(12)
        table.scale(1.2, 1.2)  # Scale table size

        # Set header row font color to white
        for (i, j), cell in table.get_celld().items():
            if i == 0:  # Header row
                cell.set_text_props(color="white", fontweight="bold")
            cell.set_edgecolor("gray")  # Add grid lines

        ax.set_title(f"PRODUCT TYPE {product}\nSIZE {size}", fontsize=16, fontweight="bold", color="darkblue")

        # Adjust layout to fit A4 size
        plt.tight_layout()
        pdf.savefig(fig)
        plt.close(fig)

        # --- HISTOGRAM PAGES (4 per page) ---
        fig, axes = plt.subplots(2, 2, figsize=(A4_WIDTH, A4_HEIGHT))
        axes = axes.flatten()
        plot_count = 0

        for column in measurement_columns:
            if column in data.columns and not data[column].isna().all():
                values = data[column].dropna()
                mean = values.mean()
                std_dev = values.std()
                min_val = values.min()
                max_val = values.max()

                # Plot histogram
                ax = axes[plot_count]
                ax.hist(values, bins=10, density=True, alpha=0.7, color='b', edgecolor='black', label="Histogram")

                # Standard deviation curve
                x = np.linspace(min_val, max_val, 100)
                y = (1 / (std_dev * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std_dev) ** 2)
                ax.plot(x, y, color='red', linewidth=2, label="Std Dev Curve")

                # Add vertical lines for min, max, and mean
                ax.axvline(min_val, color='r', linestyle='dashed', linewidth=1, label=f"Min: {min_val:.2f}")
                ax.axvline(max_val, color='g', linestyle='dashed', linewidth=1, label=f"Max: {max_val:.2f}")
                ax.axvline(mean, color='black', linestyle='dashed', linewidth=1, label=f"Mean: {mean:.2f}")

                ax.set_title(f"{column.capitalize()} Distribution\n{product} (Size {size})")
                ax.set_xlabel(column.capitalize())
                ax.set_ylabel("Density")
                ax.legend()

                plot_count += 1

                # Save the page if 4 histograms are plotted
                if plot_count == 4:
                    plt.tight_layout()
                    pdf.savefig(fig)
                    plt.close(fig)
                    fig, axes = plt.subplots(2, 2, figsize=(A4_WIDTH, A4_HEIGHT))
                    axes = axes.flatten()
                    plot_count = 0

        # Save remaining plots
        if plot_count > 0:
            for i in range(plot_count, 4):
                fig.delaxes(axes[i])
            plt.tight_layout()
            pdf.savefig(fig)
            plt.close(fig)

print(f"PDF saved successfully: {pdf_filename}")

Processing: 11-511 (Size: 8)


  y = (1 / (std_dev * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std_dev) ** 2)
  y = (1 / (std_dev * np.sqrt(2 * np.pi))) * np.exp(-0.5 * ((x - mean) / std_dev) ** 2)


Processing: 11-542 (Size: 8)
Processing: 11-800 (Size: 7)
Processing: 11-800 (Size: 11)
Processing: 11-801 (Size: 5)
Processing: 11-801 (Size: 6)
Processing: 11-801 (Size: 8)
Processing: 11-840 (Size: 5)
Processing: 11-840 (Size: 6)
Processing: 11-840 (Size: 7)
Processing: 11-840 (Size: 8)
Processing: 11-840 (Size: 11)
Processing: 11-849 (Size: 6)
Processing: 80-813 (Size: 8)
Processing: R840 (Size: 8)
PDF saved successfully: C:\Users\cmbajay8\OneDrive - Ansell Healthcare\Desktop\Gayanga\Group_data\Histograms_data.pdf
