In [13]:
import os

import pandas as pd

# Load the orders table.
# Every cell below reads from a DataFrame named `data`, so it must be created
# here for the notebook to survive Restart Kernel -> Run All.
#
# The CSV location comes from the ORDERS_CSV_PATH environment variable so that
# no machine-specific path is stored in the notebook. When the variable is not
# set, the user is prompted for the path instead.
csv_path = (os.environ.get('ORDERS_CSV_PATH') or input("Please enter the path to the CSV file: ")).strip()

# Let read errors (missing file, parse failure) propagate: stopping here with
# the real traceback is clearer than continuing with an undefined `data`.
data = pd.read_csv(csv_path)

In [14]:
# Keep only genuine single-item orders: the order has exactly one line in
# `data` AND that line sold exactly one unit.
#
# The line count is taken over ALL rows of the order. Counting only after the
# quantity filter would let an order made of one qty-1 line plus other lines
# (with larger quantities) pass as "single-item".
# value_counts() ignores missing order codes, so those rows map to NaN and are
# excluded by the `== 1` comparison below.
lines_per_order = data['order_code'].map(data['order_code'].value_counts())

# Rows where exactly one unit was sold (name kept for any cell that uses it).
df_filtered = data[data['qty_items_sold'] == 1]

# .copy() makes df_single an independent frame, so code that later adds
# columns to it does not raise SettingWithCopyWarning or touch `data`.
df_single = data[(data['qty_items_sold'] == 1) & (lines_per_order == 1)].copy()

In [15]:
def calculate_box_volumes(df):
    """Average the box dimensions and volume for each box type.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns 'display_name', 'width_in_millimeter',
        'length_in_millimeter' and 'height_in_millimeter'.
        The frame passed in is NOT modified.

    Returns
    -------
    pandas.DataFrame
        One row per 'display_name' with the columns 'average_width_mm',
        'average_length_mm', 'average_height_mm', 'average_volume_cubic_mm'
        and 'average_volume_cubic_cm', sorted by 'average_volume_cubic_cm'
        in descending order with a fresh 0..n-1 index.
    """
    dimension_columns = ['width_in_millimeter', 'length_in_millimeter', 'height_in_millimeter']

    # Work on a private copy of just the needed columns so the caller's frame
    # (often a filtered slice of a larger one) is never written to.
    boxes = df[['display_name'] + dimension_columns].copy()

    # Values that cannot be parsed as numbers become NaN instead of raising.
    for column in dimension_columns:
        boxes[column] = pd.to_numeric(boxes[column], errors='coerce')

    # Per-row volume; a row with any NaN dimension gets a NaN volume.
    boxes['volume_in_cubic_mm'] = (
        boxes['width_in_millimeter']
        * boxes['length_in_millimeter']
        * boxes['height_in_millimeter']
    )

    # Mean of each dimension and of the volume per box type.
    box_volumes = boxes.groupby('display_name').agg(
        average_width_mm=('width_in_millimeter', 'mean'),
        average_length_mm=('length_in_millimeter', 'mean'),
        average_height_mm=('height_in_millimeter', 'mean'),
        average_volume_cubic_mm=('volume_in_cubic_mm', 'mean')
    ).reset_index()

    # Convert from cubic millimeters to cubic centimeters (1 cm³ = 1000 mm³).
    box_volumes['average_volume_cubic_cm'] = box_volumes['average_volume_cubic_mm'] / 1000

    # Largest boxes first.
    return box_volumes.sort_values(by='average_volume_cubic_cm', ascending=False).reset_index(drop=True)

# Usage: per-box average dimensions and volume, computed from the single-item orders in df_single
box_volumes_df = calculate_box_volumes(df_single)


In [16]:
# For every unique item (category + size), find the largest box dimension seen
# across the boxes its orders were shipped in.
#
# NOTE: the result is stored under 'opt_height' / item_max_heights, but the value
# is the largest of the box's average width, length and height, not
# specifically its height.
ITEM_COLUMNS = ['product_navision_detail_category', 'sap_main_size']
BOX_DIMENSION_COLUMNS = ['average_width_mm', 'average_length_mm', 'average_height_mm']

# Step 1: build a display_name -> largest-dimension lookup once, instead of
# re-filtering box_volumes_df for every order row.
# If a display_name appears more than once, the smallest box by volume wins
# (stable sort, first row kept). Boxes whose dimensions are all NaN are dropped
# so they can never reach the max() below: with Python's max() a NaN either
# poisoned the result or was ignored depending on row order.
box_max_dimension = (
    box_volumes_df
    .sort_values('average_volume_cubic_cm', kind='mergesort')
    .drop_duplicates('display_name')
    .set_index('display_name')[BOX_DIMENSION_COLUMNS]
    .max(axis=1)
    .dropna()
)

# Step 2: skip rows where category, size, or display_name are missing, then
# attach the box dimension to every remaining order row. Rows whose box has no
# usable entry in the lookup map to NaN and are dropped.
orders_with_box = data.dropna(subset=ITEM_COLUMNS + ['display_name'])
item_rows = (
    orders_with_box
    .assign(opt_height=orders_with_box['display_name'].map(box_max_dimension))
    .dropna(subset=['opt_height'])
)

# Step 3: keep the maximum dimension per item. sort=False keeps the items in
# order of first appearance in `data`.
max_by_item = item_rows.groupby(ITEM_COLUMNS, sort=False)['opt_height'].max()

# {(category, size): max dimension}
item_max_heights = max_by_item.to_dict()

# Same information as a DataFrame with one row per item
output_df = max_by_item.reset_index()

# Convert the output to a list of records for further use (optional)
output_dict = output_df.to_dict(orient='records')



In [18]:
# Optional: write the result to a JSON file instead of a Python module
#import json
#with open('height_dict.json', 'w') as f:
    #json.dump(output_dict, f)

In [19]:
# Write the records to an importable .py module with UTF-8 encoding.
#
# The text written must be valid Python source. Plain str() of the records is
# not guaranteed to be: NaN prints as the bare name `nan`, infinities as `inf`,
# and NumPy scalars can print as `np.float64(...)`. Any of those makes
# `import height_dict` fail, so values are normalised to builtins first.
def _as_python_literal(value):
    """Return `value` as a builtin whose repr() is valid Python source."""
    if hasattr(value, 'item'):
        # NumPy scalar -> builtin int / float / str / bool
        value = value.item()
    if isinstance(value, float) and not (float('-inf') < value < float('inf')):
        # NaN and +/-inf have no literal form; store them as None
        return None
    return value


height_records = [
    {key: _as_python_literal(value) for key, value in record.items()}
    for record in output_dict
]

with open('height_dict.py', 'w', encoding='utf-8') as f:
    f.write('height_dict = ' + repr(height_records) + '\n')