# DeepPack3D Training Notebook for 3D Bin Packing

This notebook trains a reinforcement learning model based on the DeepPack3D implementation, customized for the 3D bin packing problem with constraints that prevent floating boxes, overlapping boxes, and boxes extending outside the container.

## Setup and Environment

In [None]:
# Install required packages
# These are IPython `!` shell escapes, so this cell only runs inside a notebook kernel.
# TensorFlow is pinned to 2.10.0; the plotting/data packages are installed unpinned.
!pip install tensorflow==2.10.0
!pip install matplotlib seaborn numpy pandas

In [None]:
# Import necessary libraries
import os
import sys
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import seaborn as sns
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
import random
import time
import json
from datetime import datetime
import pandas as pd
from IPython.display import display, clear_output

## Clone and Import DeepPack3D

First, we'll clone the DeepPack3D repository if it's not already available, and then import its modules.

In [None]:
# Check if DeepPack3D is already available, otherwise clone it
if not os.path.exists('DeepPack3D'):
    !git clone https://github.com/zgtcktom/DeepPack3D.git

# Add DeepPack3D to the Python path
sys.path.append('DeepPack3D')

In [None]:
# Import DeepPack3D modules.
# A failed import is reported but not re-raised, so later cells that use these
# names will fail with NameError until the import problem is fixed.
try:
    from DeepPack3D.env import MultiBinPackerEnv
    from DeepPack3D.agent import Agent, HeuristicAgent
    from DeepPack3D.geometry import Cuboid
    from DeepPack3D.SpacePartitioner import SpacePartitioner
    from DeepPack3D.conveyor import Conveyor, FileConveyor, InputConveyor
except ImportError as import_error:
    print(f"Error importing DeepPack3D modules: {import_error}")
    print("Please check the repository structure and paths")
else:
    print("Successfully imported DeepPack3D modules")

## Define Custom Environment

We'll extend the MultiBinPackerEnv to create a custom environment that specifically addresses our requirements:
1. No floating boxes
2. No overlapping boxes
3. No boxes outside the container
4. Optimized space utilization

In [None]:
class CustomBinPackerEnv(MultiBinPackerEnv):
    def __init__(self, n_bins=1, size=(32, 32, 32), k=5, max_bins=None, max_items=None,
                 replace='all', verbose=False, prealloc_bins=0, prealloc_items=0,
                 shuffle=False, use_rotate=True, use_skip=True,
                 strict_support=True, min_support_percentage=0.5):
        """Build the environment and record the stability settings.

        The first twelve arguments are forwarded positionally, in order, to
        ``MultiBinPackerEnv.__init__``; their meaning is defined there.

        Args:
            strict_support: when true, ``placeable_coords`` applies
                ``min_support_percentage`` to placements that rest above
                height 0.
            min_support_percentage: minimum fraction of an item's bottom face
                that must sit at the resting height for such a placement to
                be offered (a fraction, e.g. ``0.5``, despite the name).
        """
        parent_args = (
            n_bins, size, k, max_bins, max_items, replace, verbose,
            prealloc_bins, prealloc_items, shuffle, use_rotate, use_skip,
        )
        super().__init__(*parent_args)

        # Extra knobs read by placeable_coords for the stricter stability check
        self.min_support_percentage = min_support_percentage
        self.strict_support = strict_support
    
    def placeable_coords(self, packer, h_map, size):
        """Enhanced version of placeable_coords that enforces stricter stability requirements"""
        xz = []
        splits = {}
        for split in packer.free_splits:
            if (split.top < self.size[1]) or (not split.fit(size)):
                continue
            x, y, z = split.coord
            xz.append((x, z))
            splits[(x, z)] = split
        xz = set(xz)
        
        w, h, d = size
        xyz = []
        for x, z in xz:
            placement = h_map[z:z + d, x:x + w]
            y = np.amax(placement)
            
            # Calculate support percentage (how much of the bottom face is supported)
            if y > 0 and self.strict_support:  # Only check if not on the ground and strict mode is enabled
                support_count = np.count_nonzero(placement == y)
                support_percentage = support_count / (d * w)
                
                # Only allow placement if support percentage meets minimum requirement
                if support_percentage >= self.min_support_percentage:
                    xyz.append((x, y, z, splits[(x, z)]))
            else:
                # Original condition for ground placement or when strict mode is disabled
                if np.count_nonzero(placement == y) / (d * w) > 0.5:
                    xyz.append((x, y, z, splits[(x, z)]))
        
        return xyz
    
    def step(self, action):
        """Place the selected item, score the move and recycle bins if needed.

        Args:
            action: ``(i, j, k)`` triple indexing ``actions[i][j][k]`` of the
                current ``self.state()`` - item index, bin index and
                rotation/placement index.

        Returns:
            ``(next_state, reward, done)``.

            * ``reward`` is ``0.3 * pyramid + 0.3 * compactness +
              0.4 * contact_score``, computed from the bin that received the
              item.
            * ``done`` is true when the state following the placement offers
              no action at all. It stays true even when bins are then
              replaced and ``next_state`` is refreshed.

        Raises:
            Exception: if the packer rejects the cuboid, or if bins must be
                replaced and ``self.replace`` is neither ``'max'`` nor
                ``'all'``.
        """
        _, _, actions = self.state()

        # item, bin, rotation_placement
        i, j, k = action
        _, (x, y, z), (w, h, d), _ = actions[i][j][k]

        packer = self.packers[j]
        cuboid = Cuboid(x, y, z, w, h, d)
        if not packer.add(cuboid):
            raise Exception(f'invalid space {cuboid}')
        self.conveyor.grab(i)

        next_state = self.state(step=True)

        # Enhanced reward shaping, based on the state after the placement
        _, h_maps, actions = next_state
        h_map = h_maps[j]

        # Volume utilization
        volume = np.sum([split.volume for split in packer.splits])

        # Pyramid score (encourages stable configurations)
        column_volume = np.sum(h_map)
        pyramid = volume / column_volume if column_volume > 0 else 0

        # Compactness score (encourages dense packing)
        max_height = np.amax(h_map) if h_map.size > 0 else 0
        if max_height > 0:
            compactness = volume / np.prod((packer.size[0], max_height, packer.size[2]))
        else:
            compactness = 0

        # Contact score (encourages contact with walls and the floor)
        contact_score = 0
        if x == 0 or x + w == packer.size[0]:  # Contact with X walls
            contact_score += 0.1
        if z == 0 or z + d == packer.size[2]:  # Contact with Z walls
            contact_score += 0.1
        if y == 0:  # Contact with floor
            contact_score += 0.2

        # Combined reward
        reward = 0.3 * pyramid + 0.3 * compactness + 0.4 * contact_score

        # Done as soon as no (item, bin, placement) combination is left
        done = len(self.indices(actions)) == 0

        # Handle bin replacement logic (same as original)
        if done:
            # NOTE(review): -1 is treated as "no bin limit" here, while this
            # class's constructor default for max_bins is None - presumably the
            # parent constructor normalises it; confirm.
            if self.max_bins != -1 and self.used_bins + 1 > self.max_bins:
                # Bin budget exhausted: archive every non-empty bin, replace nothing.
                non_empty = (p for p in self.packers if p.space_utilization() != 0)
                for offset, used in enumerate(non_empty):
                    self.used_packers.append(used)
                    if self.verbose:
                        loc = self.packers.index(used)
                        print(f'bin {self.used_bins - self.n_bins + offset}, loc: {loc}, space util: {used.space_utilization() * 100:.2f}, packed items: {len(used.splits)}')
            else:
                if self.replace == 'max':
                    # Swap out only the fullest bin
                    loc = np.argmax([p.space_utilization() for p in self.packers])
                    self._retire_bin(loc)
                elif self.replace == 'all':
                    # Swap out bins, fullest first, until only empty bins are
                    # left or the bin budget is reached
                    while True:
                        loc = np.argmax([p.space_utilization() for p in self.packers])
                        if self.packers[loc].space_utilization() == 0:
                            break
                        if self.max_bins != -1 and self.used_bins + 1 > self.max_bins:
                            break
                        self._retire_bin(loc)
                else:
                    raise Exception('not implemented')

                # Bins changed, so the observation has to be rebuilt
                next_state = self.state(step=True)

        return next_state, reward, done

    def _retire_bin(self, loc):
        """Archive the bin at ``loc`` and put a fresh, reset one in its slot."""
        retired = self.packers[loc]
        if self.verbose:
            print(f'bin {self.used_bins - self.n_bins}, loc: {loc}, space util: {retired.space_utilization() * 100:.2f}, packed items: {len(retired.splits)}')

        self.used_packers.append(retired)
        self.packers[loc] = SpacePartitioner(self.size)
        self.packers[loc].reset()
        self.used_bins += 1
    
    def indices(self, actions):
        """List every ``(item, bin, placement)`` index triple present in ``actions``.

        ``actions`` is a three-level nested sequence; the result contains one
        ``(i, j, k)`` tuple per innermost element, in nested iteration order.
        An empty result means there is nothing left to choose from.
        """
        triples = []
        for item_idx, per_bin in enumerate(actions):
            for bin_idx, placements in enumerate(per_bin):
                for placement_idx in range(len(placements)):
                    triples.append((item_idx, bin_idx, placement_idx))
        return triples