In [1]:
import pandas as pd
import numpy as np
import time
import random

In [2]:
%ls

[0m[01;34mapp[0m/                      [01;34m__pycache__[0m/             shipment_detail_group_C
bill_of_lading.py         QUESTION.docx            shipment_detail_group_D
drop_down_menu.html       shipment_detail          target_interview.ipynb
items.csv                 shipment_detail_group_A
notes_about_csv_file.txt  shipment_detail_group_B


In [3]:
# Print the notes file that describes the layout of items.csv.
# The context manager closes the file as soon as its text has been read.
with open("notes_about_csv_file.txt") as notes:
    print(notes.read())

The items.csv file is structured as such:

item_id	  item_group  cubic_volume_ft
10413	  A	          0.1
10341	  A	          0.5
10004	  B	          1.0
80014	  C	          0.3
20242	  B	          0.4
…	      …	          …

Each record in this csv file is representative of a single item.
The item_id field is a unique identifier for the item, while the item_type and cubic_volume_ft fields are attributes of the item.



In [4]:
def clean_csv(path="app/data/items.csv"):
    """Load the item stock and return it cleaned and sorted by volume.

    The frame is reduced to the three columns the packing code needs
    ('item_id', 'item_group', 'cubic_volume_ft'), every row containing a
    NaN in one of them is dropped, the rows are sorted from smallest to
    largest 'cubic_volume_ft', and the index is reset to 0..n-1 because
    sorting shuffles the original row labels.

    Parameters
    ----------
    path : str or path-like, optional
        Location of the items csv file. Defaults to "app/data/items.csv".

    Returns
    -------
    pandas.DataFrame
        The cleaned stock, ascending by 'cubic_volume_ft'.

    Raises
    ------
    KeyError
        If one of the three required columns is missing from the file.

    Notes
    -----
    Future implementation will include functionality for:
        Handling NaN values beyond just dropping them
        Include column testing to ensure data types
    """
    items = pd.read_csv(path)

    return (items.loc[:, ['item_id', 'item_group', 'cubic_volume_ft']]
                 .dropna()
                 .sort_values("cubic_volume_ft",
                              ascending=True)
                 .reset_index(drop=True)
           )

In [5]:
# Load the cleaned item stock; clean_csv() returns it sorted ascending by cubic volume.
stock = clean_csv()

In [6]:
stock.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 3 columns):
item_id            200 non-null float64
item_group         200 non-null object
cubic_volume_ft    200 non-null float64
dtypes: float64(2), object(1)
memory usage: 4.8+ KB


In [7]:
stock.head()

Unnamed: 0,item_id,item_group,cubic_volume_ft
0,8178.0,C,0.11
1,9667.0,B,0.11
2,4093.0,C,0.12
3,1367.0,D,0.12
4,2194.0,C,0.13


In [8]:
def generate_shipment_id():
    """Return a string identifier for one package.

    The id is the current performance-counter reading followed by a random
    fraction between 0.0 and 1.0 (three decimals), both rendered with
    ``str`` and concatenated. Ids are very unlikely to repeat within one
    run, but they are not guaranteed to be unique.

    Returns
    -------
    str
    """
    # time.clock() was removed in Python 3.8; time.perf_counter() is the
    # documented replacement.
    return str(time.perf_counter()) + str(random.randint(0, 1000) / 1000)

In [11]:
def build_shipments(stock, capacity=1.58):
    """Greedily pack the items of ``stock`` into packages of limited volume.

    Each package is seeded with the largest remaining item and then topped
    up, largest candidate first, with every remaining item that still fits.
    The caller's frame is not modified.

    Parameters
    ----------
    stock : pandas.DataFrame
        Items to pack. Must contain a 'cubic_volume_ft' column and should
        have unique index labels, because items are removed from the
        working stock by label.
    capacity : float, optional
        Maximum total 'cubic_volume_ft' of one package. Defaults to 1.58.

    Returns
    -------
    pandas.DataFrame
        Every input row, grouped package by package, with an added
        'package_id' column. The original index labels are kept. An empty
        frame is returned when ``stock`` is empty.
    """
    # The "largest item" is always taken from the bottom row, so make sure
    # the rows really are ascending. A stable sort leaves an already sorted
    # frame in exactly its existing order.
    stock = stock.sort_values("cubic_volume_ft",
                              ascending=True,
                              kind="mergesort")

    bundles = []

    while not stock.empty:

        # Seed the package with the largest remaining item and remove it
        # from the working stock.
        bundle = stock.iloc[[-1]]
        stock = stock.iloc[:-1]

        # Only items no larger than the space left next to the seed can be
        # added. Use <= so an item that exactly fills the package is
        # eligible, consistent with the fit check below.
        room_left = capacity - bundle["cubic_volume_ft"].iloc[0]
        candidates = (stock[stock["cubic_volume_ft"].values <= room_left]
                      .sort_values("cubic_volume_ft",
                                   ascending=False)
                     )

        for index, volume in candidates["cubic_volume_ft"].items():
            packed_volume = bundle["cubic_volume_ft"].sum()

            # Stop as soon as not even the smallest remaining item fits.
            if stock.empty or (packed_volume + stock["cubic_volume_ft"].values.min()) > capacity:
                break

            # If it fits it sits: move the item from the stock to the bundle.
            # Selecting the row with .loc keeps the column dtypes intact.
            if (packed_volume + volume) <= capacity:
                bundle = pd.concat([bundle, stock.loc[[index]]])
                stock = stock.drop(index)

        # Issue a shipment id to the bundle; assign() returns a new frame
        # instead of writing into a slice of the stock.
        bundles.append(bundle.assign(package_id=generate_shipment_id()))

    if not bundles:
        return pd.DataFrame()

    # DataFrame.append was removed in pandas 2.0; concatenate once instead.
    return pd.concat(bundles)

In [12]:
# Pack the whole stock into packages, ignoring item groups.
shipments = build_shipments(stock)
# Persist the packing result (index column included) to the file "shipment_detail".
shipments.to_csv("shipment_detail")

In [13]:
shipments.head(5).reset_index(drop=True)

Unnamed: 0,item_id,item_group,cubic_volume_ft,package_id
0,4643.0,B,1.3,1.1354750.963
1,8629.0,C,0.27,1.1354750.963
2,6549.0,A,1.29,1.1462560.397
3,2207.0,B,0.29,1.1462560.397
4,4235.0,B,1.28,1.1535720.756


In [14]:
len(shipments.package_id.unique())

87

In [15]:
# Item count and total cubic volume per package (first five packages).
(
    shipments
    .drop(columns=['item_id', 'item_group'])
    .groupby('package_id')
    .agg(['count', 'sum'])
    .head(5)
)

Unnamed: 0_level_0,cubic_volume_ft,cubic_volume_ft
Unnamed: 0_level_1,count,sum
package_id,Unnamed: 1_level_2,Unnamed: 2_level_2
1.1354750.963,2,1.57
1.1462560.397,2,1.58
1.1535720.756,2,1.58
1.1594410.02,2,1.57
1.1649470.685,2,1.58


In [18]:
# The package ids, read from the index of the per-package summary.
(
    shipments
    .drop(columns=['item_id', 'item_group'])
    .groupby('package_id')
    .agg(['count', 'sum'])["cubic_volume_ft"]
    .index
)

Index(['1.1354750.963', '1.1462560.397', '1.1535720.756', '1.1594410.02',
       '1.1649470.685', '1.1707560.937', '1.1760010.66', '1.1812170.893',
       '1.186740.662', '1.192010.564', '1.1972620.002', '1.2024810.863',
       '1.2078240.744', '1.2130990.271', '1.2186220.249', '1.2244230.293',
       '1.2301410.572', '1.235860.467', '1.241540.965', '1.2473590.589',
       '1.2530960.459', '1.2587520.112', '1.2644250.027', '1.2700810.234',
       '1.2755660.276', '1.2807450.94', '1.2859720.651', '1.2911670.928',
       '1.2964040.386', '1.3016280.922', '1.3069790.546', '1.3124770.8',
       '1.3176320.804', '1.3227640.985', '1.3279150.853', '1.3332610.662',
       '1.3384290.069', '1.3439270.272', '1.3490990.597', '1.3543020.565',
       '1.3594560.568', '1.3646740.401', '1.3698360.603', '1.375130.201',
       '1.3803180.171', '1.3855860.301', '1.3907640.694', '1.396140.537',
       '1.4012840.153', '1.406630.366', '1.4118230.956', '1.4170410.503',
       '1.4222780.214', '1.4374090.60

In [None]:
# Average total cubic volume packed into one package.
(
    shipments
    .drop(columns=['item_id', 'item_group'])
    .groupby('package_id')
    .agg(['count', 'sum'])['cubic_volume_ft']['sum']
    .mean()
)

<h3>The stock is currently empty, so let's refill our stock. This time, let's build boxes based on group.</h3>

In [None]:
# Start again from a freshly cleaned copy of the stock. The previously
# referenced name `stock_clean` is not defined anywhere in this notebook,
# so the cell raised NameError on a fresh kernel.
stock_sorted = (clean_csv().sort_values("cubic_volume_ft",
                                        ascending=True)
                           .reset_index(drop=True)
               )

In [None]:
def get_groups(stock=None):
    """Return the distinct item groups present in a stock frame.

    Parameters
    ----------
    stock : pandas.DataFrame, optional
        Frame with an 'item_group' column. When omitted, the notebook-level
        ``stock_sorted`` frame is used, which keeps existing zero-argument
        calls working.

    Returns
    -------
    numpy.ndarray
        The unique 'item_group' values in order of first appearance.
    """
    if stock is None:
        stock = stock_sorted
    return stock.item_group.unique()

In [None]:
# Pack every item group on its own and write one csv file per group.
group_shipments = []

for group in get_groups():
    group_shipment = build_shipments(stock_sorted[stock_sorted.item_group == group])

    group_shipment.to_csv("shipment_detail_group_"+group)

    group_shipments.append(group_shipment)

# DataFrame.append was removed in pandas 2.0, and growing a frame inside a
# loop copies it on every pass; collect the pieces and concatenate once.
shipment_by_group = pd.concat(group_shipments) if group_shipments else pd.DataFrame()

In [None]:
shipment_by_group.head(10)

In [None]:
len(shipment_by_group.package_id.unique())

In [None]:
# Item count and total cubic volume per package for the per-group packing
# (first five packages).
(
    shipment_by_group
    .drop(columns=['item_id', 'item_group'])
    .groupby('package_id')
    .agg(['count', 'sum'])
    .head(5)
)

In [None]:
# Average total cubic volume per package for the per-group packing.
(
    shipment_by_group
    .drop(columns=['item_id', 'item_group'])
    .groupby('package_id')
    .agg(['count', 'sum'])['cubic_volume_ft']['sum']
    .mean()
)