In [1]:
import pandas as pd
import numpy as np
import time
import random

In [2]:
%ls

[0m[01;34mapp[0m/               notes_about_csv_file.txt  README.md
bill_of_lading.py  [01;34m__pycache__[0m/              shipment_requirements.yml
items.csv          QUESTION.docx             target_interview.ipynb


In [3]:
with open("notes_about_csv_file.txt") as notes:
    print(notes.read())

The items.csv file is structured as such:

item_id	  item_group  cubic_volume_ft
10413	  A	          0.1
10341	  A	          0.5
10004	  B	          1.0
80014	  C	          0.3
20242	  B	          0.4
…	      …	          …

Each record in this csv file is representative of a single item.
The item_id field is a unique identifier for the item, while the item_type and cubic_volume_ft fields are attributes of the item.



In [4]:
def clean_csv():
    import pandas as pd

    stock = pd.read_csv("app/data/items.csv")

    # Isolate the data to only the three necessary columns:
        # 'item_id', 'item_group', 'cubic_volume_ft'
    # Drop any NaN rows from the data
    # Sort the values based on Cubic volume
        # Will sort smallest to largest
    # Reset the index due to shuffling from sorting
  
    return  (stock.loc[:,['item_id', 'item_group', 'cubic_volume_ft']]
                  .dropna()
                  .sort_values("cubic_volume_ft",
                               ascending=True)
                  .reset_index(drop=True)
            )
    
    '''
    Future implementation will include functionality for: 
        Handling NaN values beyond just dropping them
        Include column testing to ensure data types
    '''

In [5]:
stock = clean_csv()

In [6]:
stock.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 3 columns):
item_id            200 non-null float64
item_group         200 non-null object
cubic_volume_ft    200 non-null float64
dtypes: float64(2), object(1)
memory usage: 4.8+ KB


In [7]:
stock.head()

Unnamed: 0,item_id,item_group,cubic_volume_ft
0,8178.0,C,0.11
1,9667.0,B,0.11
2,4093.0,C,0.12
3,1367.0,D,0.12
4,2194.0,C,0.13


In [39]:
def data_generator :
    [random.randint(0,199) for x in range(random.randint(175,225))]

[197,
 121,
 147,
 28,
 158,
 189,
 50,
 143,
 160,
 61,
 39,
 194,
 177,
 106,
 9,
 125,
 74,
 25,
 79,
 165,
 197,
 60,
 96,
 71,
 60,
 55,
 153,
 144,
 161,
 47,
 93,
 89,
 34,
 0,
 110,
 169,
 154,
 4,
 58,
 140,
 137,
 72,
 149,
 60,
 4,
 119,
 104,
 173,
 79,
 50,
 0,
 2,
 124,
 90,
 10,
 179,
 149,
 115,
 60,
 197,
 62,
 42,
 1,
 56,
 84,
 71,
 183,
 45,
 164,
 41,
 19,
 114,
 137,
 104,
 153,
 103,
 130,
 176,
 65,
 132,
 120,
 70,
 183,
 161,
 126,
 21,
 51,
 95,
 164,
 42,
 63,
 101,
 181,
 8,
 189,
 65,
 4,
 101,
 135,
 27,
 84,
 73,
 57,
 155,
 133,
 14,
 0,
 191,
 167,
 91,
 117,
 185,
 62,
 67,
 5,
 141,
 55,
 127,
 192,
 86,
 156,
 57,
 72,
 182,
 52,
 32,
 91,
 62,
 50,
 98,
 76,
 136,
 191,
 140,
 10,
 113,
 64,
 39,
 64,
 20,
 159,
 77,
 34,
 150,
 175,
 17,
 27,
 183,
 20,
 91,
 158,
 166,
 96,
 26,
 83,
 123,
 124,
 130,
 77,
 145,
 161,
 55,
 55,
 96,
 134,
 196,
 181,
 188,
 24,
 36,
 195,
 153,
 115,
 114,
 12,
 122,
 194,
 196,
 175,
 35,
 9,
 105,
 130,
 72,
 1

In [31]:
random.randint()

Object `random.randint()` not found.


In [8]:
def generate_shipment_id():
    return str(time.clock()) + str(random.randint(0,1000)/1000)

In [18]:
def shipments(stock) :
    # Create a blank shipment sheet
    shipments = pd.DataFrame()

    while stock.empty == False :

        # Get the largest item by cubic volume and remove from stock
        stock, largest_item = stock.drop(stock.tail(1).index, axis=0), stock.tail(1)

        bundle = largest_item

        # Filter the remaining stock by what CAN still fit in the box
        # Grab the index of the item and the item

        for index, item in (stock[stock.cubic_volume_ft.values < (1.58 - largest_item.cubic_volume_ft.values)]
                            .sort_values("cubic_volume_ft",
                                         ascending=False)
                           ).iterrows():
            
            # If there is no item in stock that could fit into the bundle break out of the matrix
            if (bundle.cubic_volume_ft.sum() + stock.cubic_volume_ft.values.min()) > 1.58 :
                break
            
            # If it fits it sits
            # Add the item to the bundle
            # Drop item from the stock
            elif (bundle.cubic_volume_ft.sum() + item.cubic_volume_ft) <= 1.58 :
                stock, item = (stock.drop(index), item)
                bundle = bundle.append(item)

        #Issue a shipment id to the bundle
        bundle["shipment_id"] = gen_id.generate_shipment_id()

        #Add bundle to the shipment file
        shipments = shipments.append(bundle)
    return shipments

In [19]:
shipments = build_shipments(stock)
shipments.to_csv("shipment_detail")

  


In [20]:
shipments.head(5).reset_index(drop=True)

Unnamed: 0,item_id,item_group,cubic_volume_ft,shipment_id
0,4643.0,B,1.3,1.653370.091
1,8629.0,C,0.27,1.653370.091
2,6549.0,A,1.29,1.6586930.419
3,2207.0,B,0.29,1.6586930.419
4,4235.0,B,1.28,1.6642930.126


In [13]:
def summary(shipments):
    
    # Build initial summaries based on items and cubic volume in feet
    data = {'Total Items' : len(shipments.item_id),
            'Total Cubic Volume in Feet' : shipments.cubic_volume_ft.sum(),
            'Total Item Groups' : len(shipments.item_group.unique())}
    
    # Check for shipment id and build additional shipment summaries
    if 'shipment_id' in shipments.keys() :
        data['Total Shipments'] = len(shipments.shipment_id.unique())
        data['Cubic Volume not Utilized'] = (1.58*len(shipments.shipment_id.unique()) - shipments.cubic_volume_ft.sum())
        data['Percent Cubic Volume not Utilized'] = round(((1.58 * len(shipments.shipment_id.unique()) - shipments.cubic_volume_ft.sum()) / 
                                                     shipments.cubic_volume_ft.sum()) * 100, 2)
    # return resulting summary as a DataFrame
    return (pd.DataFrame(data, 
                         index=[0])
              .rename({0:'Details'})
           )

In [23]:
summary = summary(shipments)

In [28]:
summary['Total Shipments'][0]

87

In [14]:
(shipments.drop(['item_id', 'item_group'],
                axis=1)
          .groupby('package_id')
          .agg(['count','sum'])
)["cubic_volume_ft"].index

Index(['0.5230060.968', '0.529990.905', '0.5365350.242', '0.5421470.29',
       '0.5476410.169', '0.5532960.382', '0.5594180.394', '0.5649840.887',
       '0.5703520.673', '0.5759030.917', '0.5814410.658', '0.5864810.9',
       '0.5917960.736', '0.5969520.172', '0.6019540.085', '0.6069830.861',
       '0.6120260.542', '0.6170640.365', '0.6223070.34', '0.6275320.533',
       '0.6325510.485', '0.6375930.844', '0.6426660.925', '0.6479050.32',
       '0.6529380.725', '0.6579960.875', '0.6629980.452', '0.6681350.627',
       '0.6740680.986', '0.6796510.215', '0.6853960.778', '0.6910830.028',
       '0.6966480.875', '0.7017390.027', '0.7070790.916', '0.7125630.532',
       '0.7176640.328', '0.722950.334', '0.7289480.862', '0.7340540.282',
       '0.7404270.671', '0.7468650.23', '0.7524120.81', '0.7580030.212',
       '0.7637240.478', '0.7691490.193', '0.774540.112', '0.780230.843',
       '0.785360.601', '0.7907430.823', '0.7960080.685', '0.8010430.292',
       '0.8061390.158', '0.8216960.85

In [15]:
(shipments.drop(['item_id', 'item_group'],
                axis=1)
          .groupby('package_id')
          .agg(['count','sum'])
)['cubic_volume_ft']['sum'].mean()

1.5577011494252875

<h3>The stock is currently empty so lets refill our stock. This time lets build boxes based on group

In [16]:
stock_sorted = (stock_clean.sort_values("cubic_volume_ft",
                                       ascending=True)
                           .reset_index(drop=True)
               )

NameError: name 'stock_clean' is not defined

In [None]:
def get_groups():
    groups = stock_sorted.item_group.unique()
    return groups

In [None]:
shipment_by_group = pd.DataFrame()

for group in get_groups():
    group_shipment = build_shipments(stock_sorted[stock_sorted.item_group == group])
    
    group_shipment.to_csv("shipment_detail_group_"+group)
    
    shipment_by_group = shipment_by_group.append(group_shipment)

In [None]:
shipment_by_group.head(10)

In [None]:
len(shipment_by_group.package_id.unique())

In [None]:
(shipment_by_group.drop(['item_id', 'item_group'],
                         axis=1)
                   .groupby('package_id')
                   .agg(['count','sum'])
).head(5)

In [None]:
(shipment_by_group.drop(['item_id', 'item_group'],
                        axis=1)
                  .groupby('package_id')
                  .agg(['count','sum'])
)['cubic_volume_ft']['sum'].mean()