In [1]:
import pandas as pd
import numpy as np
import datetime
import random

In [2]:
%ls

[0m[01;34mapp[0m/               notes_about_csv_file.txt  README.md
bill_of_lading.py  [01;34m__pycache__[0m/              shipment_builder.ipynb
items.csv          QUESTION.docx             shipment_requirements.yml


In [3]:
with open("notes_about_csv_file.txt") as notes:
    print(notes.read())

The items.csv file is structured as such:

item_id	  item_group  cubic_volume_ft
10413	  A	          0.1
10341	  A	          0.5
10004	  B	          1.0
80014	  C	          0.3
20242	  B	          0.4
…	      …	          …

Each record in this csv file is representative of a single item.
The item_id field is a unique identifier for the item, while the item_type and cubic_volume_ft fields are attributes of the item.



In [4]:
def clean_csv():
    import pandas as pd

    stock = pd.read_csv("app/data/items.csv")

    # Isolate the data to only the three necessary columns:
        # 'item_id', 'item_group', 'cubic_volume_ft'
    # Drop any NaN rows from the data
    # Sort the values based on Cubic volume
        # Will sort smallest to largest
    # Reset the index due to shuffling from sorting
  
    return  (stock.loc[:,['item_id', 'item_group', 'cubic_volume_ft']]
                  .dropna()
                  .sort_values("cubic_volume_ft",
                               ascending=True)
                  .reset_index(drop=True)
            )
    
    '''
    Future implementation will include functionality for: 
        Handling NaN values beyond just dropping them
        Include column testing to ensure data types
    '''

In [5]:
items = clean_csv()

In [6]:
items.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 3 columns):
item_id            200 non-null float64
item_group         200 non-null object
cubic_volume_ft    200 non-null float64
dtypes: float64(2), object(1)
memory usage: 4.8+ KB


In [7]:
items.head()

Unnamed: 0,item_id,item_group,cubic_volume_ft
0,8178.0,C,0.11
1,9667.0,B,0.11
2,4093.0,C,0.12
3,1367.0,D,0.12
4,2194.0,C,0.13


In [8]:
def data_generator():
    import random
    import pandas as pd
    return (pd.DataFrame(data = {'key': [random.randint(0,199) for x in range(random.randint(175,225))]})
              .merge(clean_csv(), 
                     left_on='key', 
                     right_index=True)
              .drop('key',
                    axis = 1)
              .reset_index(drop=True)
           )

In [9]:
def stock():
    import glob
    
    # It's nice to assume clean data, and to be right for once
    
    stock = pd.DataFrame()
    for csv in glob.glob("app/data/tmp/*.csv"):
        stock = stock.append(pd.read_csv(csv))
    
    return stock.reset_index(drop=True)

In [10]:
def generate_shipment_id():
    import re
    return int(re.sub("[^0-9]", "", str(datetime.datetime.today()))[:17])

In [11]:
def shipments(stock) :
    # Create a blank shipment sheet
    shipments = pd.DataFrame()

    while stock.empty == False :

        # Get the largest item by cubic volume and remove from stock
        stock, largest_item = stock.drop(stock.tail(1).index, axis=0), stock.tail(1)

        bundle = largest_item

        # Filter the remaining stock by what CAN still fit in the box
        # Grab the index of the item and the item

        for index, item in (stock[stock.cubic_volume_ft.values < (1.58 - largest_item.cubic_volume_ft.values)]
                            .sort_values("cubic_volume_ft",
                                         ascending=False)
                           ).iterrows():
            
            # If there is no item in stock that could fit into the bundle break out of the matrix
            if (bundle.cubic_volume_ft.sum() + stock.cubic_volume_ft.values.min()) > 1.58 :
                break
            
            # If it fits it sits
            # Add the item to the bundle
            # Drop item from the stock
            elif (bundle.cubic_volume_ft.sum() + item.cubic_volume_ft) <= 1.58 :
                stock, item = (stock.drop(index), item)
                bundle = bundle.append(item)

        #Issue a shipment id to the bundle
        bundle["shipment_id"] = generate_shipment_id()

        #Add bundle to the shipment file
        shipments = shipments.append(bundle)
    return shipments

In [12]:
shipments = shipments(items)

In [13]:
shipments.head(5).reset_index(drop=True)

Unnamed: 0,item_id,item_group,cubic_volume_ft,shipment_id
0,4643.0,B,1.3,20190302112452614
1,8629.0,C,0.27,20190302112452614
2,6549.0,A,1.29,20190302112452631
3,2207.0,B,0.29,20190302112452631
4,4235.0,B,1.28,20190302112452650


In [14]:
def summary(shipments):
    
    # Build initial summaries based on items and cubic volume in feet
    data = {'Total Items' : len(shipments.item_id),
            'Total Cubic Volume in Feet' : shipments.cubic_volume_ft.sum(),
            'Total Item Groups' : len(shipments.item_group.unique())}
    
    # Check for shipment id and build additional shipment summaries
    if 'shipment_id' in shipments.keys() :
        data['Total Shipments'] = len(shipments.shipment_id.unique())
        data['Cubic Volume not Utilized'] = (1.58*len(shipments.shipment_id.unique()) - shipments.cubic_volume_ft.sum())
        data['Percent Cubic Volume not Utilized'] = round(((1.58 * len(shipments.shipment_id.unique()) - shipments.cubic_volume_ft.sum()) / 
                                                     shipments.cubic_volume_ft.sum()) * 100, 2)
    # return resulting summary as a DataFrame
    return (pd.DataFrame(data, 
                         index=[0])
              .rename({0:'Details'})
           )

In [15]:
summary = summary(shipments)

In [16]:
summary['Total Shipments'][0]

87

In [17]:
summary.head()

Unnamed: 0,Total Items,Total Cubic Volume in Feet,Total Item Groups,Total Shipments,Cubic Volume not Utilized,Percent Cubic Volume not Utilized
Details,200,135.52,4,87,1.94,1.43


<h3>The stock is currently empty so lets refill our stock. This time lets build boxes based on group

In [18]:
def get_groups():
    groups = stock_sorted.item_group.unique()
    return groups